<a href="https://colab.research.google.com/github/PabloCGarcia/bioDeepLearning/blob/main/Clase02_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Build a two-step pipeline: standardise the features, then fit a
# logistic-regression classifier on the scaled values.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(),
)

# Load the iris dataset and split it into train and test sets
# (random_state fixed so the split is reproducible).
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit the whole pipeline. fit() returns the pipeline itself, so no temporary
# variable is needed to print the fitted estimator.
pipe.fit(X_train, y_train)
print(pipe)

# The fitted pipeline can now be used like any other estimator.
# accuracy_score expects (y_true, y_pred); accuracy is symmetric, so the
# value is unchanged, but the documented argument order is now respected.
accuracy_score(y_test, pipe.predict(X_test))

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from scipy.stats import randint

# Fetch the California housing regression data and hold out a test split.
X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Distributions the randomized search samples hyper-parameters from.
param_distributions = dict(
    n_estimators=randint(1, 5),
    max_depth=randint(5, 10),
)

# Randomized search over a seeded random forest: 5 sampled candidates,
# with the search itself seeded as well.
search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=0),
    param_distributions=param_distributions,
    n_iter=5,
    random_state=0,
)
tmp = search.fit(X_train, y_train)
print(tmp)
print(search.best_params_)

# After fitting, the search object is scored like a regular estimator.
# The original notes recorded the selected parameters as max_depth=9 and
# n_estimators=4; the printed best_params_ above is the authoritative value.
search.score(X_test, y_test)

In [None]:
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler


# Optimiser configurations compared in the cells below: different learning
# rate schedules and momentum parameters for SGD, plus Adam.
#
# The three lists `params`, `labels` and `plot_args` are zipped together by
# position, so entry i of each list must describe the same configuration.
# Within each schedule the order is: no momentum, classical momentum,
# Nesterov's momentum. (The two inv-scaling momentum entries were previously
# swapped relative to their labels.)
params = [{'solver': 'sgd', 'learning_rate': 'constant', 'momentum': 0,
           'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'constant', 'momentum': .9,
           'nesterovs_momentum': False, 'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'constant', 'momentum': .9,
           'nesterovs_momentum': True, 'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'invscaling', 'momentum': 0,
           'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'invscaling', 'momentum': .9,
           'nesterovs_momentum': False, 'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'invscaling', 'momentum': .9,
           'nesterovs_momentum': True, 'learning_rate_init': 0.2},
          {'solver': 'adam', 'learning_rate_init': 0.01}]

# Legend text, one entry per configuration in `params` (same order).
labels = ["constant learning-rate", "constant with momentum",
          "constant with Nesterov's momentum",
          "inv-scaling learning-rate", "inv-scaling with momentum",
          "inv-scaling with Nesterov's momentum", "adam"]

# Line style per configuration: colour encodes the momentum variant,
# solid vs dashed encodes constant vs inv-scaling schedule, black is Adam.
plot_args = [{'c': 'red', 'linestyle': '-'},
             {'c': 'green', 'linestyle': '-'},
             {'c': 'blue', 'linestyle': '-'},
             {'c': 'red', 'linestyle': '--'},
             {'c': 'green', 'linestyle': '--'},
             {'c': 'blue', 'linestyle': '--'},
             {'c': 'black', 'linestyle': '-'}]




In [None]:
# Train one MLP per optimiser configuration on the iris data and keep the
# fitted models in `mlps` for the plotting cell that follows.
iris = load_iris()

name = "Iris"
print("\nlearning on dataset %s" % name)

# Rescale the features with MinMaxScaler before training.
X = MinMaxScaler().fit_transform(iris.data)
max_iter = 20
mlps = []

for label, param in zip(labels, params):
    print("training: %s" % label)
    # fit() returns the estimator, so construction and training are chained.
    mlp = MLPClassifier(
        verbose=0,
        random_state=0,
        max_iter=max_iter,
        **param
    ).fit(X, iris.target)
    mlps.append(mlp)
    print("Training set score: %f" % mlp.score(X, iris.target))
    print("Training set loss: %f" % mlp.loss_)


In [None]:
# Overlay the training-loss curve of every model currently held in `mlps`
# on one axes, styled per configuration via `plot_args`.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
for mlp, label, args in zip(mlps, labels, plot_args):
    ax.plot(mlp.loss_curve_, label=label, **args)

# Figure-level legend built from the plotted lines, laid out in 3 columns.
fig.legend(handles=ax.get_lines(), labels=labels, ncol=3, loc="upper center")
plt.show()

In [None]:
import seaborn as sns
import pandas as pd

In [None]:
# Put the iris measurements into a DataFrame with readable column names.
data = pd.DataFrame(
    data=iris.data,
    columns=["sepal_length", "sepal_width", "petal_length", "petal_width"],
)

# Append class / label data
data["class"] = iris.target

# Pairwise scatter grid coloured by class.
# `height` is the current name of the per-facet size argument; the former
# `size` keyword of seaborn.pairplot is deprecated.
a = sns.pairplot(
    data,
    hue='class',
    palette="muted",
    height=5,
    vars=['sepal_width', 'sepal_length', 'petal_length', 'petal_width'],
    kind='scatter',
)

# to change the size of scatterpoints
a = a.map_offdiag(plt.scatter, s=35, alpha=0.9)

# remove the top and the right lines
sns.despine()

# leave room at the top of the figure for the title
plt.subplots_adjust(top=0.9)

# set the title of the graph
# NOTE(review): the title names only sepal width/length, while the grid above
# is built from all four measurements -- confirm the intended wording.
a.fig.suptitle('Relation between Sepal Width and Sepal Length', fontsize=20, color='b', alpha=0.5)

In [None]:
# The notebook never imports a `datasets` module, so the original
# `datasets.load_digits()` call raised NameError. Import the loader directly.
from sklearn.datasets import load_digits

# Train one MLP per optimiser configuration on the digits data and keep the
# fitted models in `mlps` for the plotting cell that follows.
digits = load_digits()

name = "digits"
print("\nlearning on dataset %s" % name)

# Rescale the features with MinMaxScaler before training.
X = MinMaxScaler().fit_transform(digits.data)
mlps = []
max_iter = 15

for label, param in zip(labels, params):
    print("training: %s" % label)
    mlp = MLPClassifier(verbose=0, random_state=0,
                        max_iter=max_iter, **param)
    mlp.fit(X, digits.target)
    mlps.append(mlp)
    print("Training set score: %f" % mlp.score(X, digits.target))
    print("Training set loss: %f" % mlp.loss_)


In [None]:
# Draw one training-loss curve per fitted model in `mlps`, all on the same
# axes, using the matching style from `plot_args`.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
for mlp, label, args in zip(mlps, labels, plot_args):
    ax.plot(mlp.loss_curve_, label=label, **args)

# Shared legend above the plot, three entries per row.
fig.legend(handles=ax.get_lines(), labels=labels, ncol=3, loc="upper center")
plt.show()

In [None]:
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml

from sklearn.neural_network import MLPClassifier

# Download MNIST from OpenML. The dataset version is pinned and plain NumPy
# arrays are requested explicitly, so the result does not depend on which
# version is currently "active" on OpenML or on the library's default
# container type.
mnist = fetch_openml("mnist_784", version=1, as_frame=False)

# Rescale the pixel values by 255 and split by position: the first 60000
# rows are used for training, the remaining rows for testing.
X, y = mnist.data / 255., mnist.target
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

# Two hidden layers of 100 units trained with SGD; verbose=10 prints the
# training progress.
mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
                    solver='sgd', verbose=10, tol=1e-4, random_state=1)
# Smaller alternative kept for reference (one hidden layer of 50 units,
# at most 10 iterations, larger initial learning rate):
#mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
#                    solver='sgd', verbose=10, tol=1e-4, random_state=1,
#                    learning_rate_init=.1)

mlp.fit(X_train, y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))

# Visualise first-layer weights as 28x28 images. zip() stops at the 16 axes
# of the 4x4 grid, so only the first 16 hidden units are shown.
fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    # the colour range is set to half of the global min / max
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=.5 * vmin,
               vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())

plt.show()