In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import SGDClassifier
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import MDS
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

%matplotlib inline

In [5]:
data = load_iris()

df = pd.DataFrame(data.data,columns=data.feature_names)
df['target'] = data.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [6]:
var = [x for x in df.columns if x!='target']

In [7]:
X = df[var].copy()
y = df['target'].copy()

In [None]:
sc = MinMaxScaler()
sc.fit(X)
Xs = pd.DataFrame(sc.transform(X),columns=X.columns)
Xs.head()

In [9]:
Xt, Xv, yt, yv = train_test_split(X,y,train_size=0.7)



In [35]:
param=dict(loss = ['hinge', 'log', 'modified_huber','squared_hinge', 'perceptron'],
penalty = ['none', 'l2', 'l1','elasticnet'],
learning_rate=['constant','optimal','invscaling','adaptive'],
alpha = np.arange(0.0001,0.001,0.0001),
          eta0=np.arange(0.1,1,0.1))

In [36]:
modelo = SGDClassifier()

In [38]:
modelo.fit(Xt,yt)



SGDClassifier(alpha=0.0001, average=False, class_weight=None,
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=None,
       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
       power_t=0.5, random_state=None, shuffle=True, tol=None,
       validation_fraction=0.1, verbose=0, warm_start=False)

In [39]:
print(accuracy_score(y_true=yt,y_pred=modelo.predict(Xt)))
print(accuracy_score(y_true=yv,y_pred=modelo.predict(Xv)))

0.9428571428571428
0.9555555555555556


In [45]:
grid = RandomizedSearchCV(n_jobs=-1,scoring='accuracy',n_iter=1000,
                    param_distributions=param,
                    cv=4,
                    estimator=modelo,
                   verbose=True)

In [46]:
%%time
grid.fit(Xt,yt)

Fitting 4 folds for each of 1000 candidates, totalling 4000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 221 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 3221 tasks      | elapsed:    8.6s


CPU times: user 4.69 s, sys: 73.1 ms, total: 4.77 s
Wall time: 10.4 s


[Parallel(n_jobs=-1)]: Done 4000 out of 4000 | elapsed:   10.3s finished


RandomizedSearchCV(cv=4, error_score='raise-deprecating',
          estimator=SGDClassifier(alpha=0.0001, average=False, class_weight=None,
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=None,
       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
       power_t=0.5, random_state=None, shuffle=True, tol=None,
       validation_fraction=0.1, verbose=0, warm_start=False),
          fit_params=None, iid='warn', n_iter=1000, n_jobs=-1,
          param_distributions={'loss': ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'], 'penalty': ['none', 'l2', 'l1', 'elasticnet'], 'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'], 'alpha': array([0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008,
       0.0009]), 'eta0': array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          

In [47]:
modelo = grid.best_estimator_

In [48]:
grid.best_score_

0.9428571428571428

In [49]:
grid.best_params_

{'penalty': 'l1',
 'loss': 'perceptron',
 'learning_rate': 'invscaling',
 'eta0': 0.30000000000000004,
 'alpha': 0.0009000000000000001}

In [50]:
print(accuracy_score(y_true=yt,y_pred=modelo.predict(Xt)))
print(accuracy_score(y_true=yv,y_pred=modelo.predict(Xv)))

0.9523809523809523
0.9111111111111111
