In [10]:
import pandas as pd
import sklearn
import numpy as np

from sklearn.datasets import make_classification 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV


In [6]:
x, y = make_classification(
    n_samples = 1000,
    n_features = 20,
    n_informative = 10,
    n_redundant = 5,
    random_state = 42
)

In [7]:
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size = 0.2,
    random_state = 42
)

In [9]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(800, 20)
(200, 20)
(800,)
(200,)


In [8]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [25]:
loss = ["hinge", "log_loss", "modified_huber", "squared_hinge"]
penalty = ["l2", "l2"]
alpha = [0.01, 0.001, 0.0001]
l1_ratio = [0.10, 0.15, 0.25]
fit_intercept = [True, False]
max_iter = [1000]
tol = [1e-3, 1e-0]
shuffle = [True, False]
n_jobs= [-1]
#ela0 = [0.0, 0.1, 0.01]
n_iter_no_change = [5, 7, 10]
early_stopping = [False, True]


In [26]:
param_grid = {
    "loss" : loss,
    "penalty" : penalty,
    "l1_ratio" : l1_ratio,
    "fit_intercept" : fit_intercept,
    "max_iter" : max_iter,
    "tol" : tol,
    "shuffle" : shuffle,
    "n_jobs" : n_jobs,
    #"ela0" : ela0,
    "n_iter_no_change" : n_iter_no_change,
    "early_stopping" : early_stopping  
}

In [27]:
sgd = SGDClassifier()

sgd_grid = GridSearchCV(estimator = sgd, param_grid= param_grid, cv = 5, verbose = 2)


In [28]:
sgd_grid.fit(x_train, y_train)

Fitting 5 folds for each of 1152 candidates, totalling 5760 fits
[CV] END early_stopping=False, fit_intercept=True, l1_ratio=0.1, loss=hinge, max_iter=1000, n_iter_no_change=5, n_jobs=-1, penalty=l2, shuffle=True, tol=0.001; total time=   0.0s
[CV] END early_stopping=False, fit_intercept=True, l1_ratio=0.1, loss=hinge, max_iter=1000, n_iter_no_change=5, n_jobs=-1, penalty=l2, shuffle=True, tol=0.001; total time=   0.0s
[CV] END early_stopping=False, fit_intercept=True, l1_ratio=0.1, loss=hinge, max_iter=1000, n_iter_no_change=5, n_jobs=-1, penalty=l2, shuffle=True, tol=0.001; total time=   0.0s
[CV] END early_stopping=False, fit_intercept=True, l1_ratio=0.1, loss=hinge, max_iter=1000, n_iter_no_change=5, n_jobs=-1, penalty=l2, shuffle=True, tol=0.001; total time=   0.0s
[CV] END early_stopping=False, fit_intercept=True, l1_ratio=0.1, loss=hinge, max_iter=1000, n_iter_no_change=5, n_jobs=-1, penalty=l2, shuffle=True, tol=0.001; total time=   0.0s
[CV] END early_stopping=False, fit_inter

In [29]:
sgd_grid.best_params_

{'early_stopping': False,
 'fit_intercept': False,
 'l1_ratio': 0.15,
 'loss': 'log_loss',
 'max_iter': 1000,
 'n_iter_no_change': 10,
 'n_jobs': -1,
 'penalty': 'l2',
 'shuffle': True,
 'tol': 0.001}

In [30]:
sgd_grid.best_score_

np.float64(0.8362499999999999)