<h1>Stochastic Gradient Descent<h1>

In [1]:
import pandas as pd
import numpy as np

<h2>Importing SGD Classifier & Accuracy Metrics<h2>

In [46]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,precision_score,recall_score,f1_score

<h3>Reading Train Test Split<h3>

In [3]:
x_train = pd.read_csv('X_train.csv')
x_test = pd.read_csv('X_test.csv')
y_train = pd.read_csv('y_train.csv')
y_test = pd.read_csv('y_test.csv')

In [4]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(3305, 16)
(3305, 1)
(827, 16)
(827, 1)


In [5]:
x_train.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,1,8,2,2,0,0.006244,0,0,0,8,9,0.092593,1,0.104598,1,0
1,4,4,1,1,0,0.006687,0,0,0,12,6,0.04065,7,0.409195,2,0
2,2,8,2,2,0,0.124195,0,0,0,8,7,0.212285,3,0.088506,2,2
3,4,4,1,2,0,0.019987,0,0,0,12,5,0.072719,3,0.208046,10,2
4,3,10,1,1,0,0.006281,1,1,1,5,2,0.084011,5,0.217241,5,1


<h2>Training the model without feature selection & hyper-parameter tuning

In [7]:
base_sgd_model = SGDClassifier(n_jobs=-1)
base_sgd_model.fit(x_train,y_train.values.ravel())
y_pred = base_sgd_model.predict(x_test)

In [8]:
accuracy_score(y_pred,y_test)

0.7642079806529625

In [9]:
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[486,  40],
       [155, 146]])

In [10]:
precision_score(y_true=y_test, y_pred=y_pred)

0.7849462365591398

In [11]:
recall_score(y_true=y_test, y_pred=y_pred)

0.4850498338870432

In [47]:
f1_score(y_true=y_test, y_pred=y_pred)

0.5995893223819302

<h2>Feature Selection<h2>

In [12]:
from sklearn.feature_selection import RFE

In [13]:
def rfe_selector(X, y, num_feats):
    model = SGDClassifier(n_jobs=-1)
    rfe = RFE(estimator=model, n_features_to_select=num_feats, step=1, verbose=5)
    rfe.fit(X,y.values.ravel())
    rfe_support = rfe.get_support()
    rfe_feature = list(X.loc[:,rfe_support].columns)
    return rfe_support, rfe_feature

In [39]:
rfe_support, rfe_feature = rfe_selector(x_train, y_train,8)
rfe_feature

Fitting estimator with 16 features.
Fitting estimator with 15 features.
Fitting estimator with 14 features.
Fitting estimator with 13 features.
Fitting estimator with 12 features.
Fitting estimator with 11 features.
Fitting estimator with 10 features.
Fitting estimator with 9 features.


['age',
 'education',
 'default',
 'housing',
 'loan',
 'contact',
 'duration',
 'poutcome']

<h3>Selected Features<h3>

In [40]:
print(rfe_feature)

['age', 'education', 'default', 'housing', 'loan', 'contact', 'duration', 'poutcome']


In [41]:
feature_sgd_model = base_sgd_model.fit(x_train[rfe_feature],y_train.values.ravel())
y_feature_pred = feature_sgd_model.predict(x_test[rfe_feature])

In [42]:
accuracy_score(y_true=y_test, y_pred=y_feature_pred)

0.7944377267230955

In [43]:
confusion_matrix(y_true=y_test, y_pred=y_feature_pred)

array([[442,  84],
       [ 86, 215]])

In [44]:
precision_score(y_true=y_test, y_pred=y_feature_pred)

0.7190635451505016

In [45]:
recall_score(y_true=y_test, y_pred=y_feature_pred)

0.7142857142857143

In [48]:
f1_score(y_true=y_test, y_pred=y_feature_pred)

0.7166666666666668

<h2>Hyper-Parameter Tuning<h2>

In [49]:
from sklearn.model_selection import RandomizedSearchCV

In [52]:
loss = ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron']
penalty = ['l1', 'l2', 'elasticnet']
alpha = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
learning_rate = ['constant', 'optimal', 'invscaling', 'adaptive']
eta0 = [1, 10, 25]
para_dict = {'loss':loss, 'penalty':penalty, 'alpha':alpha, 'learning_rate':learning_rate, 'eta0':eta0}

In [64]:
parameter_model = SGDClassifier()
parameter_sgd_model = RandomizedSearchCV(estimator=parameter_model, param_distributions=para_dict, n_jobs=-1, verbose=0, cv=5, n_iter=200, random_state=10)

In [65]:
parameter_sgd_model.fit(x_train[rfe_feature],y_train.values.ravel())
y_parameter_pred = parameter_sgd_model.predict(x_test[rfe_feature])



In [66]:
accuracy_score(y_true=y_test, y_pred=y_parameter_pred)

0.8041112454655381

In [68]:
confusion_matrix(y_true=y_test, y_pred=y_parameter_pred)

array([[441,  85],
       [ 77, 224]])

In [67]:
precision_score(y_true=y_test, y_pred=y_parameter_pred)

0.7249190938511327

In [69]:
recall_score(y_true=y_test, y_pred=y_parameter_pred)

0.7441860465116279

In [70]:
f1_score(y_true=y_test, y_pred=y_parameter_pred)

0.7344262295081967