# SGDClassifier using scikit-learn

In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Creating the custom dataset

In [2]:
# Synthetic binary-classification problem: 50,000 samples with 15 features
# (10 informative + 5 redundant). `weights=[0.7]` sets the class proportions
# and the fixed `random_state` makes the generated data reproducible.
X, y = make_classification(
    n_samples=50000,
    n_features=15,
    n_informative=10,
    n_redundant=5,
    n_classes=2,
    weights=[0.7],
    random_state=15,
)

In [3]:
# Sanity check: dimensions of the generated feature matrix and label vector.
X.shape, y.shape

((50000, 15), (50000,))

## Splitting train and test dataset

In [4]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=15
)

# Print the shape of every split as a quick sanity check.
for var in (X_train, X_test, y_train, y_test):
    print(var.shape)

(37500, 15)
(12500, 15)
(37500,)
(12500,)


## SGD Classifier

In [5]:
# Reference model: logistic regression (log loss) with an L2 penalty, trained by
# SGD using a constant learning rate of eta0. verbose=2 makes fit() log each epoch.
clf = SGDClassifier(
    loss="log_loss",
    penalty="l2",
    alpha=0.0001,
    tol=1e-3,
    verbose=2,
    learning_rate="constant",
    eta0=0.0001,
    random_state=15,
)
clf

In [6]:
# Fit the reference estimator on the training split (per-epoch progress is printed below).
clf.fit(X_train, y_train)

-- Epoch 1
Norm: 0.91, NNZs: 15, Bias: -0.219807, T: 37500, Avg. loss: 0.346258
Total training time: 0.02 seconds.
-- Epoch 2
Norm: 1.10, NNZs: 15, Bias: -0.322775, T: 75000, Avg. loss: 0.275405
Total training time: 0.03 seconds.
-- Epoch 3
Norm: 1.21, NNZs: 15, Bias: -0.403681, T: 112500, Avg. loss: 0.265203
Total training time: 0.05 seconds.
-- Epoch 4
Norm: 1.28, NNZs: 15, Bias: -0.471583, T: 150000, Avg. loss: 0.260826
Total training time: 0.06 seconds.
-- Epoch 5
Norm: 1.32, NNZs: 15, Bias: -0.533062, T: 187500, Avg. loss: 0.258608
Total training time: 0.08 seconds.
-- Epoch 6
Norm: 1.35, NNZs: 15, Bias: -0.584082, T: 225000, Avg. loss: 0.257197
Total training time: 0.10 seconds.
-- Epoch 7
Norm: 1.38, NNZs: 15, Bias: -0.627464, T: 262500, Avg. loss: 0.256315
Total training time: 0.11 seconds.
-- Epoch 8
Norm: 1.39, NNZs: 15, Bias: -0.666090, T: 300000, Avg. loss: 0.255706
Total training time: 0.13 seconds.
-- Epoch 9
Norm: 1.40, NNZs: 15, Bias: -0.698887, T: 337500, Avg. loss: 0.

In [7]:
# Inspect the fitted coefficients, their shape, and the fitted intercept.
clf.coef_, clf.coef_.shape, clf.intercept_

(array([[-0.57379607,  0.22043096, -0.22738977,  0.43699239, -0.29753916,
          0.77548161, -0.5660167 , -0.11890761,  0.25640713,  0.22931039,
          0.24620845,  0.00329139, -0.09110758,  0.42898133,  0.02775948]]),
 (1, 15),
 array([-0.75294429]))

In [8]:
def initialize_weights(row_vec):
    """Create the starting parameters for the custom logistic-regression model.

    Parameters
    ----------
    row_vec : array-like with a ``shape`` attribute
        A single sample; only the length of its first axis is used.

    Returns
    -------
    tuple
        ``(weights, bias)`` where ``weights`` is a float array of zeros with
        ``row_vec.shape[0]`` entries and ``bias`` is the integer ``0``.
    """
    n_features = row_vec.shape[0]
    weights = np.zeros(n_features)
    bias = 0
    return weights, bias

In [9]:
# The first training row serves as the template for the weight vector's length.
dim = X_train[0]
w, b = initialize_weights(dim)


def grader_weights(w, b):
    """Check the freshly initialised parameters.

    Passes when ``w`` has as many entries as the module-level ``dim`` row,
    ``b`` equals 0, and the entries of ``w`` sum to 0.0.
    """
    assert len(w) == len(dim)
    assert b == 0
    assert np.sum(w) == 0.0
    return True


grader_weights(w, b)

True

In [10]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``.

    Works element-wise on NumPy arrays as well as on plain scalars.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [11]:
def grader_sigmoid(z):
    """Check ``sigmoid(z)`` against the reference value for ``z = 2``.

    The expected constant is hard-coded, so the check is only meaningful when
    the grader is called with ``z = 2``. The comparison uses a tight absolute
    tolerance rather than ``==`` so that a last-bit difference in ``exp()``
    across platforms or NumPy builds does not fail an otherwise correct
    implementation.
    """
    val = sigmoid(z)
    assert np.isclose(val, 0.8807970779778823, rtol=0.0, atol=1e-12)
    return True


grader_sigmoid(2)

True

In [12]:
def log_loss(y_true, y_pred):
    """Mean binary cross-entropy computed with base-10 logarithms.

    Because ``log10`` is used, the result equals the usual natural-log
    cross-entropy divided by ``ln(10)``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (0 or 1).
    y_pred : array-like
        Predicted probabilities for the positive class.

    Returns
    -------
    float
        ``-mean(y * log10(p) + (1 - y) * log10(1 - p))``.
    """
    eps = 1e-15
    # Accept plain Python sequences as well as arrays.
    y_true = np.asarray(y_true, dtype=float)
    # Keep probabilities away from exactly 0 and 1 so log10 never sees 0.
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    per_sample = y_true * np.log10(y_pred) + (1 - y_true) * np.log10(1 - y_pred)
    return -np.mean(per_sample)

In [13]:
# The loss is rounded to 6 decimal places before being compared.
def grader_log_loss(true, pred):
    """Check ``log_loss`` on the small fixed example defined below."""
    expected = 0.076449
    assert np.round(log_loss(true, pred), 6) == expected
    return True


# NOTE: the module-level name `pred` is rebound to a prediction function in a
# later cell; re-running this cell afterwards turns it back into an array.
true = np.array([1, 1, 0, 1, 0])
pred = np.array([0.9, 0.8, 0.1, 0.8, 0.2])
grader_log_loss(true, pred)

True

In [14]:
def model(x, w, b):
    """Linear score of the model: the dot product of ``w`` and ``x`` plus the bias ``b``."""
    weighted_sum = np.dot(w, x)
    return weighted_sum + b


def gradient_dw(x, y, w, b, alpha, N):
    """Per-sample update direction for the weight vector.

    Returns ``(y - sigmoid(w.x + b)) * x - (alpha / N) * w``: the residual
    between the label and the predicted probability scaled by the sample,
    minus the regularisation term ``(alpha / N) * w``.
    """
    residual = y - sigmoid(model(x, w, b))
    regularisation = (alpha / N) * w
    return residual * x - regularisation


In [15]:
def grader_dw(x, y, w, b, alpha, N):
    """Check that the entries of ``gradient_dw(...)`` sum to the reference value (5 decimals)."""
    total = np.sum(gradient_dw(x, y, w, b, alpha, N))
    assert np.round(total, 5) == 4.75684
    return True


# Fixed inputs for the weight-gradient check.
grad_x = np.array([
    -2.07864835, 3.31604252, -0.79104357, -3.87045546, -1.14783286,
    -2.81434437, -0.86771071, -0.04073287, 0.84827878, 1.99451725,
    3.67152472, 0.01451875, 2.01062888, 0.07373904, -5.54586092,
])
grad_y = 0
grad_w = np.array([
    0.03364887, 0.03612727, 0.02786927, 0.08547455, -0.12870234,
    -0.02555288, 0.11858013, 0.13305576, 0.07310204, 0.15149245,
    -0.05708987, -0.064768, 0.18012332, -0.16880843, -0.27079877,
])
grad_b = 0.5
alpha = 0.0001
N = len(X_train)
grader_dw(grad_x, grad_y, grad_w, grad_b, alpha, N)

True

In [16]:
def gradient_db(x, y, w, b):
    """Per-sample update direction for the bias: ``y - sigmoid(w.x + b)``."""
    probability = sigmoid(model(x, w, b))
    residual = y - probability
    return residual

In [17]:
def grader_db(x, y, w, b):
    """Check ``gradient_db(...)`` against the reference value (4 decimals)."""
    bias_gradient = gradient_db(x, y, w, b)
    assert np.round(bias_gradient, 4) == -0.3714
    return True


# Fixed inputs for the bias-gradient check.
grad_x = np.array([
    -2.07864835, 3.31604252, -0.79104357, -3.87045546, -1.14783286,
    -2.81434437, -0.86771071, -0.04073287, 0.84827878, 1.99451725,
    3.67152472, 0.01451875, 2.01062888, 0.07373904, -5.54586092,
])
grad_y = 0.5
grad_b = 0.1
grad_w = np.array([
    0.03364887, 0.03612727, 0.02786927, 0.08547455, -0.12870234,
    -0.02555288, 0.11858013, 0.13305576, 0.07310204, 0.15149245,
    -0.05708987, -0.064768, 0.18012332, -0.16880843, -0.27079877,
])
# alpha and N are reassigned here but are not passed to grader_db.
alpha = 0.0001
N = len(X_train)
grader_db(grad_x, grad_y, grad_w, grad_b)

True

In [18]:
# Prediction helper: computes the predicted probability for every row of X.
def pred(w, b, X):
    """Predicted positive-class probability for each row of ``X``.

    Parameters
    ----------
    w : ndarray of shape (n_features,)
        Weight vector.
    b : float
        Bias term.
    X : array-like of shape (n_samples, n_features)
        Samples to score, one per row.

    Returns
    -------
    ndarray of shape (n_samples,)
        ``sigmoid(X @ w + b)`` for every row.
    """
    X = np.asarray(X)
    if len(X) == 0:
        # No rows to score: return an empty array, as a row-by-row loop would.
        return np.array([])
    # One matrix-vector product replaces the per-row Python loop.
    return sigmoid(np.dot(X, w) + b)


In [19]:
def train(X_train, y_train, X_test, y_test, epochs, alpha, eta0):
    """Fit logistic regression with per-sample SGD and record the loss per epoch.

    Parameters
    ----------
    X_train, y_train : array-like
        Training samples (one per row) and their labels.
    X_test, y_test : array-like
        Held-out samples and labels, used only to track the test loss.
    epochs : int
        Number of full passes over the training data.
    alpha : float
        Regularisation strength forwarded to ``gradient_dw``.
    eta0 : float
        Constant learning rate.

    Returns
    -------
    tuple
        ``(w, b, train_loss, test_loss)``. The two lists hold one ``log_loss``
        value per epoch, measured after that epoch's updates.
    """
    train_loss = []
    test_loss = []
    w, b = initialize_weights(X_train[0])
    N = len(X_train)

    for _ in range(epochs):
        for x, y in zip(X_train, y_train):
            # Both gradients are evaluated at the current (w, b) before either
            # parameter is updated.
            dw = gradient_dw(x, y, w, b, alpha, N)
            db = gradient_db(x, y, w, b)
            w += dw * eta0
            b += db * eta0

        # Track the loss on both splits with the parameters reached this epoch.
        # This relies on the module-level `pred` being the prediction function.
        train_loss.append(log_loss(y_train, pred(w, b, X_train)))
        test_loss.append(log_loss(y_test, pred(w, b, X_test)))

    return w, b, train_loss, test_loss

In [20]:
# Hyperparameters for the custom SGD run.
# NOTE(review): the SGDClassifier earlier in the notebook was created with
# alpha=0.0001 and eta0=0.0001; the values here are ten times larger. Confirm
# that the difference is intended before comparing the two sets of weights.
alpha = 0.001
eta0 = 0.001
N = len(X_train)
epochs = 20
w, b, train_loss, test_loss = train(
    X_train, y_train, X_test, y_test, epochs, alpha, eta0
)

In [21]:
# Display the weight vector and bias returned by the custom training run.
w,b

(array([-5.91147981e-01,  2.35617318e-01, -2.38138029e-01,  4.26554680e-01,
        -3.48277037e-01,  8.22876462e-01, -5.41422520e-01, -1.26338073e-01,
         2.78172123e-01,  2.27355740e-01,  2.48352566e-01, -2.14586376e-02,
        -7.79776280e-02,  4.52570804e-01,  5.66709887e-04]),
 -0.9263353135153597)

In [22]:
# This grader should return True.
# Every custom weight, and the custom bias, must lie within `tol` of the
# corresponding scikit-learn value. The default tolerance is 0.5 (the value the
# check has always enforced); pass tol=0.05 for the stricter comparison that
# the earlier comment described.
def differece_check_grader(w, b, coef, intercept, tol=0.5):
    """Compare custom logistic-regression parameters with reference ones.

    Parameters
    ----------
    w : array-like
        Custom weight vector.
    b : float
        Custom bias.
    coef : array-like
        Reference coefficients; must broadcast against ``w``.
    intercept : array-like or float
        Reference intercept.
    tol : float, optional
        Largest allowed absolute difference for any weight or for the bias.

    Returns
    -------
    bool
        ``True`` when every difference is within ``tol``.

    Raises
    ------
    AssertionError
        If any weight or the bias differs from its reference by more than ``tol``.
    """
    weight_gap = np.abs(np.asarray(w) - np.asarray(coef))
    assert np.all(weight_gap <= tol)
    # The bias was previously accepted but never compared; check it as well.
    bias_gap = np.abs(np.asarray(b) - np.asarray(intercept))
    assert np.all(bias_gap <= tol)
    print('The custom weights are correct')
    return True
# Run the grader on the custom parameters and the scikit-learn estimator's fitted parameters.
differece_check_grader(w,b,clf.coef_,clf.intercept_)

The custom weights are correct


True

In [23]:
# Custom weights and bias, shown again for comparison with the scikit-learn values.
w,b

(array([-5.91147981e-01,  2.35617318e-01, -2.38138029e-01,  4.26554680e-01,
        -3.48277037e-01,  8.22876462e-01, -5.41422520e-01, -1.26338073e-01,
         2.78172123e-01,  2.27355740e-01,  2.48352566e-01, -2.14586376e-02,
        -7.79776280e-02,  4.52570804e-01,  5.66709887e-04]),
 -0.9263353135153597)

In [24]:
# Coefficients and intercept fitted by the scikit-learn SGDClassifier.
clf.coef_,clf.intercept_

(array([[-0.57379607,  0.22043096, -0.22738977,  0.43699239, -0.29753916,
          0.77548161, -0.5660167 , -0.11890761,  0.25640713,  0.22931039,
          0.24620845,  0.00329139, -0.09110758,  0.42898133,  0.02775948]]),
 array([-0.75294429]))