# Stochastic Gradient Descent for Logistic Regression Problem
### Author: Rachit Shah (rshah25)

In [1]:
#Requirements
from sklearn import datasets
import math
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
#Sigmoid Function
def sigmoid(x):
    return 1/(1+math.exp(-x))

#Predict y based on new features
def predict(features,betas):
    yp = np.dot(betas, np.append(features, 1))
    return sigmoid(yp)

## Stochastic gradient descent:

$weights \leftarrow weights - learningRate \cdot (predict(X_i) - actualy_i) \cdot X_i$

In [3]:
def update_weights(betas,x,y,lr):
    yp = predict(x,betas)
    betas -= lr * (yp-y) * np.append(x,1)
    
def train_sgd(X,y,lr,epochs):
    betas = np.zeros(X.shape[1] + 1)
    for epoch in range(epochs):
        for i in range(X.shape[0]):
            update_weights(betas, X[i,:], y[i], lr)
    return betas
    

In [4]:
#Test Code on Dummy dataset (Breast Cancer dataset from scikit-learn datasets with 2 classes)
def main():
    #Import Dataset
    dataset = datasets.load_breast_cancer()
    X = dataset.data
    y = dataset.target

    #Split train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=123)
    
    #normalize all features to 0 and 1 values
    scaler = MinMaxScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)

    #Learning Rate and Number of Epochs
    lr = 0.001
    epochs = 100

    #Train
    betas = train_sgd(X_train,y_train,lr,epochs)
    
    #Normalize test set too
    scaler.fit(X_test)
    X_test = scaler.transform(X_test)

    #Get accuracy on test data
    test_predictions = [round(predict(x,betas)) for x in X_test]
    test_predictions
    accuracy = sum([test_predictions[i]==y_test[i] for i in range(len(y_test))])/len(y_test)
    print("Accuracy =",accuracy)

In [5]:
if __name__ == '__main__':
    main()

Accuracy = 0.9574468085106383
