In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import scale

# Functions

In [2]:
def load_cancer(data_file):
    data = pd.read_csv(data_file, header=None)
    
    y_data = data[1].apply(lambda x: 1 if x == 'M' else 0) #Fix the Labels
    x_data = data[list(range(3,13))] #Only the means of measurments remain
    
    # Scale and shuffle the dataset
    indices = np.random.permutation(data.shape[0])
    X_train = scale(x_data.values)[indices]
    y_train = y_data.values[indices]
    
    return X_train, y_train

In [3]:
def update_weights(weights, X, y):
    alpha = 0.4
    
    y_est = 1 / (1 + np.exp(-np.dot(weights, X))) #Sigmoid
    step = alpha * (y - y_est) * X
    updated_weights = weights + step
    
    return updated_weights                        

# Stochastic Gradient Descent

In [4]:
X_train, y_train = load_cancer('cancer_dataset/wdbc.data')

ws = pd.DataFrame(columns=['W0', 'W1', 'W2', 'W3', 'W4', 'W5', 'W6', 'W7', 'W8', 'W9'])
weights = np.ones(X_train.shape[1])

for i in range(X_train.shape[0]):
    weights = update_weights(weights, X_train[i], y_train[i])
    ws.loc[i] = weights

# Final Result

In [5]:
ws

Unnamed: 0,W0,W1,W2,W3,W4,W5,W6,W7,W8,W9
0,1.000002,1.000002,1.000001,1.000001,1.000002,1.000002,1.000002,1.000002,1.000000,1.000002
1,1.000014,1.000177,1.000158,1.000226,1.000232,1.000139,1.000147,1.000230,1.000184,1.000103
2,1.039333,1.022554,1.022158,0.959383,0.995206,1.015403,1.000856,1.004477,1.008619,1.007005
3,1.040188,1.023294,1.022834,0.960372,0.995742,1.015967,1.001469,1.003888,1.008659,1.007738
4,1.040451,1.025015,1.024352,0.959135,0.996927,1.017343,1.002804,1.005891,1.007903,1.009279
5,1.041946,1.026766,1.025903,0.958068,0.997789,1.019073,1.004395,1.006035,1.007499,1.010574
6,1.041935,1.026772,1.025909,0.958104,0.997813,1.019089,1.004413,1.006058,1.007521,1.010586
7,1.041833,1.027035,1.026128,0.958343,0.998086,1.019329,1.004663,1.006204,1.007471,1.010762
8,1.042740,1.028092,1.027107,0.957418,0.999310,1.020317,1.005543,1.008773,1.008177,1.010527
9,0.939699,0.758883,0.781962,1.048411,0.669374,0.720752,0.868749,0.737914,1.012130,1.343916
