In [153]:
import pandas as pd
import numpy as np

In [154]:
data = pd.read_csv('train-data.csv')
data.head(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [155]:
X = data[['sepal_length','sepal_width','petal_length','petal_width']]
means, stds = X.mean(axis=0), X.std(axis=0)
X = (X - means) / stds
y = data['class']

In [156]:
# One vs all
y_setosa = y.replace('Iris-setosa', 1).replace('Iris-versicolor',0).replace('Iris-virginica',0)
y_versicolor = y.replace('Iris-setosa', 0).replace('Iris-versicolor',1).replace('Iris-virginica',0)
y_virginica = y.replace('Iris-setosa', 0).replace('Iris-versicolor',0).replace('Iris-virginica',1)

In [157]:
X = np.hstack((np.ones((X.shape[0], 1)), X))

In [158]:
def mserror(y, y_pred):
    "Mean square error of prediction"
    return sum((y-y_pred)**2)

In [159]:
def hypothesis(X, w):
    "Hypothesis (sigmoid function)"
    return 1 / (1 + np.exp (-(X.dot(w))))

In [160]:
def stochastic_gradient_step(X, y, w, train_ind, eta=0.01):
    grads = []
    for i in range(len(X[train_ind])):
        grad_i = X[train_ind][i] * (hypothesis(X[train_ind], w) - y[train_ind])
        grads.append(grad_i)
    return  w - 2 * eta / len(X) * np.array(grads)

In [161]:
def stochastic_gradient_descent(X, y, w_init, eta=1e-2, max_iter=1e4,
                                min_weight_dist=1e-8, seed=42, verbose=False):

    weight_dist = np.inf
    w = w_init
    errors = []
    iter_num = 0
    np.random.seed(seed)
        
    while weight_dist > min_weight_dist and iter_num < max_iter:
        
        random_ind = np.random.randint(X.shape[0])
        
        step_w = stochastic_gradient_step(X, y, w, random_ind, eta)
        weight_dist = np.sqrt(np.sum((w-step_w)**2)) 
                                     
        w = step_w
        y_pred = hypothesis(X, w)
        errors.append(mserror(y, y_pred))
        
        if verbose:
            print(w)
            
        iter_num += 1
        
    return w, errors

In [162]:
%%time
w_init = np.full(X.shape[1], 0)
setosa_weights, setosa_errors = stochastic_gradient_descent(X, y_setosa, w_init, min_weight_dist = 1e-10, max_iter=1e6, verbose=False)
print(setosa_weights)

[-2.18181386 -1.1204821   1.92045424 -2.21516918 -2.03796386]
CPU times: user 6min 13s, sys: 80.7 ms, total: 6min 13s
Wall time: 6min 13s


In [163]:
%%time
w_init = np.full(X.shape[1], 0)
versicolor_weights, versicolor_errors = stochastic_gradient_descent(X, y_versicolor, w_init, min_weight_dist = 1e-10, max_iter=1e6, verbose=False)
print(versicolor_weights)

[-0.98251377  0.10045355 -1.40894667  0.88793551 -0.99157127]
CPU times: user 6min 14s, sys: 140 ms, total: 6min 14s
Wall time: 6min 14s


In [164]:
%%time
w_init = np.full(X.shape[1], 0)
virginica_weights, virginica_errors = stochastic_gradient_descent(X, y_virginica, w_init,  min_weight_dist = 1e-10, max_iter=1e6, verbose=False)
print(virginica_weights)

[-3.58594844  0.00834309 -0.31258637  2.35742763  3.50737984]
CPU times: user 6min 25s, sys: 119 ms, total: 6min 26s
Wall time: 6min 26s


In [172]:
def classify(X, y, setosa_weights, versicolor_weights, virginica_weights, verbose = False):
    y_pred = []
    for row in X:
        x = row
        h_setosa = hypothesis(x, setosa_weights)
        h_versicolor = hypothesis(x, versicolor_weights)
        h_virginica = hypothesis(x, virginica_weights)
        if max(h_setosa, h_versicolor, h_virginica) == h_setosa:
            y_pred.append('Iris-setosa')
        elif max(h_setosa, h_versicolor, h_virginica) == h_versicolor:
            y_pred.append('Iris-versicolor')
        elif max(h_setosa, h_versicolor, h_virginica) == h_virginica:
            y_pred.append('Iris-virginica')
        if verbose:
            print (h_setosa, h_versicolor, h_virginica)
    y = pd.DataFrame(y)
    y['predicted'] = y_pred
    return y            

In [173]:
test_data = pd.read_csv('test-data.csv')
test_data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.0,3.3,1.4,0.2,Iris-setosa
1,7.0,3.2,4.7,1.4,Iris-versicolor
2,6.4,2.8,5.6,2.1,Iris-virginica


In [174]:
test_X = test_data[['sepal_length','sepal_width','petal_length','petal_width']]
# means, stds from sample
test_X = (test_X - means) / stds
test_y = test_data[['class']]

In [175]:
test_X = np.hstack((np.ones((test_X.shape[0], 1)), test_X))

In [177]:
test_y = classify(test_X, test_y, setosa_weights, versicolor_weights, virginica_weights)
test_y

Unnamed: 0,class,predicted
0,Iris-setosa,Iris-setosa
1,Iris-versicolor,Iris-versicolor
2,Iris-virginica,Iris-virginica
