In [19]:
import pandas as pd
import numpy as np

In [20]:
# Load the iris measurements from the CSV file that sits next to the notebook.
data = pd.read_csv('data.csv')

# Preview the first five rows as a sanity check (5 is the default for head()).
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [21]:
# split into train and test dataset

ratio = 0.67
split_seed = 42  # fixed so a fresh Restart & Run All reproduces the same split
n_train = int(len(data) * ratio)
n_test = len(data) - n_train

# Boolean membership mask: True marks a training row. A private generator keeps
# the draw reproducible without touching NumPy's global random state.
split_rng = np.random.RandomState(split_seed)
indexes = np.zeros(data.shape[0], dtype=bool)
indexes[split_rng.choice(len(data), n_train, replace=False)] = True

# Materialise each partition once instead of once per slice.
train_values = data[indexes].values
test_values = data[~indexes].values

# The last column is the class label; everything before it is a feature.
X = train_values[:, :-1]
y = train_values[:, -1]
test_x = test_values[:, :-1]
test_y = test_values[:, -1]

In [22]:
# Standardise the training features to zero mean and unit variance.
X = np.array(X, dtype=np.float32)
means, stds = X.mean(axis=0), X.std(axis=0)
# A constant column has std == 0; divide it by 1 instead so it becomes all
# zeros rather than NaN/inf. The guarded stds are also what later cells reuse.
stds[stds == 0] = 1
X = (X - means) / stds

In [23]:
# One-vs-all targets: for each species, 1 marks a member and 0 marks any other class.
y_setosa = [int(label == 'Iris-setosa') for label in y]
y_versicolor = [int(label == 'Iris-versicolor') for label in y]
y_virginica = [int(label == 'Iris-virginica') for label in y]

In [24]:
# Prepend a column of ones so the first weight multiplies a constant 1 (the intercept term).
bias_column = np.ones((X.shape[0], 1))
X = np.hstack((bias_column, X))

In [25]:
def mserror(y, y_pred):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    y : array-like
        True target values (a list or a NumPy array).
    y_pred : array-like
        Predicted values, broadcastable against ``y``.

    Returns
    -------
    float
        Average of the squared element-wise differences.
    """
    # Convert explicitly so two plain Python lists work as well as arrays.
    residuals = np.asarray(y, dtype=float) - np.asarray(y_pred, dtype=float)
    # Mean, not sum: the metric must not grow with the number of samples.
    return np.mean(residuals ** 2)

In [26]:
def hypothesis(X, w):
    """Logistic hypothesis: the sigmoid of the linear score ``X.dot(w)``.

    Parameters
    ----------
    X : numpy.ndarray
        A single feature row or a matrix of feature rows.
    w : numpy.ndarray
        Weights to multiply the features with.

    Returns
    -------
    numpy scalar or numpy.ndarray
        ``1 / (1 + exp(-X.dot(w)))``, one value per row of ``X``.
    """
    linear_score = X.dot(w)
    return 1 / (1 + np.exp(-linear_score))

In [27]:
def stochastic_gradient_step(X, y, w, train_ind, eta=0.01):
    """Take one stochastic gradient step and return the updated weights.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one sample per row.
    y : sequence
        Targets, indexable by the same row index as ``X``.
    w : numpy.ndarray
        Current weights.
    train_ind : int
        Row index of the single sample used for this step.
    eta : float
        Learning rate. The step actually applied is ``2 * eta / len(X)``
        times the sample gradient.

    Returns
    -------
    numpy.ndarray
        A new weight vector; ``w`` itself is not modified.
    """
    sample = X[train_ind]
    # Prediction error of the current weights on the chosen sample (a scalar).
    residual = hypothesis(sample, w) - y[train_ind]
    # One broadcasted product yields the gradient for every weight at once,
    # instead of a Python loop that re-indexes X for each feature.
    grad = sample * residual
    return w - 2 * eta / len(X) * grad

In [28]:
def stochastic_gradient_descent(X, y, w_init, eta=1e-2, max_iter=1e4,
                                min_weight_dist=1e-8, seed=42, verbose=False):
    """Fit weights by repeated single-sample gradient steps.

    Each iteration draws one random row index, applies
    ``stochastic_gradient_step`` for that row and records
    ``mserror(y, hypothesis(X, w))`` on the whole of ``X``.

    Parameters
    ----------
    X, y : training features and targets, passed through to the step function.
    w_init : starting weights.
    eta : learning rate forwarded to every step.
    max_iter : upper bound on the number of iterations.
    min_weight_dist : stop once the Euclidean distance between consecutive
        weight vectors is no longer greater than this value.
    seed : value passed to ``np.random.seed``; note that this reseeds
        NumPy's global random state.
    verbose : when true, print the weights after every step.

    Returns
    -------
    tuple
        ``(w, errors)``: the final weights and the list of per-iteration errors.
    """
    np.random.seed(seed)

    w = w_init
    errors = []
    step_length = np.inf
    n_done = 0

    while step_length > min_weight_dist and n_done < max_iter:
        sample_idx = np.random.randint(X.shape[0])

        w_next = stochastic_gradient_step(X, y, w, sample_idx, eta)
        delta = w - w_next
        step_length = np.sqrt(np.sum(delta ** 2))
        w = w_next

        # Track the error over the full data set after this update.
        errors.append(mserror(y, hypothesis(X, w)))

        if verbose:
            print(w)

        n_done += 1

    return w, errors

In [29]:
%%time
# Float zeros: np.full(n, 0) would build an integer array, the wrong dtype for real-valued weights.
w_init = np.zeros(X.shape[1])
setosa_weights, setosa_errors = stochastic_gradient_descent(
    X, y_setosa, w_init,
    min_weight_dist=1e-10, max_iter=1e4, verbose=False,
)
print(setosa_weights)

[-0.23488482 -0.36597868  0.34662581 -0.50933344 -0.48935585]
CPU times: user 644 ms, sys: 1.94 ms, total: 646 ms
Wall time: 645 ms


In [30]:
%%time
# Float zeros: np.full(n, 0) would build an integer array, the wrong dtype for real-valued weights.
w_init = np.zeros(X.shape[1])
versicolor_weights, versicolor_errors = stochastic_gradient_descent(
    X, y_versicolor, w_init,
    min_weight_dist=1e-10, max_iter=1e4, verbose=False,
)
print(versicolor_weights)

[-0.3152506   0.00625842 -0.32714093  0.09229935  0.0409763 ]
CPU times: user 663 ms, sys: 0 ns, total: 663 ms
Wall time: 660 ms


In [31]:
%%time
# Float zeros: np.full(n, 0) would build an integer array, the wrong dtype for real-valued weights.
w_init = np.zeros(X.shape[1])
virginica_weights, virginica_errors = stochastic_gradient_descent(
    X, y_virginica, w_init,
    min_weight_dist=1e-10, max_iter=1e4, verbose=False,
)
print(virginica_weights)

[-0.25497571  0.34865827 -0.02021459  0.40582299  0.44218885]
CPU times: user 663 ms, sys: 5.04 ms, total: 668 ms
Wall time: 662 ms


In [32]:
def classify(X, y, setosa_weights, versicolor_weights, virginica_weights, verbose = False):
    """Predict the iris species of every row of ``X`` with three one-vs-all models.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_weights)
        Feature rows; each row is multiplied with every weight vector.
    y : array-like or pandas.DataFrame
        True labels, one per row of ``X``. A frame that already holds a
        ``'class'`` column (such as a frame returned by this function) is
        accepted too, so the calling cell can be re-run.
    setosa_weights, versicolor_weights, virginica_weights : array-like
        Weight vector of each one-vs-all classifier.
    verbose : bool
        When true, print the three class scores of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``'class'`` (true label) and ``'predicted'`` (label of the
        classifier with the highest score).
    """
    labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
    # One column of weights per class, in the same order as `labels`.
    weights = np.array([setosa_weights, versicolor_weights, virginica_weights]).T

    # Force a float array: a feature matrix sliced out of a mixed-type
    # DataFrame has dtype=object, which np.exp cannot process in bulk.
    features = np.asarray(X, dtype=float)
    # A single matrix product scores every row against all three classifiers.
    scores = hypothesis(features, weights)

    if verbose:
        for h_setosa, h_versicolor, h_virginica in scores:
            print (h_setosa, h_versicolor, h_virginica)

    # argmax yields exactly one label per row and returns the first maximum,
    # so ties resolve in the order setosa, versicolor, virginica.
    y_pred = [labels[k] for k in np.argmax(scores, axis=1)]

    if isinstance(y, pd.DataFrame) and 'class' in y.columns:
        # Reuse only the true labels; drop any earlier 'predicted' column.
        result = y[['class']].copy()
    else:
        # Copy so that renaming the column never touches the caller's object.
        result = pd.DataFrame(y).copy()
        result.columns = ['class']
    result['predicted'] = y_pred
    return result

In [33]:
# Scale the test features with the means and stds computed on the training sample.
# Cast to float32 first, as was done for the training features: rows sliced out
# of the mixed-type frame arrive as dtype=object.
test_x = (np.array(test_x, dtype=np.float32) - means) / stds

In [34]:
# Prepend a column of ones to the test features, mirroring the training matrix layout.
test_bias_column = np.ones((test_x.shape[0], 1))
test_x = np.hstack((test_bias_column, test_x))

In [35]:
def accuracy(test_y):
    """Return the fraction of rows whose 'predicted' value equals their 'class' value.

    Parameters
    ----------
    test_y : pandas.DataFrame
        Frame with a ``'class'`` column and a ``'predicted'`` column.
    """
    hits = test_y['class'] == test_y['predicted']
    n_correct = sum(hits)
    return n_correct / len(test_y)

In [36]:
# Run the three one-vs-all models on the held-out rows. The result rebinds test_y
# from the raw label array to a frame of true and predicted labels.
test_y = classify(
    test_x,
    test_y,
    setosa_weights=setosa_weights,
    versicolor_weights=versicolor_weights,
    virginica_weights=virginica_weights,
)
test_y

Unnamed: 0,class,predicted
0,Iris-setosa,Iris-setosa
1,Iris-setosa,Iris-setosa
2,Iris-setosa,Iris-setosa
3,Iris-setosa,Iris-setosa
4,Iris-setosa,Iris-setosa
5,Iris-setosa,Iris-setosa
6,Iris-setosa,Iris-setosa
7,Iris-setosa,Iris-setosa
8,Iris-setosa,Iris-setosa
9,Iris-setosa,Iris-setosa


In [37]:
# Share of held-out rows whose predicted label matches the true label.
test_accuracy = accuracy(test_y)
print('Accuracy =', test_accuracy)

Accuracy = 0.76
