In [71]:
import numpy as np
import pandas as pd
import random
from matplotlib import pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

np.random.seed(1)
#ok

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [72]:
def load_data():
    data = np.array(pd.read_csv('data.csv'))
    np.random.shuffle(data)

    train_data = data[0:60000].T
    nx = train_data.shape[1]
    train_X = train_data[1:nx]
    train_X = train_X/255.0
    train_Y = train_data[0]

    test_data = data[60000:70000].T
    nx = test_data.shape[1]
    test_X = test_data[1:nx]
    test_X = test_X/255.0
    test_Y = test_data[0]

    return train_X, train_Y, test_X, test_Y


In [73]:
def initialize_parameters(layers_dims):
    nx = layers_dims[0]
    nh = layers_dims[1]
    nop = layers_dims[2]
    W1 = np.random.rand(nh, nx)*0.01
    b1 = np.zeros((nh, 1))
    W2 = np.random.rand(nop, nh)*0.01
    b2 = np.zeros((nop, 1))
    return W1, b1, W2, b2

In [74]:
def sigmoid(Z):
    A = 1/(1+np.exp(-Z))
    return A

def sigmoid_prime(Z):
    s = sigmoid(Z)
    dZ = s*(1-s)
    return dZ

In [75]:
def relu(Z):
    return np.maximum(0,Z)

def relu_prime(Z):
    return Z>0

In [76]:
def softmax(Z):
    A = np.exp(Z)/sum(np.exp(Z))
    return A

In [77]:
def forward_propagation(X, W1, b1, W2, b2):
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2


In [78]:
def normalize(Y):
    normalized_Y = np.zeros((Y.size, 10))
    normalized_Y[np.arange(Y.size), Y] = 1
    normalized_Y = normalized_Y.T
    return normalized_Y

In [79]:
def backward_propagation(X, Y, Z1, A1, Z2, A2, W1, W2):
    m = X.shape[1]
    Y = normalize(Y)

    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T)/m
    db2 = np.sum(dZ2)/m

    dZ1 = np.dot(W2.T, dZ2)*relu_prime(Z1)
    dW1 = np.dot(dZ1, X.T)/m
    db1 = np.sum(dZ1)/m
    
    return dW1, db1, dW2, db2

In [80]:
def update_parameters(W1, W2, dW1, dW2, b1, b2, db1, db2, learning_rate):
    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1    
    W2 = W2 - learning_rate*dW2  
    b2 = b2 - learning_rate*db2    
    return W1, b1, W2, b2

In [81]:
def predict(A2):
    return np.argmax(A2, 0)

In [82]:
def compute_cost(Y_hat, Y):
    return 0.5*(np.linalg.norm(Y-Y_hat)**2)/Y.size

def compute_accuracy(Y_hat, Y):
    #print(Y_hat, Y)
    return (np.sum(Y_hat==Y)*100)/Y.size


In [83]:
def model(X, Y, layers_dims, learning_rate, num_of_iteration):
    W1, b1, W2, b2 = initialize_parameters(layers_dims)
    for i in range(num_of_iteration):
        Z1, A1, Z2, A2 = forward_propagation(X, W1, b1, W2, b2)
        dW1, db1, dW2, db2 = backward_propagation(X, Y, Z1, A1, Z2, A2, W1, W2)
        W1, b1, W2, b2 = update_parameters(W1, W2, dW1, dW2, b1, b2, db1, db2, learning_rate)
        if i % 50 == 0 or i == num_of_iteration-1:
            predictions = predict(A2)
            print("Accuracy after iteration", i,": ", compute_accuracy(predictions,Y))
            print("Cost after iteration", i,": ", compute_cost(predictions,Y))
            
    return W1, b1, W2, b2

In [84]:
X_train, Y_train, X_test, Y_test = load_data()
print(X_train.shape, Y_train.shape)


(784, 60000) (60000,)


In [85]:
layers_dims = [28*28, 10, 10]
W1, b1, W2, b2 = model(X_train, Y_train, layers_dims, 0.10, 1000)

9.771666666666667
Accuracy after iteration 0 :  9.771666666666667
Cost after iteration 0 :  14.099633333333335
9.826666666666666
Accuracy after iteration 50 :  9.826666666666666
Cost after iteration 50 :  14.08136666666667
18.16
Accuracy after iteration 100 :  18.16
Cost after iteration 100 :  10.557966666666665
46.615
Accuracy after iteration 150 :  46.615
Cost after iteration 150 :  3.2491916666666674
61.59166666666667
Accuracy after iteration 200 :  61.59166666666667
Cost after iteration 200 :  2.4196666666666666
78.63833333333334
Accuracy after iteration 250 :  78.63833333333334
Cost after iteration 250 :  1.6995916666666666
82.99833333333333
Accuracy after iteration 300 :  82.99833333333333
Cost after iteration 300 :  1.4166416666666668
85.035
Accuracy after iteration 350 :  85.035
Cost after iteration 350 :  1.2356
86.36166666666666
Accuracy after iteration 400 :  86.36166666666666
Cost after iteration 400 :  1.1221583333333334
87.32833333333333
Accuracy after iteration 450 :  87

In [86]:
def test(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_propagation(X, W1, b1, W2, b2)
    predictions = predict(A2)
    return predictions


In [87]:
Y_hat = test(X_train, W1, b1, W2, b2)
accuracy = compute_accuracy(Y_hat, Y_train)
print("Accuracy over train set: ", accuracy)

Accuracy over train set:  90.90833333333333


In [88]:
Y_hat = test(X_test, W1, b1, W2, b2)
accuracy = compute_accuracy(Y_hat, Y_test)
print("Accuracy over test set: ", accuracy)

Accuracy over test set:  91.35


In [93]:
def visual(index):
    tmp = X_train[:, index]
    image = tmp.reshape((28, 28)) * 255
    plt.imshow(image)
    plt.title("Index: "+str(index)+"\n"+str(Y_train[index]))
    plt.show()

    prediction = test(X_train[:, index, None], W1, b1, W2, b2)
    print("Result: ", prediction)


In [1]:
visual(index = random.randint(0,9999))

NameError: name 'visual' is not defined