In [1]:
import numpy as np
np.random.seed(7)

def sigmoid(x):
    return 1./(1.+np.exp(-x))

def sigmoid_back(x):
    return sigmoid(x)*(1.-sigmoid(x))

def softmax(x):
	e = np.exp(x-np.max(x))
	return e/e.sum(axis=1, keepdims=True)
       
def onehot(x):
    #set 1 on column with highest value
    out = []
    for i in x:
        idx = np.argmax(i)
        p = [0,0,0,0]
        p[idx] = 1.
        out.append(p)
    return np.array(out)

def accuracy(y_true, y_pred):
    if not (len(y_true) == len(y_pred)):
        print('Size of predicted and true labels not equal.')
        return 0.0

    corr = 0
    for i in range(0,len(y_true)):
        corr += 1 if (y_true[i] == y_pred[i]).all() else 0

    return corr/len(y_true)

In [2]:
#import data
Xin = np.genfromtxt ("./train_data.csv", delimiter=",")
Yin = np.genfromtxt ("./train_labels.csv", delimiter=",")
#take first 22000 samples for training
X = Xin[:22000]
Y = Yin[:22000]
#get last 2754 samples for validation
x2 = Xin[-2754:]
y2 = Yin[-2754:]

In [3]:
#learning rate
lr = 10e-5

#having 32 perceptrons on first layer
#create an array of 784*32 weights from a normal random dist 
#output 4 different classes
w1 = np.random.randn(784,32)
b1 = np.random.randn(32)
w2 = np.random.randn(32,4)
b2 = np.random.randn(4)

#train 2000 times
for epoch in range(2000):
#Forward

    #mid layer
    z1 = X @ w1+b1
    a1 = sigmoid(z1)

    #out layer
    z2 = a1 @ w2+b2
    a2 = softmax(z2)

#BackProp

    #out layer
    dz2 = a2-Y
    dw2 = a1.T @ dz2
    db2 = dz2.sum(axis=0)

    #mid layer
    dz1 = sigmoid_back(z1)
    da1 = dz2 @ w2.T
    dw1 = X.T @ (dz1*da1)
    db1 = (da1*dz1).sum(axis=0)

    #update weights
    w2 = w2-lr*dw2
    b2 = b2-lr*db2
    w1 = w1-lr*dw1
    b1 = b1-lr*db1

#Print accuracy every 100 epochs
    if epoch % 100 == 0:
        print(epoch)
        out = onehot(a2)
        incorrect = np.sum(out != Y) / 2.0
        acc = 1.-incorrect/(Y.shape[0]*4)
        print('acc:', acc)

0
acc: 0.8407613636363637
100
acc: 0.9867386363636363
200
acc: 0.9897727272727272
300
acc: 0.9914090909090909
400
acc: 0.9926136363636363
500
acc: 0.9933181818181818
600
acc: 0.9939204545454545
700
acc: 0.9942840909090909
800
acc: 0.9947159090909091
900
acc: 0.9950340909090909
1000
acc: 0.9953181818181818
1100
acc: 0.9955568181818182
1200
acc: 0.9957840909090909
1300
acc: 0.9960568181818181
1400
acc: 0.9962386363636364
1500
acc: 0.9964545454545455
1600
acc: 0.9966136363636363
1700
acc: 0.996840909090909
1800
acc: 0.9970795454545455
1900
acc: 0.9972272727272727


In [4]:
#get final accuracy on training set
out = onehot(a2)
calc = accuracy(Y, out)
calc

0.9893181818181818

In [5]:
def model(X):
    z1 = X @ w1 + b1
    a1 = sigmoid(z1)

    z2 = a1 @ w2 + b2
    a2 = softmax(z2)
    return a2

In [6]:
#run model with trained weights on validation set
out2 = onehot(model(x2))
calc = accuracy(y2, out2)
calc

0.9840232389251997

In [7]:
#save weights
np.savez('weights.npz', w1=w1, b1=b1, w2=w2, b2=b2)

In [8]:
#check weights
data = np.load('weights.npz')
cw1 = data['w1']
cw1.shape

(784, 32)