In [2]:
import pandas as pd
import numpy as np

In [3]:
# Column names given to the header-less CSV: -1 is the label column,
# 0..783 are the 784 feature (pixel) columns.
cols = list(range(-1, 784))


def prepare_ds(path, split=False, ratio=0.0, random_state=None):
    """Load a label-first CSV, shuffle its rows and return feature/label arrays.

    The file is read with the column names in ``cols`` (label first, then 784
    feature columns).  Rows are shuffled, labels are one-hot encoded, and all
    returned arrays are transposed so that samples lie along axis 1.

    The one-hot encoding is computed once on the whole file *before* any
    split, so both splits always have the same number of class rows in the
    same order, even when a class happens to be absent from one split.

    Parameters
    ----------
    path : str or file-like
        Passed to ``pd.read_csv`` as ``filepath_or_buffer``.
    split : bool, default False
        When truthy, return two partitions instead of one dataset.
    ratio : float, default 0.0
        Fraction of rows (taken from the end of the shuffled data) that goes
        into the second partition.  Only used when ``split`` is truthy.
        ``ratio=0.0`` puts every row in the first partition.
    random_state : int or None, default None
        Seed forwarded to ``DataFrame.sample`` to make the shuffle
        reproducible.  ``None`` keeps the shuffle unseeded.

    Returns
    -------
    tuple of np.ndarray
        ``(X, Y)`` when ``split`` is falsy, otherwise ``(X1, Y1, X2, Y2)``.
        Each ``X`` has shape ``(784, n_samples)`` and each ``Y`` has shape
        ``(n_classes, n_samples)``, where ``n_classes`` is the number of
        distinct labels in the whole file.

    Raises
    ------
    ValueError
        If ``split`` is truthy and ``ratio`` lies outside ``[0, 1]``.
    """
    data = pd.read_csv(filepath_or_buffer=path, names=cols)
    data = data.sample(frac=1, random_state=random_state)

    features = data[cols[1:]]
    # Encode on the full frame so every partition shares the same class rows.
    one_hot = pd.get_dummies(data[cols[0]])

    if not split:
        return np.array(features).T, np.array(one_hot).T

    if not 0.0 <= ratio <= 1.0:
        raise ValueError("ratio must be between 0 and 1, got %r" % (ratio,))

    n_rows = data.shape[0]
    # Index of the first row of the second partition.  Computed from the
    # front so that ratio=0.0 keeps all rows in the first partition.
    cut = n_rows - int(n_rows * ratio)

    d1X = np.array(features.iloc[:cut]).T
    d1Y = np.array(one_hot.iloc[:cut]).T
    d2X = np.array(features.iloc[cut:]).T
    d2Y = np.array(one_hot.iloc[cut:]).T
    return d1X, d1Y, d2X, d2Y

In [4]:
# Locations of the MNIST CSV files, relative to this notebook.
data_path = '../digit-recognizer/mnist_train.csv'
test_path = '../digit-recognizer/mnist_test.csv'

# Training file: shuffle, then hold out 20% of the rows for validation.
(train_X, train_Y,
 valid_X, valid_Y) = prepare_ds(path=data_path, split=True, ratio=0.2)

# Test file: loaded whole, so only a single (X, Y) pair comes back.
test_X, test_Y = prepare_ds(path=test_path, split=False)

In [5]:
# Samples lie along axis 1, so the column count is the number of examples.
m_train = train_X.shape[1]
# NOTE: despite its name, m_test holds the size of the *validation* split.
m_test = valid_X.shape[1]

# Show the shapes of the four training/validation arrays.
tuple(arr.shape for arr in (train_X, train_Y, valid_X, valid_Y))

((784, 48000), (10, 48000), (784, 12000), (10, 12000))

In [18]:
def normalize(X, Y, mean=None, std=None):
    """Standardize ``X`` to zero mean / unit variance; pass ``Y`` through.

    By default a single global mean and standard deviation are computed over
    all elements of ``X``.  Statistics can also be supplied explicitly, which
    lets a validation or test set be scaled with the statistics of the
    training set.

    Parameters
    ----------
    X : array-like
        Feature array to standardize.
    Y : any
        Returned unchanged (kept so features and labels travel together).
    mean : float or array-like, optional
        Value subtracted from ``X``.  Defaults to ``np.mean(X)``.
    std : float or array-like, optional
        Value ``X - mean`` is divided by.  Defaults to ``np.std(X)``.

    Returns
    -------
    tuple
        ``(X_standardized, Y)``.
    """
    if mean is None:
        mean = np.mean(X)
    if std is None:
        std = np.std(X)
    # A zero spread (constant input) would give 0/0 = NaN; dividing by 1
    # instead leaves the centred values at exactly 0.
    std = np.where(std == 0, 1.0, std)
    X = (X - mean) / std
    return X, Y

In [7]:
def compute_MSE(Yp, Yg):
    """Return the mean squared error between predictions and ground truth.

    The squared element-wise difference of ``Yg`` and ``Yp`` is averaged over
    every element, giving a single scalar.
    """
    residual = Yg - Yp
    return np.mean(residual * residual)

In [8]:
def forward(W, b, X, eval=False):
    """Run the single-layer sigmoid model on ``X``.

    Computes ``Z = W.T @ X + b`` and applies the logistic sigmoid to it.

    Parameters
    ----------
    W : np.ndarray
        Weight matrix; ``W.T`` is multiplied with ``X``.
    b : np.ndarray
        Bias, added to ``W.T @ X`` with numpy broadcasting.
    X : np.ndarray
        Inputs with samples along axis 1.
    eval : bool, default False
        When truthy, return the index of the largest activation along axis 0
        for each sample instead of the activations themselves.

    Returns
    -------
    np.ndarray
        The sigmoid activations, or the per-sample argmax when ``eval`` is
        truthy.
    """
    Z = np.dot(W.T, X) + b
    # sigmoid(Z) = exp(-log(1 + exp(-Z))).  logaddexp evaluates the inner log
    # without overflowing, unlike a direct exp(-Z) for large negative Z.
    A = np.exp(-np.logaddexp(0, -Z))
    # Plain truthiness: any value of `eval` yields a result, never None.
    if eval:
        return np.argmax(A, axis=0)
    return A

In [9]:
def backward(Yp, Yg, W, b, X, learning_rate=0.1):
    """Perform one gradient-descent update of ``W`` and ``b``.

    Parameters
    ----------
    Yp : np.ndarray
        Model outputs for ``X``.
    Yg : np.ndarray
        Ground-truth targets, same shape as ``Yp``.
    W, b : np.ndarray
        Current parameters.  They are updated IN PLACE (``-=``) and also
        returned, so the caller's arrays change.
    X : np.ndarray
        Inputs that produced ``Yp``; samples lie along axis 1.
    learning_rate : float, default 0.1
        Step size applied to both gradients.

    Returns
    -------
    tuple
        ``(cost, W, b)`` where ``cost`` is the MSE between ``Yp`` and ``Yg``
        measured before the update.
    """
    cost = compute_MSE(Yp.T, Yg.T)

    # Average over the samples actually passed in, not over a notebook-level
    # global, so the function is correct for any batch size.
    n_samples = X.shape[1]

    # NOTE(review): this error term has no sigmoid-derivative factor
    # Yp * (1 - Yp), so the step is not the exact gradient of the reported
    # MSE.  Left unchanged on purpose; confirm whether this is intended.
    diff = 2 * (Yp - Yg)
    dW = np.dot(X, diff.T) / n_samples
    db = np.sum(diff, axis=1, keepdims=True) / n_samples

    W -= dW * learning_rate
    b -= db * learning_rate
    return cost, W, b

In [10]:
def accuracy(y_pred, y_actu):
    """Return the fraction of predictions that match the true labels.

    ``y_actu`` is reduced to one class index per sample by taking the argmax
    along axis 0; those indices are compared element-wise with ``y_pred`` and
    the matches are averaged.
    """
    true_labels = np.argmax(y_actu, axis=0)
    is_correct = y_pred == true_labels
    return np.mean(is_correct)

In [20]:
# Training schedule.
N_ITERATIONS = 400   # number of full-batch gradient steps
LOG_EVERY = 10       # print the cost once per this many steps

# Start from all-zero parameters: one weight column and one bias per class.
W = np.zeros((784, 10))
b = np.zeros((10, 1))

# Standardize the training inputs once, outside the loop.
prep_X, prep_Y = normalize(train_X, train_Y)

for i in range(N_ITERATIONS):
    pred_Y = forward(W, b, prep_X)
    cost, W, b = backward(pred_Y, prep_Y, W, b, prep_X)
    if i % LOG_EVERY:
        continue
    print(i, cost)

0 0.25
10 0.03844744266441059
20 0.029186846679274247
30 0.024469903179005947
40 0.022073812688573394
50 0.021073496004437628
60 0.020497724048394814
70 0.020059846413298207
80 0.01971129993269403
90 0.019424722677136044
100 0.019179490715640155
110 0.018966062250414282
120 0.018780529011136523
130 0.018619771055707623
140 0.01847999802938504
150 0.018357169990396167
160 0.018247677231455792
170 0.01814867826198515
180 0.018058107328635355
190 0.017974513768880474
200 0.017896866752140063
210 0.017824397076642174
220 0.017756496239345415
230 0.017692662310813883
240 0.017632472670484952
250 0.01757556808448476
260 0.017521640699065727
270 0.01747042400206901
280 0.017421684768401276
290 0.017375217042450184
300 0.01733083781411043
310 0.01728838385316159
320 0.017247709227887906
330 0.017208683205490256
340 0.01717118839235982
350 0.017135119071682747
360 0.017100379736052704
370 0.01706688381629861
380 0.017034552596948256
390 0.01700331429771455


In [21]:
# Standardize the validation inputs with the TRAINING mean/std.  The weights
# were fitted on inputs scaled by those statistics, so held-out data has to go
# through the same transform rather than one re-estimated from itself.
train_mean, train_std = np.mean(train_X), np.std(train_X)
val_prep_X, val_prep_Y = (valid_X - train_mean) / train_std, valid_Y

# Predict class indices and score them against the one-hot labels.
valid_preds = forward(W, b, val_prep_X, eval=True)
accuracy(valid_preds, val_prep_Y)

0.9083333333333333

In [22]:
# Standardize the test inputs with the TRAINING mean/std.  The weights were
# fitted on inputs scaled by those statistics, so the test set has to go
# through the same transform rather than one re-estimated from itself.
train_mean, train_std = np.mean(train_X), np.std(train_X)
test_prep_X, test_prep_Y = (test_X - train_mean) / train_std, test_Y

# Predict class indices and score them against the one-hot labels.
test_preds = forward(W, b, test_prep_X, eval=True)
accuracy(test_preds, test_prep_Y)

0.9173