In [1]:
import pandas as pd
import numpy as np

In [2]:
cols = (list(range(-1, 784)))
def prepare_ds(path, split=False, ratio=0.0):
    data = pd.read_csv(filepath_or_buffer=path, names=cols)
    data = data.sample(frac=1)
    if split == True:    
        d1X = np.array(data[:int(-data.shape[0] * ratio)][cols[1:]]).T
        d1Y = np.array(pd.get_dummies(data[:int(-data.shape[0] * ratio)][-1])[:]).T
        d2X = np.array(data[int(-data.shape[0] * ratio):][cols[1:]]).T
        d2Y = np.array(pd.get_dummies(data[int(-data.shape[0] * ratio):][-1])[:]).T
        return d1X, d1Y, d2X, d2Y
    if split == False:
        d1X = np.array(data[:][cols[1:]]).T
        d1Y = np.array(pd.get_dummies(data[:][-1])[:]).T
        return d1X, d1Y

In [3]:
data_path = '../digit-recognizer/mnist_train.csv'
test_path = '../digit-recognizer/mnist_test.csv'
train_X, train_Y, valid_X, valid_Y = prepare_ds(data_path, split=True, ratio=0.2)
test_X, test_Y, = prepare_ds(test_path)

In [4]:
m_train = train_X.shape[1]
m_test = valid_X.shape[1]
train_X.shape, train_Y.shape, valid_X.shape, valid_Y.shape

((784, 48000), (10, 48000), (784, 12000), (10, 12000))

In [5]:
random_M, random_V = np.random.randint(-10, 10) / 100, np.random.randint(-10, 10) / 100
def normalize(X, Y):
    X = (X - np.mean(X) * (1 - random_M)) / (np.std(X) * (1 - random_V))
    return X, Y

In [6]:
def compute_LogLoss(Yp, Yg):
    cost = -(Yg * np.log(Yp) + (1 - Yg) * np.log(1 - Yp))
    return np.mean(cost)

In [7]:
def forward(W, b, X, eval=False):
    Z = np.dot(W.T, X) + b
    A = 1. / (1 + np.exp(-Z))
    if eval == False:
        return A
    if eval == True:
        return np.argmax(A, axis=0)

In [8]:
def backward_gd(Yp, Yg, W, b, X, learning_rate=0.1):
    cost = compute_LogLoss(Yp.T, Yg.T)
    diff = Yp - Yg
    dW = np.dot(X, diff.T) / m_train
    db = np.sum(diff, axis=1, keepdims=True) / m_train
    
    W -= dW * learning_rate
    b -= db * learning_rate
    return cost, W, b

In [9]:
def accuracy(y_pred, y_actu):
    y_actt = np.argmax(y_actu, axis=0)
    return np.mean((y_pred == y_actt))

In [10]:
prep_X, prep_Y = normalize(train_X, train_Y)
prep_X.shape, prep_Y.shape  

((784, 48000), (10, 48000))

In [43]:
W = np.zeros((784, 10))
b = np.zeros((10, 1))
mini_batch = 4000
for i in range(1001):
    for end in range(mini_batch, 48001, mini_batch):
        start = end - mini_batch
        prep_X, prep_Y = normalize(train_X[:, start:end], train_Y[:, start:end])   
        pred_Y = forward(W, b, prep_X)
        cost, W, b = backward_gd(pred_Y, prep_Y, W, b, prep_X)
    if i % 50 == 0:
        tr_X, tr_Y = prep_X, prep_Y
        vl_X, vl_Y = normalize(valid_X, valid_Y)
        tr_P = forward(W, b, tr_X, eval=True)
        vl_P = forward(W, b, vl_X, eval=True)

        print(i, cost, accuracy(tr_P, tr_Y), accuracy(vl_P, vl_Y))

0 0.25733076785954145 0.77275 0.77925
50 0.08608007524499092 0.88575 0.8855
100 0.07843571631692403 0.89675 0.8953333333333333
150 0.07522226460522057 0.90225 0.8995833333333333
200 0.07334546978546341 0.90625 0.90175
250 0.07206978294475248 0.9085 0.9035833333333333
300 0.0711228513830696 0.90975 0.9046666666666666
350 0.07037836069916162 0.91 0.90575
400 0.06976894755436211 0.9105 0.9070833333333334
450 0.06925507411536021 0.9125 0.908
500 0.06881184660422167 0.913 0.9088333333333334
550 0.06842272319423628 0.913 0.9090833333333334
600 0.06807623000777067 0.91425 0.91
650 0.0677641242219723 0.9145 0.9109166666666667
700 0.06748030810718839 0.9145 0.9113333333333333
750 0.06722015717858582 0.91525 0.9118333333333334
800 0.06698008842253689 0.9155 0.912
850 0.06675727365828152 0.91525 0.9118333333333334
900 0.06654944383286493 0.915 0.9119166666666667
950 0.06635475207992989 0.91475 0.912


In [None]:
W = np.zeros((784, 10))
b = np.zeros((10, 1))
for i in range(1000):
    prep_X, prep_Y = normalize(train_X, train_Y)   
    pred_Y = forward(W, b, prep_X)
    cost, W, b = backward_gd(pred_Y, prep_Y, W, b, prep_X)
    if i % 50 == 0:
        tr_X, tr_Y = prep_X, prep_Y
        vl_X, vl_Y = normalize(valid_X, valid_Y)
        tr_P = forward(W, b, tr_X, eval=True)
        vl_P = forward(W, b, vl_X, eval=True)

        print(i, cost, accuracy(tr_P, tr_Y), accuracy(vl_P, vl_Y))

In [44]:
val_prep_X, val_prep_Y = normalize(valid_X, valid_Y)
valid_preds = forward(W, b, val_prep_X, eval=True)
accuracy(valid_preds, val_prep_Y)

0.9119166666666667

In [45]:
test_prep_X, test_prep_Y = normalize(test_X, test_Y)
test_preds = forward(W, b, test_prep_X, eval=True)
accuracy(test_preds, test_prep_Y)

0.917