In [None]:
# necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import math

# Preparation of training and validation datasets

The data was taken from the <a href="https://www.kaggle.com/c/digit-recognizer/data">Kaggle Digit Recognizer competition</a>.

In [None]:
# train.csv provides at most 42000 labelled rows (max 42000). The training and
# validation splits are read back to back from that file, so together they
# must fit inside it.
total_rows = 42000
train_size = 32000
validation_size = 10000
assert train_size + validation_size <= total_rows, \
    "train_size + validation_size exceeds the rows available in train.csv"

In [None]:
# Training pixels: skip the CSV header, read the first `train_size` rows and
# drop column 0 (it is loaded separately as the label).
# `max_rows` stops parsing once enough rows were read, so the cell no longer
# depends on the file containing exactly 42000 rows.
x_train = np.genfromtxt('mnist/train.csv', delimiter=',', skip_header=1, max_rows=train_size)[:, 1:]
x_train.shape

In [None]:
# Training labels: column 0 of the first `train_size` rows, kept as an
# (n, 1) column vector.
# `max_rows` replaces the `skip_footer=42000 - train_size` arithmetic, which
# silently selected the wrong rows whenever the file length was not 42000.
y_train = np.genfromtxt('mnist/train.csv', delimiter=',', skip_header=1, max_rows=train_size)[:, 0][:, np.newaxis]
y_train.shape

In [None]:
# Validation pixels: the `validation_size` rows that directly follow the
# training rows (header line + train_size lines are skipped), label column
# dropped. `max_rows` bounds the read without assuming the file length.
x_validation = np.genfromtxt('mnist/train.csv',
                             delimiter=',',
                             skip_header=1 + train_size,
                             max_rows=validation_size)[:, 1:]
x_validation.shape

In [None]:
# Validation labels: column 0 of the `validation_size` rows that directly
# follow the training rows, kept as an (n, 1) column vector.
# `max_rows` bounds the read without assuming the file length.
y_validation = np.genfromtxt('mnist/train.csv',
                             delimiter=',',
                             skip_header=1 + train_size,
                             max_rows=validation_size)[:, 0][:, np.newaxis]
y_validation.shape

In [None]:
# Every sample is a flattened 28x28 image (784 values); fold sample 9 back
# into two dimensions to display it, then show its label.
plt.imshow(x_train[9].reshape(28, 28));
y_train[9]

# Activation functions

In [None]:
# Hyperbolic tangent
def tanh(x):
    """Return the hyperbolic tangent of ``x``, element-wise.

    Delegates to ``np.tanh``, which saturates cleanly at -1 / +1. The explicit
    ``(e**x - e**-x) / (e**x + e**-x)`` form overflows to ``inf / inf`` (NaN)
    once ``|x|`` exceeds roughly 709.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        A value with the same shape as ``x``, each element in [-1, 1].
    """
    return np.tanh(x)

# Derivative of hyperbolic tangent
def d_tanh(x):
    """Return d/dx tanh(x) = 1 - tanh(x)**2, element-wise.

    Computed from ``np.tanh`` so that large ``|x|`` yields a derivative of 0
    instead of NaN from an overflowing exponential.

    Args:
        x: scalar or array-like of pre-activation values.

    Returns:
        A value with the same shape as ``x``, each element in [0, 1].
    """
    return 1 - np.tanh(x) ** 2

# Softmax in output layer
def softmax(x):
    """Row-wise softmax over the 10 class scores.

    Each row is shifted by its *own* maximum before exponentiation. Shifting
    by the global maximum (as before) let rows far below that maximum
    underflow to all zeros and produce 0 / 0 = NaN.

    Args:
        x: array of shape (n_examples, 10) holding raw class scores.

    Returns:
        Array of shape (n_examples, 10); every row is non-negative and sums
        to 1.

    Raises:
        AssertionError: if ``x`` does not have exactly 10 columns.
    """
    assert x.shape[1] == 10
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)
    

# Network architecture

In [None]:
class DigitRecognizer:
    """Fully connected network: 784 inputs -> tanh hidden layer -> 10-way softmax.

    Attributes written by ``__init__``:
        layer1, b1: hidden-layer weights (n_hidden_neurons, 784) and bias
            (n_hidden_neurons, 1).
        layer2, b2: output-layer weights (10, n_hidden_neurons) and bias (10, 1).
        act1, d_act1: hidden activation and its derivative.

    Attributes written by ``forward``:
        summatory1: hidden pre-activations, (n_hidden_neurons, n_examples).
        summatory2: output pre-activations, (10, n_examples).
    """

    def __init__(self, n_hidden_neurons):
        """Draw all parameters uniformly from [-0.5, 0.5)."""

        def centred_uniform(*shape):
            # np.random.random samples [0, 1); shifting centres it on zero.
            return np.random.random(shape) - 0.5

        # Draw order (layer1, b1, layer2, b2) is kept so that a seeded
        # generator produces the same parameters as before.
        self.layer1 = centred_uniform(n_hidden_neurons, 784)
        self.b1 = centred_uniform(n_hidden_neurons, 1)
        self.act1 = tanh
        self.d_act1 = d_tanh
        self.layer2 = centred_uniform(10, n_hidden_neurons)
        self.b2 = centred_uniform(10, 1)

    def forward(self, x):
        """Run a forward pass and cache both pre-activations.

        Args:
            x: array of shape (n_examples, 784); it is divided by 255 before
                entering the first layer.

        Returns:
            Array of shape (n_examples, 10) with the softmax output per example.
        """
        scaled = x / 255

        # (n_hidden_neurons, 784) @ (784, n_examples), bias broadcast over columns
        hidden_in = self.layer1 @ scaled.T
        hidden_in += self.b1
        self.summatory1 = hidden_in

        hidden_out = self.act1(hidden_in)

        # (10, n_hidden_neurons) @ (n_hidden_neurons, n_examples), bias broadcast
        scores = self.layer2 @ hidden_out
        scores += self.b2
        self.summatory2 = scores

        # softmax expects one example per row: (n_examples, 10)
        return softmax(scores.T)

# Width of the hidden layer; change it to try a different number of neurons.
hidden_size = 100
net = DigitRecognizer(hidden_size)

In [None]:
def accuracy(x, y, model=None):
    """Fraction of examples whose arg-max prediction equals the label.

    Args:
        x: inputs accepted by ``model.forward`` (one example per row).
        y: labels, shape (n,) or (n, 1). They are flattened before comparing;
            previously an (n,) vector broadcast against the (n, 1) predictions
            into an (n, n) matrix and produced a wrong score.
        model: object exposing ``forward(x)`` that returns per-class scores of
            shape (n, n_classes). Defaults to the notebook-level ``net``.

    Returns:
        Accuracy in [0, 1] as a numpy float.
    """
    if model is None:
        model = net  # fall back to the notebook's global network
    predicted = model.forward(x).argmax(axis=1)
    labels = np.asarray(y).reshape(-1)
    return np.mean(predicted == labels)
# Accuracy of the network in its current state on the validation split.
accuracy(x_validation, y_validation)

# Cross entropy loss

In [None]:
def cross_entropy(y_pred, y, eps=1e-12):
    """Mean negative log-likelihood of the true classes.

    Args:
        y_pred: array (m, n_classes) of predicted class probabilities.
        y: array of m class labels, shape (m,) or (m, 1); values are cast to int.
        eps: lower bound applied to the selected probabilities before taking
            the log, so a probability that underflowed to 0 yields a large
            finite loss instead of ``inf``.

    Returns:
        The loss averaged over the m examples.
    """
    m = y.shape[0]
    targets = y.reshape(m).astype(int)
    # Probability the model assigned to each example's true class.
    picked = y_pred[np.arange(m), targets]
    log_likelihood = -np.log(np.maximum(picked, eps))
    loss = np.sum(log_likelihood) / m
    return loss

# derivative of cross entropy with softmax
def d_ce(y_pred, y):
    """Gradient of softmax + cross-entropy with respect to the pre-softmax scores.

    For softmax outputs ``p`` and a one-hot target ``t`` the gradient is
    ``p - t`` for *every* class. The previous version kept only the
    target-class entry and zeroed the others, which dropped the push-down
    signal on the wrong classes.

    Args:
        y_pred: array (m, n_classes) of softmax probabilities; not modified.
        y: array of m class labels, shape (m,) or (m, 1); values are cast to int.

    Returns:
        New float array of shape (m, n_classes) holding ``p - onehot(y)``.
    """
    m = len(y)
    targets = y.reshape(m).astype(int)
    grad = np.array(y_pred, dtype=float)  # copy, so the caller's array stays intact
    grad[np.arange(m), targets] -= 1.0
    return grad

In [None]:
# Mean cross-entropy of the network in its current state on the validation split.
cross_entropy(net.forward(x_validation), y_validation)

# Learning process (back propagation algorithm)

In [None]:
def train_without_softmax(net, x, y, lr):
    """Perform one gradient-descent step on ``net`` in place.

    All gradients are computed from the parameters used in the forward pass
    and only then applied. Previously ``layer2`` was updated first and the
    already-modified weights were used to back-propagate into the hidden layer.

    Args:
        net: network exposing ``forward``, ``act1``, ``d_act1``, ``layer1``,
            ``b1``, ``layer2``, ``b2`` and, after ``forward``, ``summatory1``.
        x: inputs, shape (n_examples, 784).
        y: labels accepted by ``d_ce``.
        lr: learning rate; gradients are summed (not averaged) over the batch.

    Returns:
        None. ``net.layer1``, ``net.b1``, ``net.layer2`` and ``net.b2`` are
        modified in place.
    """
    y_pred = net.forward(x)  # (n_examples, 10)

    hidden = net.act1(net.summatory1)  # (n_hidden_neurons, n_examples)
    error_2 = d_ce(y_pred, y)  # (n_examples, 10)

    # Back-propagate through the *pre-update* output weights.
    # (n_hidden_neurons, n_examples) =
    #     (10, n_hidden_neurons).T @ (n_examples, 10).T * (n_hidden_neurons, n_examples)
    error_1 = net.layer2.T @ error_2.T * net.d_act1(net.summatory1)

    net.b2 -= lr * np.sum(error_2, axis=0)[:, np.newaxis]  # (10, 1)
    # (10, n_hidden_neurons) -= lr * (n_examples, 10).T @ (n_hidden_neurons, n_examples).T
    net.layer2 -= lr * error_2.T @ hidden.T

    net.b1 -= lr * np.sum(error_1, axis=1)[:, np.newaxis]  # (n_hidden_neurons, 1)
    # NOTE(review): forward() feeds x / 255 into layer1, but the raw x is used
    # here, which scales layer1's effective learning rate by 255. Left as is
    # because the notebook's lr was tuned with this behaviour.
    # (n_hidden_neurons, 784) -= lr * (n_hidden_neurons, n_examples) @ (n_examples, 784)
    net.layer1 -= lr * error_1 @ x

In [None]:
import sys

# Mini-batch training: every epoch walks the training set in consecutive
# slices of `batch_size` rows (a trailing partial slice is skipped), then
# reports validation accuracy and loss.
n_epoch = 10
batch_size = 2000
for epoch in range(n_epoch):
    for start in range(0, train_size - batch_size + 1, batch_size):
        stop = start + batch_size
        train_without_softmax(net, x_train[start:stop], y_train[start:stop], 0.00001)
        # "\r" rewinds to the start of the line so the counter overwrites itself.
        progress = "\r" + 'Now: ' + str(stop) + ' from ' + str(train_size)
        sys.stdout.write(progress)
        sys.stdout.flush()
    validation_accuracy = accuracy(x_validation, y_validation)
    validation_loss = cross_entropy(net.forward(x_validation), y_validation)
    print("\r", validation_accuracy, validation_loss)

In [None]:
# Accuracy on the whole training set, evaluated in chunks of `step` rows.
# The loop bounds follow the actual length of x_train instead of the
# hard-coded 8000..32001, and every chunk is weighted by its size so a
# shorter final chunk does not skew the average.
summ = 0   # running number of correctly classified samples
step = 8000
d = 0      # running number of evaluated samples
for start in range(0, len(x_train), step):
    x_chunk = x_train[start:start + step]
    y_chunk = y_train[start:start + step]
    summ += accuracy(x_chunk, y_chunk) * len(y_chunk)
    d += len(y_chunk)
print(summ / d)

In [None]:
# Show one training sample next to its label and the network's prediction.
n = 71
sample = x_train[n]
plt.imshow(sample.reshape(28, 28));
print(int(y_train[n][0]), " - answer")
# forward() expects one example per row, hence the added leading axis.
print(net.forward(sample[np.newaxis, :]).argmax(), " - network output")

In [None]:
import matplotlib.image as mpimg

# Classify a user-supplied picture.
# NOTE(review): assumes a 28x28 image with a channel axis whose values lie in
# [0, 1]; only channel 0 is used - confirm against the actual file.
image = mpimg.imread('mnist/mypicture.png')
first_channel = image[:, :, 0]
# Invert and rescale to the 0..255 range that forward() divides by 255.
vector_img = np.array(1 - first_channel) * 255
plt.imshow(image);
net.forward(vector_img.reshape(1, 784)).argmax()