In [123]:
from keras.datasets import mnist
import numpy as np
from tqdm import tqdm

In [124]:
# Seed NumPy's global RNG so that the shuffle below -- and the np.random.randn
# weight initialisation inside train() -- give the same result on every
# "Restart Kernel -> Run All".
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Shuffle images and labels with the same index permutation so that each
# image stays paired with its label.
permutation = np.random.permutation(len(x_train))
x_train = x_train[permutation]
y_train = y_train[permutation]

def vectorize(list, vector_len):
    """One-hot encode a sequence of indices.

    Args:
        list: Sequence of column indices, one per output row (note: the
            parameter name shadows the builtin ``list`` inside this function).
        vector_len: Number of columns in every output row.

    Returns:
        A float array of shape ``(len(list), vector_len)`` that is zero
        everywhere except for a 1 at ``[row, list[row]]``.
    """
    one_hot = np.zeros((len(list), vector_len))
    for row, hot_column in enumerate(list):
        one_hot[row, hot_column] = 1
    return one_hot

def normalize(matrix):
    """Min-max scale every row of a 2-D matrix into the range [0, 1].

    Args:
        matrix: 2-D array-like; each row is scaled independently.

    Returns:
        A float array of the same shape where, per row, the minimum maps to 0
        and the maximum maps to 1. A constant row (max == min) maps to all
        zeros instead of the NaNs a plain 0 / 0 division would produce.
    """
    min_value = np.min(matrix, axis=1, keepdims=True)
    max_value = np.max(matrix, axis=1, keepdims=True)
    value_range = max_value - min_value

    # For a constant row the numerator below is all zeros, so dividing by 1
    # yields zeros and avoids the 0 / 0 -> NaN (and its RuntimeWarning).
    safe_range = np.where(value_range == 0, 1, value_range)

    return (matrix - min_value) / safe_range

# One-hot encode the labels into vectors of length 10.
y_train, y_test = vectorize(y_train, 10), vectorize(y_test, 10)

# Flatten each image into a single row, then min-max scale every row.
x_train = normalize(x_train.reshape(len(x_train), -1))
x_test = normalize(x_test.reshape(len(x_test), -1))


In [125]:
# Sanity check: report the shape of every preprocessed array.
print("Shapes:")
for name, array in (
    ("x_train", x_train),
    ("y_train", y_train),
    ("x_test", x_test),
    ("y_test", y_test),
):
    print(name, array.shape)

Shapes:
x_train (60000, 784)
y_train (60000, 10)
x_test (10000, 784)
y_test (10000, 10)


In [126]:
def train(x_train, y_train, x_test, y_test, layers, epochs=10, learning_rate=0.01, batch_size=200):
    """Train a fully connected sigmoid network with mini-batch gradient descent.

    Weights are initialised from a standard normal distribution scaled by 0.1
    and biases start at zero. Gradients are computed one sample at a time,
    summed over the mini-batch, and applied as the batch average. After every
    epoch the accuracy on the test set is printed.

    Args:
        x_train: 2-D array of training inputs, one sample per row.
        y_train: 2-D array of training targets, one row per sample.
        x_test: Inputs passed to ``test`` after each epoch.
        y_test: Targets passed to ``test`` after each epoch.
        layers: Layer sizes from input to output, e.g. ``[784, 32, 10]``.
        epochs: Number of passes over the training data.
        learning_rate: Step size applied to the batch-averaged gradient.
        batch_size: Samples per mini-batch. When the number of samples is not
            a multiple of ``batch_size`` the final, smaller batch is still
            used (and averaged over its own size) instead of being dropped.

    Returns:
        ``(biases, weights)`` -- two lists holding one array per layer
        transition. Note the order: biases come first.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    weights = []
    biases = []
    for fan_in, fan_out in zip(layers[:-1], layers[1:]):
        weights.append(np.random.randn(fan_in, fan_out) / 10)
        biases.append(np.zeros(fan_out))

    # The batches never change between epochs (no reshuffling), so slice them
    # once. Stepping by batch_size keeps the trailing partial batch, which
    # floor-division batching would silently discard.
    batch_starts = range(0, x_train.shape[0], batch_size)
    x_batches = [x_train[start:start + batch_size] for start in batch_starts]
    y_batches = [y_train[start:start + batch_size] for start in batch_starts]

    for _ in range(epochs):
        for x_batch, y_batch in tqdm(zip(x_batches, y_batches), total=len(x_batches)):

            biases_gradients_sum = [np.zeros(b.shape) for b in biases]
            weights_gradients_sum = [np.zeros(w.shape) for w in weights]
            for x, y in zip(x_batch, y_batch):
                predictions, layer_input, before_act = forward_propagation(x, weights, biases)
                b_gradients, w_gradients = backward_propagation(predictions, y, weights, biases, layer_input, before_act)
                # Accumulate in place; the accumulators are fresh arrays
                # created for this batch, so nothing else aliases them.
                for running_total, new_gradient in zip(biases_gradients_sum, b_gradients):
                    running_total += new_gradient
                for running_total, new_gradient in zip(weights_gradients_sum, w_gradients):
                    running_total += new_gradient

            # Average over the samples actually in this batch (the last batch
            # may be smaller than batch_size).
            samples_in_batch = len(x_batch)
            biases = [b - learning_rate * nb / samples_in_batch for b, nb in zip(biases, biases_gradients_sum)]
            weights = [w - learning_rate * nw / samples_in_batch for w, nw in zip(weights, weights_gradients_sum)]

        print("test", test(x_test, y_test, weights, biases))

    return biases, weights

def test(x_matrix, y_matrix, weights, biases):
    """Measure classification accuracy of the network on a labelled set.

    A sample counts as correct when the target row holds a 1 at the position
    of the network's largest output.

    Args:
        x_matrix: Inputs, one sample per row.
        y_matrix: Targets, one row per sample (must expose ``.shape``).
        weights: Per-layer weight matrices.
        biases: Per-layer bias vectors.

    Returns:
        The accuracy as a percentage, formatted as a string ending in ``%``.
    """
    def is_hit(sample, target):
        predictions, _, _ = forward_propagation(sample, weights, biases)
        return target[np.argmax(predictions)] == 1

    hits = sum(1 for sample, target in zip(x_matrix, y_matrix) if is_hit(sample, target))
    accuracy_percent = hits / y_matrix.shape[0] * 100
    return str(accuracy_percent) + "%"
    
def forward_propagation(x, weights, biases):
    """Run an input through every layer of the network.

    Each layer computes ``sigmoid(activation @ w + b)``.

    Args:
        x: Input fed to the first layer.
        weights: Per-layer weight matrices.
        biases: Per-layer bias vectors, paired with ``weights`` by position.

    Returns:
        A tuple ``(output, layer_input, before_act)`` where ``output`` is the
        final activation, ``layer_input[i]`` is what layer ``i`` received and
        ``before_act[i]`` is layer ``i``'s value before the sigmoid.
    """
    layer_input, before_act = [], []
    activation = x
    for weight, bias in zip(weights, biases):
        layer_input.append(activation)
        pre_activation = activation @ weight + bias
        before_act.append(pre_activation)
        activation = sigmoid(pre_activation)

    return activation, layer_input, before_act
   
def backward_propagation(predictions, y, weights, biases, layer_input, before_act):
    """Compute per-layer gradients for one sample via backpropagation.

    The starting error term ``(2 / len(y)) * (predictions - y)`` is the
    gradient of the mean squared error with respect to the predictions.

    Args:
        predictions: Network output for the sample.
        y: Target vector for the sample.
        weights: Per-layer weight matrices.
        biases: Per-layer bias vectors (only used to pair up the layers).
        layer_input: Input each layer received during the forward pass.
        before_act: Pre-sigmoid value of each layer from the forward pass.

    Returns:
        ``(b_gradients, w_gradients)`` -- lists ordered from first to last
        layer, holding the gradient for each bias vector and weight matrix.
    """
    bias_grads, weight_grads = [], []

    delta = (2 / y.shape[0]) * (predictions - y)
    per_layer = list(zip(layer_input, before_act, weights, biases))
    # Walk the layers from the output back to the input.
    for inputs, pre_activation, weight, _ in per_layer[::-1]:
        delta = sigmoid_derivative(pre_activation) * delta
        bias_grads.append(delta.flatten())
        # Column vector times row vector: entry [i, j] is inputs[i] * delta[j].
        column = inputs.reshape(-1, 1)
        row = delta.reshape(1, -1)
        weight_grads.append(np.matmul(column, row))
        # Push the error term back to the previous layer's outputs.
        delta = delta @ weight.T

    # Gradients were collected last-layer-first; return them first-layer-first.
    return bias_grads[::-1], weight_grads[::-1]

def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    Args:
        z: Scalar or NumPy array of real values.

    Returns:
        The element-wise sigmoid of ``z``, in the range [0, 1].
    """
    # np.logaddexp(0, -z) is log(exp(0) + exp(-z)) = log(1 + exp(-z)) computed
    # stably, so large negative z no longer overflows np.exp and triggers a
    # RuntimeWarning the way the direct formula does.
    return np.exp(-np.logaddexp(0.0, -z))

def sigmoid_derivative(z):
    """Derivative of the sigmoid at ``z``: ``sigmoid(z) * (1 - sigmoid(z))``.

    Args:
        z: Scalar or NumPy array of pre-activation values.

    Returns:
        The element-wise derivative, with the same shape as ``sigmoid(z)``.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    activation = sigmoid(z)
    return activation * (1 - activation)

# Train a network with layer sizes 784 -> 32 -> 10; note that train()
# returns the biases first and the weights second.
biases, weights = train(
    x_train,
    y_train,
    x_test,
    y_test,
    layers=[784, 32, 10],
    epochs=10,
    learning_rate=3,
    batch_size=100,
)

100%|██████████| 600/600 [00:05<00:00, 112.03it/s]


test 82.48%


100%|██████████| 600/600 [00:05<00:00, 105.63it/s]


test 89.16%


100%|██████████| 600/600 [00:05<00:00, 116.96it/s]


test 90.31%


100%|██████████| 600/600 [00:05<00:00, 114.56it/s]


test 90.95%


100%|██████████| 600/600 [00:05<00:00, 111.37it/s]


test 91.55%


100%|██████████| 600/600 [00:05<00:00, 105.62it/s]


test 92.06%


100%|██████████| 600/600 [00:05<00:00, 108.56it/s]


test 92.36999999999999%


100%|██████████| 600/600 [00:05<00:00, 112.51it/s]


test 92.58%


100%|██████████| 600/600 [00:05<00:00, 107.51it/s]


test 92.82000000000001%


100%|██████████| 600/600 [00:05<00:00, 116.32it/s]

test 92.96%



