In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from get_images import get_images

In [None]:
# Relative directory handed to get_images() below; resolved against the
# notebook's working directory.
mnist_path = './mnist_raw'

In [None]:
# get_images is a project-local helper (imported from get_images.py, not shown
# here). Presumably it returns raw image arrays and integer labels for the
# train and test splits -- TODO confirm shapes/dtypes against the helper.
x_train_num, y_train_num, x_test_num, y_test_num = get_images(mnist_path)

In [None]:
# Number of leading samples kept for training; everything after this index in
# the raw training arrays becomes the validation split.
N_TRAIN = 50000

# Each split is flattened to one row per image and divided by 255
# (presumably mapping 8-bit pixel values into [0, 1] -- confirm against
# get_images). All three splits use the same float32 dtype so validation and
# test inputs are preprocessed exactly like the training inputs, and the row
# count is taken from the data instead of being hard-coded.
x_train = x_train_num[:N_TRAIN].reshape(len(x_train_num[:N_TRAIN]), -1).astype(np.float32)/255
y_train = y_train_num[:N_TRAIN].reshape(-1, 1)

x_val = x_train_num[N_TRAIN:].reshape(len(x_train_num[N_TRAIN:]), -1).astype(np.float32)/255
y_val = y_train_num[N_TRAIN:].reshape(-1, 1)

# astype() already returns a fresh array, so only the labels need an explicit
# copy to stay independent of y_test_num.
x_test = x_test_num.reshape(len(x_test_num), -1).astype(np.float32)/255
y_test = y_test_num.copy().reshape(-1, 1)

In [None]:
def create_minibatches(mb_size, x, y, shuffle = True):
    """Return a lazy generator of (x_batch, y_batch) slices of size mb_size.

    Args:
        mb_size: number of rows per batch; the last batch may be smaller.
        x: array whose first axis indexes samples.
        y: array with the same number of rows as x.
        shuffle: when True, rows of x and y are permuted together (using the
            global NumPy RNG) before slicing. The permutation is drawn when
            this function is called, not when the generator is consumed.

    Returns:
        A generator yielding (x[i:i+mb_size], y[i:i+mb_size]) tuples.

    Raises:
        AssertionError: if x and y have a different number of rows.
    """
    n_samples = x.shape[0]
    assert n_samples == y.shape[0], 'Error Muestras'

    if shuffle:
        order = np.arange(n_samples)
        np.random.shuffle(order)
        # Fancy indexing makes shuffled copies; the caller's arrays are untouched.
        x, y = x[order], y[order]

    batch_starts = range(0, n_samples, mb_size)
    return ((x[start:start + mb_size], y[start:start + mb_size]) for start in batch_starts)

In [None]:
## Initialization parameters
def init_parameters(input_size, neurons):
    """Create random weights and zero biases for the 8-layer network.

    Args:
        input_size: number of input features feeding layer 1.
        neurons: sequence with at least 8 entries; neurons[k] is the width of
            layer k+1.

    Returns:
        Dict with keys 'W1', 'b1', ..., 'W8', 'b8'. Wk has shape
        (neurons[k-1], fan_in) and is drawn from a standard normal scaled by a
        fixed per-layer factor; bk is a zero column of shape (neurons[k-1], 1).
    """
    # Fixed standard-deviation multiplier applied to each layer's weights,
    # listed from layer 1 to layer 8.
    weight_scales = (
        0.001,
        0.756015625,
        0.758359375,
        0.618125,
        0.330996094,
        0.121914063,
        0.19,
        0.001,
    )

    params = {}
    fan_in = input_size
    for layer, scale in enumerate(weight_scales, start=1):
        fan_out = neurons[layer - 1]
        params[f'W{layer}'] = np.random.randn(fan_out, fan_in) * scale
        params[f'b{layer}'] = np.zeros((fan_out, 1))
        # The next layer consumes this layer's outputs.
        fan_in = fan_out

    return params

In [None]:
### Check the shape of the neuron layers and change sizes here.
# 28*28 input features, seven hidden layers of 100 units, and a 10-unit output layer.
parameters = init_parameters(28*28, [100,100,100,100,100,100,100,10])

In [None]:
def relu(x):
    """Element-wise rectified linear unit: returns max(0, x)."""
    rectified = np.maximum(0, x)
    return rectified

In [None]:
def scores1(x, parameters, activation_fcn):
    """Forward pass through the 8-layer network.

    Args:
        x: input matrix with one sample per column (features x batch), so that
            parameters['W1'] @ x is defined.
        parameters: dict holding 'W1'..'W8' and 'b1'..'b8'.
        activation_fcn: callable applied element-wise to the pre-activations
            of layers 1-7; layer 8 is left linear.

    Returns:
        A 15-tuple (z8, z7, z6, z5, z4, z3, z2, z1, a1, a2, a3, a4, a5, a6, a7)
        where zk is layer k's pre-activation and ak = activation_fcn(zk).
        z8 holds the raw class scores.
    """
    pre_activations = []   # z1 .. z7, in layer order
    activations = []       # a1 .. a7, in layer order

    layer_input = x
    for layer in range(1, 8):
        z = parameters[f'W{layer}'] @ layer_input + parameters[f'b{layer}']
        # Each activation is computed from its own layer's pre-activation.
        # The previous code fed z6 into a7, which bypassed W7/b7 entirely.
        layer_input = activation_fcn(z)
        pre_activations.append(z)
        activations.append(layer_input)

    # Output layer: linear scores, no activation.
    z8 = parameters['W8'] @ layer_input + parameters['b8']

    return (z8, *reversed(pre_activations), *activations)

In [None]:
# Smoke-test the forward pass on the first 64 training rows, transposed so
# each column is one sample (the layout scores1 multiplies W1 against).
scores, z7, z6, z5, z4, z3, z2, z1, a1, a2, a3, a4, a5, a6, a7 = scores1(x_train[:64].T, parameters, relu)

In [None]:
def softmax(x):
    """Column-wise softmax: normalizes exp(x) along axis 0.

    Args:
        x: array of scores; each column (axis 0) is treated as one
            distribution.

    Returns:
        Array of the same shape whose entries along axis 0 sum to 1.
    """
    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (and the ratio from
    # becoming NaN) when scores are large.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    sum_exp_scores = np.sum(exp_scores, axis=0, keepdims=True)
    probs = exp_scores/sum_exp_scores
    return probs

In [None]:
def x_entropy(scores, y, batch_size=64):
    """Softmax probabilities and mean cross-entropy cost for one batch.

    Args:
        scores: class scores with one sample per column.
        y: integer class labels; squeezed before being used as row indices.
        batch_size: number of columns to evaluate. It must match the number
            of labels in y; the default of 64 only fits 64-sample batches.

    Returns:
        (probs, cost): the full softmax matrix and the average negative
        log-probability assigned to the true classes.
    """
    probs = softmax(scores)

    # Pick, for every column j, the probability of its true class y[j].
    columns = np.arange(batch_size)
    true_class_probs = probs[y.squeeze(), columns]

    neg_log_likelihood = -np.log(true_class_probs)
    cost = np.sum(neg_log_likelihood) / batch_size

    return probs, cost

In [None]:
def backward(probs, x, y, z7, z6, z5, z4, z3, z2, z1, a1, a2, a3, a4, a5, a6, a7 , scores, parameters, batch_size=64):
    """Backpropagate the mean softmax cross-entropy loss through all 8 layers.

    Args:
        probs: softmax output, classes x batch. It is not modified.
        x: input batch with one sample per ROW (batch x features), i.e. the
            untransposed array whose transpose was fed to the forward pass.
        y: integer class labels; squeezed before being used as row indices.
        z7..z1: pre-activations of layers 7 down to 1 from the forward pass.
        a1..a7: activations of layers 1 to 7 from the forward pass.
        scores: raw output scores; unused, kept for interface compatibility.
        parameters: dict holding 'W1'..'W8' and 'b1'..'b8'.
        batch_size: number of samples (columns of probs).

    Returns:
        Dict with keys 'w1', 'b1', ..., 'w8', 'b8' (lower-case 'w') holding
        the gradient of the batch-averaged loss for each parameter.

    Raises:
        AssertionError: if a gradient's shape differs from its parameter's.
    """
    # Gradient w.r.t. the output scores is (probs - one_hot(y)). Work on a
    # copy so the caller's probability matrix is left intact.
    delta = probs.copy()
    delta[y.squeeze(), np.arange(batch_size)] -= 1

    # layer_inputs[k-1] is what layer k consumed during the forward pass.
    layer_inputs = [x.T, a1, a2, a3, a4, a5, a6, a7]
    # pre_activations[k-1] is z_k, needed for the ReLU mask of layer k.
    pre_activations = [z1, z2, z3, z4, z5, z6, z7]

    computed = {}
    for layer in range(8, 0, -1):
        # Every layer, including the first, is averaged over the batch so all
        # parameters see gradients of the same (mean) loss.
        dW = delta @ layer_inputs[layer - 1].T / batch_size
        db = np.sum(delta, axis=1, keepdims=True) / batch_size

        assert parameters[f'W{layer}'].shape == dW.shape, f'Forma diferente W{layer}'
        assert parameters[f'b{layer}'].shape == db.shape, f'Forma diferente b{layer}'

        computed[f'w{layer}'] = dW
        computed[f'b{layer}'] = db

        if layer > 1:
            # Push the error to the previous layer, then zero it wherever
            # that layer's ReLU was inactive.
            delta = parameters[f'W{layer}'].T @ delta
            delta[pre_activations[layer - 2] <= 0] = 0

    # Return keys in ascending layer order, as before.
    grads = {}
    for layer in range(1, 9):
        grads[f'w{layer}'] = computed[f'w{layer}']
        grads[f'b{layer}'] = computed[f'b{layer}']

    return grads

In [None]:
## Gradient computation tests
# Uses the scores from the 64-sample forward pass above, so the default
# batch_size=64 of x_entropy matches. Note that y_hat receives the full
# softmax probability matrix returned by x_entropy, not predicted labels.
y_hat, cost = x_entropy(scores, y_train[:64])

In [None]:
## Revisar formas
#print(y_hat.shape)
#print(x_train.T.shape)
#print(scores.shape)
#print(cost)

In [None]:
# Run backprop on the same 64-sample batch. x is passed untransposed
# (samples as rows), unlike the forward pass which received x_train[:64].T.
grads = backward(y_hat, x_train[:64], y_train[:64], z7, z6, z5, z4, z3, z2, z1, a1, a2, a3, a4, a5, a6, a7, scores, parameters)

In [None]:
def accuracy(x_data, y_data, mb_size=64):
    """Fraction of samples whose arg-max class score equals the label.

    Reads the notebook-level `parameters` dict for the network weights.

    Args:
        x_data: samples as rows (samples x features).
        y_data: integer labels, one row per sample.
        mb_size: number of samples evaluated per forward pass.

    Returns:
        correct / total over the whole dataset.

    Raises:
        ZeroDivisionError: if x_data contains no samples.
    """
    correct = 0
    total = 0
    # Accuracy does not depend on sample order, so skip the shuffle: it would
    # copy the whole dataset on every call and advance the global RNG.
    for x, y in create_minibatches(mb_size, x_data, y_data, shuffle=False):
        class_scores = scores1(x.T, parameters, relu)[0]
        # Softmax is monotonic per column, so the arg-max of the raw scores is
        # the predicted class; no need to compute probabilities or the cost.
        predictions = np.argmax(class_scores, axis=0)

        correct += np.sum(predictions == y.squeeze())
        total += class_scores.shape[1]
    return correct/total

In [None]:
def train(epochs, parameters, mb_size=64, learning_rate = 1e-4):
    """Train the network with mini-batch gradient descent.

    Draws batches from the notebook-level x_train / y_train, and after every
    epoch prints the cost of the last batch plus the accuracy on the
    validation, training and test sets (read from notebook-level arrays).

    Args:
        epochs: number of full passes over the training data.
        parameters: dict with 'W1'..'W8' and 'b1'..'b8'; its entries are
            reassigned in place, so the caller's dict is updated.
        mb_size: mini-batch size.
        learning_rate: step size of each gradient-descent update.

    Returns:
        The same `parameters` dict that was passed in.
    """
    # Parameter keys use an upper-case 'W'; backward() reports the matching
    # gradient under the lower-cased key.
    param_keys = [f'{kind}{layer}' for layer in range(1, 9) for kind in ('W', 'b')]

    for _ in range(epochs):
        for x, y in create_minibatches(mb_size, x_train, y_train):
            forward_out = scores1(x.T, parameters=parameters, activation_fcn=relu)
            # forward_out is (scores, z7..z1, a1..a7); the tail matches the
            # positional order backward() expects.
            scores2, cached = forward_out[0], forward_out[1:]
            y_hat, cost = x_entropy(scores2, y, batch_size=len(x))
            grads = backward(y_hat, x, y, *cached, scores2, parameters, batch_size=len(x))

            for key in param_keys:
                parameters[key] = parameters[key] - learning_rate*grads[key.lower()]

        print(f'costo: {cost} precisión: {accuracy(x_val, y_val, mb_size)} precisión_entrenamiento: {accuracy(x_train, y_train, mb_size)} precisión_prueba: {accuracy(x_test, y_test, mb_size)}')
    return parameters

In [None]:
## Hyperparameters for training the neural network
mb_size = 1024         # samples per mini-batch
learning_rate = 1e-4   # gradient-descent step size
epochs = 400           # full passes over the training set
# train() updates the entries of the dict it receives and returns that same
# dict, so `parameters` refers to the trained weights afterwards.
parameters = train(epochs=epochs,parameters=parameters,mb_size=mb_size,learning_rate=learning_rate)

In [None]:
#print(accuracy(x_train, y_train, mb_size))
#print(accuracy(x_test, y_test, mb_size))

In [None]:
def predict(x):
    """Predict the class of a single sample given as a column vector.

    Reads the notebook-level `parameters` dict. np.argmax is taken over the
    flattened score array, so the result is a class index only when x holds
    exactly one sample (one column).
    """
    scores2 = scores1(x, parameters, relu)[0]
    return np.argmax(scores2)

In [None]:
# Sanity check: shape of one test sample reshaped into a column vector, the
# layout predict() is called with in the demo cell below.
x_test[0].reshape(-1, 1).shape

In [None]:
def plot_number(image):
    """Show `image` as a grayscale picture with the axes hidden."""
    pixels = image.squeeze()   # drop any singleton dimensions before plotting
    gray = plt.get_cmap('gray')
    plt.imshow(pixels, cmap=gray)
    plt.axis('off')
    plt.show()

In [None]:
# Pick a random test sample, show the raw image, and compare the network's
# prediction with the true label.
idx = np.random.randint(len(y_test))
plot_number(x_test_num[idx])
# predict() expects one sample as a column vector.
pred = predict(x_test[idx].reshape(-1, 1))
print(f'Valor predicho: {pred}. Valor real:{y_test[idx][0]}.')