In [23]:
import numpy as np
import matplotlib.pyplot as plt
import mnist

#### Загрузка MNIST и подготовка данных

In [24]:
train_data = mnist.train_images()
train_labels = mnist.train_labels()

test_data = mnist.test_images()
test_labels = mnist.test_labels()

In [25]:
print(train_data.shape, test_data.shape)

(60000, 28, 28) (10000, 28, 28)


In [32]:
X_train = np.reshape(train_data, [28*28, -1])
X_test = np.reshape(test_data, [28*28, -1])

y_train = np.reshape(train_labels, [-1, len(train_labels)])
y_test = np.reshape(test_labels, [-1, len(test_labels)])

In [34]:
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(784, 60000) (784, 10000)
(1, 60000) (1, 10000)


#### Support functions

In [36]:
def sigma(z):
    '''Return sigma function result
    '''
    return(1/(1 + np.power(np.exp(1), -z)))

In [37]:
# Sanity check
assert sigma(0)==0.5
assert sigma(np.inf) == 1
assert sigma(-np.inf) == 0

**Support functions**

In [41]:
def initialize_weights(m_x, m_h, m_y):
    W1 = np.random.randn(m_h, m_x)
    b1 = np.zeros((m_h, 1))
    W2 = np.random.randn(m_y, m_h)
    b2 = np.zeros((m_y, 1))
    
    initial_weights = {"W1": W1,
              "b1": b1,
              "W2": W2,
              "b2": b2}
    return initial_weights

In [45]:
def forward_pass(X, weights):
    # unpack dict with weights
    W1 = weights["W1"]
    b1 = weights["b1"]
    W2 = weights["W2"]
    b2 = weights["b2"]
    
    # forward pass
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigma(Z2)
    
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    
    return cache

In [51]:
def compute_cost(A2, Y, m_x):
    '''
    Compute cross-enthropy loss
    '''
    cost = -(np.dot(np.log(A2), Y.T) + np.dot((np.log(1 - A2)), (1 - Y.T)))/m_x
    cost = float(np.squeeze(cost)) # convert result in 1-dimensional array
    return cost

In [58]:
def backward_pass(cache, weights, m_x, Y):
    # unpack cache dict
    Z1 = cache['Z1']
    A1 = cache['A1']
    Z2 = cache['Z2']
    A2 = cache['A2']
    
    # unpack weights dict
    W1 = weights["W1"]
    b1 = weights["b1"]
    W2 = weights["W2"]
    b2 = weights["b2"]
    
    # backward pass
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T)/m_x
    db2 = np.sum(dZ2, axis = 1, keepdims = True)/m_x
    
    dZ1 = np.dot(W2.T, dZ2)*(1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, X)/m_x #np.dot(dZ1, X.T)/m_x ??????????????????????????
    db1 = np.sum(dZ1, axis = 1, keepdims = True)/m_x
    
    # You can complete rewrite cache with Z1...A2 to dW1...db2, if nesessary.
    # But I have enough memory in this toy example, so I will create another one dict.
    grad_weights = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    
    return grad_weights

In [59]:
def update_weights(grad_weights, weights, lr):
    
    W1 = weights["W1"]
    b1 = weights["b1"]
    W2 = weights["W2"]
    b2 = weights["b2"]
    
    dW1 = grad_weights["dW1"]
    dW2 = grad_weights["dW2"]
    db1 = grad_weights["db1"]
    db2 = grad_weights["db2"]
    
    W1 = W1 - lr * dW1
    W2 = W2 - lr * dW2
    b1 = b1 - lr * db1
    b2 = b2 - lr * db2
    
    weights = {"W1": W1,
              "b1": b1,
              "W2": W2,
              "b2": b2}
    
    return weights

**Main functions**

In [63]:
def train(X, Y, lr, n_h, iter_num):
    '''
    X - training data
    Y - training labels
    lr - learning rate
    n_h  - size of hidden layer
    iter_num - iteration`s number
    '''
    # getting crucial matrix shapes
    n_x = X_train.shape[0] #784
    n_y = y_train.shape[0] #1, we have a simple labels and will not use softmax function
    
    m_x = X_train.shape[0] # train objects amount
    
    # initialize weights
    weights = initialize_weights(n_x, n_h, n_y)
    W1 = weights["W1"]
    b1 = weights["b1"]
    W2 = weights["W2"]
    b2 = weights["b2"]
    
    for i in range(iter_num):
        cache = forward_pass(X, weights)
        if i % 10 == 0:
            print("Cost after iteration {} : {}".format(i, compute_cost(cache["A2"], Y, m_x)))
        grad_weights = backward_pass(cache, weights, m_x, Y)
        weights = update_weights(grad_weights, weights, lr)
    


In [64]:
train(X_train, y_train, 0.001, 3, 1000)

Cost after iteration 0 : 14179.665993319226
Cost after iteration 10 : 65989.57672737286
Cost after iteration 20 : 157162.07967616734
Cost after iteration 30 : 273775.6337898905
Cost after iteration 40 : 395913.62081023946
Cost after iteration 50 : 519809.94876493816


  """


Cost after iteration 60 : nan
Cost after iteration 70 : nan
Cost after iteration 80 : nan


KeyboardInterrupt: 

In [None]:
def predict():
    pass

In [35]:
# m_x = X_train.shape[0] #784
# #n_x = X_train.shape[1] #60000

# m_h = 6 #hidden layer size, hyperparameter

# m_y = y_train.shape[0] #1, we have a simple labels and will not use softamx function