In [1]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
from dataset import load_svhn, random_split_train_val
from gradient_check import check_layer_gradient, check_layer_param_gradient, check_model_gradient
from layers import FullyConnectedLayer, ReLULayer
from model import TwoLayerNet
from trainer import Trainer, Dataset
from optim import SGD, MomentumSGD
from metrics import multiclass_accuracy

In [3]:
def prepare_for_neural_network(train_X, test_X):
    """Flatten, rescale and mean-center the train and test arrays.

    Each array is reshaped to one row per sample (the first axis is kept,
    all remaining axes are collapsed), converted to float and divided by
    255.0. The per-feature mean is computed on the training rows only and
    subtracted from both sets, so the test set is shifted by the training
    statistics rather than its own.

    Arguments:
      train_X: array whose first axis indexes training samples
      test_X: array whose first axis indexes test samples

    Returns:
      (train_flat, test_flat): 2-D float arrays of shape (num_samples, num_features)
    """
    def _flatten_and_scale(samples):
        # -1 lets numpy infer the flattened feature count per sample.
        # Division by 255.0 presumably maps 8-bit pixel values into [0, 1].
        return samples.reshape(samples.shape[0], -1).astype(float) / 255.0

    flat_train = _flatten_and_scale(train_X)
    flat_test = _flatten_and_scale(test_X)

    # Center both sets on the mean of the *training* data
    feature_mean = flat_train.mean(axis=0)
    return flat_train - feature_mean, flat_test - feature_mean
    
# Load SVHN from the "data" location; max_train / max_test presumably cap the
# number of samples read for each split — TODO confirm against dataset.load_svhn.
train_X, train_y, test_X, test_y = load_svhn("data", max_train=10000, max_test=1000)    
# Flatten, rescale and mean-center both sets (the mean comes from the training set).
train_X, test_X = prepare_for_neural_network(train_X, test_X)
# Split train into train and val
# NOTE(review): no random seed is set anywhere above; if random_split_train_val
# draws from numpy's global RNG, the split differs between runs — confirm.
train_X, train_y, val_X, val_y = random_split_train_val(train_X, train_y, num_val = 1000)

Как всегда, начинаем с кирпичиков

Мы будем реализовывать необходимые нам слои по очереди. Каждый слой должен реализовать:

- прямой проход (forward pass), который генерирует выход слоя по входу и запоминает необходимые данные;
- обратный проход (backward pass), который получает градиент по выходу слоя и вычисляет градиент по входу и по параметрам.

Начнем с ReLU, у которого параметров нет.

In [4]:
# TODO: ReLULayer has to be implemented in layers.py before this cell can pass.
# Note: the gradient_check implementation must be copied over from the previous assignment.

# Small 2x3 input with both positive and negative entries.
X = np.array([
    [1, -2, 3],
    [-1, 2, 0.1],
])

relu_layer = ReLULayer()
assert check_layer_gradient(relu_layer, X)

CHECK GRADIENT
x is 
 [[ 1.  -2.   3. ]
 [-1.   2.   0.1]]
analytic grad is 
 [[ 1.61599406  0.          2.76252524]
 [-0.         -0.5060347  -0.94302519]]
numeric grad array is 
 [[ 1.61599406  0.          2.76252524]
 [ 0.         -0.5060347  -0.94302519]]
Gradient check passed!


In [5]:
# TODO: FullyConnectedLayer needs working forward and backward methods.
# First check the gradient with respect to the layer input.
fc_in, fc_out = 3, 4
assert check_layer_gradient(FullyConnectedLayer(fc_in, fc_out), X)

# TODO: the layer must also store gradients for its W and B parameters.
# A fresh layer is built for every parameter check.
for param_name in ('W', 'B'):
    assert check_layer_param_gradient(FullyConnectedLayer(fc_in, fc_out), X, param_name)

CHECK GRADIENT
x is 
 [[ 1.  -2.   3. ]
 [-1.   2.   0.1]]
analytic grad is 
 [[-2.48170965e-04  2.14648270e-04 -6.53086067e-05]
 [ 5.30725442e-05  2.79276613e-04 -8.01671530e-05]]
numeric grad array is 
 [[-2.48170965e-04  2.14648270e-04 -6.53086067e-05]
 [ 5.30725442e-05  2.79276613e-04 -8.01671530e-05]]
Gradient check passed!
CHECK GRADIENT
x is 
 [[-1.97365551e-03  6.80635002e-04  5.55869402e-05  5.41565103e-04]
 [ 1.83213048e-04 -5.59157557e-04  8.94307167e-04  1.98126611e-04]
 [-1.28938649e-03  1.30851716e-03 -1.43047956e-03  9.00929234e-04]]
analytic grad is 
 [[-1.5033225   1.83023836  0.93577972 -0.30656704]
 [ 3.006645   -3.66047671 -1.87155943  0.61313409]
 [ 0.15033225  5.49071507 -0.09357797 -0.91970113]]
numeric grad array is 
 [[-1.5033225   1.83023836  0.93577972 -0.30656704]
 [ 3.006645   -3.66047671 -1.87155943  0.61313409]
 [ 0.15033225  5.49071507 -0.09357797 -0.91970113]]
Gradient check passed!
CHECK GRADIENT
x is 
 [[-1.33418835e-03  2.29823155e-04  1.28956322e-03

In [11]:
# TODO: compute_loss_and_gradients has to be implemented in model.py.
model = TwoLayerNet(
    n_input=train_X.shape[1],
    n_output=10,
    hidden_layer_size=3,
    reg=0,
)

# Two samples are enough for a gradient check.
check_X, check_y = train_X[:2], train_y[:2]
loss = model.compute_loss_and_gradients(check_X, check_y)

# TODO: the backward pass must be implemented and all params aggregated for this to pass.
check_model_gradient(model, check_X, check_y)

Checking gradient for W1
CHECK GRADIENT
x is 
 [[ 1.12196096e-03  2.60386566e-03 -7.70736619e-05]
 [-4.77813258e-04  2.19520154e-04  8.68511430e-04]
 [ 1.29594320e-03 -6.02318160e-04  1.00980449e-03]
 ...
 [ 6.03859817e-04  6.38097311e-04 -2.10330014e-03]
 [ 7.08521990e-04  4.65736852e-04  1.20636367e-03]
 [-1.15133796e-03  3.27460859e-04 -3.72324315e-04]]
analytic grad is 
 [[ 1.19667622e-05 -4.10892249e-07  3.72685554e-06]
 [ 1.42586847e-05 -4.89587985e-07  1.48172437e-06]
 [ 4.05510533e-05 -1.39236605e-06 -3.73493220e-06]
 ...
 [-1.11350711e-04  3.82335198e-06  5.25464616e-06]
 [-9.94988794e-05  3.41640601e-06  1.66032268e-06]
 [-7.92541950e-05  2.72128198e-06 -4.03205868e-06]]
numeric grad array is 
 [[ 1.19667387e-05 -4.10915746e-07  3.72684106e-06]
 [ 1.42586831e-05 -4.89652763e-07  1.48170365e-06]
 [ 4.05510736e-05 -1.39239731e-06 -3.73492348e-06]
 ...
 [-1.11350706e-04  3.82334164e-06  5.25464117e-06]
 [-9.94988536e-05  3.41637829e-06  1.66036074e-06]
 [-7.92542032e-05  2.72128

True

In [20]:
# TODO Now implement l2 regularization in the forward and backward pass
model_with_reg = TwoLayerNet(n_input = train_X.shape[1], n_output = 10, hidden_layer_size = 3, reg = 1e1)
loss_with_reg = model_with_reg.compute_loss_and_gradients(train_X[:2], train_y[:2])

# `loss` is the value computed with reg = 0 by the earlier gradient-check cell,
# so that cell must have been run before this one.
# The message arguments follow the order of the placeholders: the regularized
# loss first, then the unregularized one (they were previously swapped).
assert loss_with_reg > loss and not np.isclose(loss_with_reg, loss), \
    "Loss with regularization (%2.4f) should be higher than without it (%2.4f)!" % (loss_with_reg, loss)

# Last expression of the cell, so its return value is displayed.
check_model_gradient(model_with_reg, train_X[:2], train_y[:2])

Checking gradient for W1
CHECK GRADIENT
x is 
 [[ 5.88921615e-04 -7.83157141e-04 -1.76049658e-03]
 [ 2.89742218e-04  3.36321714e-04  1.21929277e-03]
 [-9.60196226e-04  6.31096920e-04 -1.06689282e-03]
 ...
 [-1.10271763e-03 -1.30273392e-03 -3.16518360e-05]
 [-1.37680448e-04 -3.26463798e-04  1.00704554e-03]
 [ 1.23954248e-03 -4.42987199e-04  1.32286296e-03]]
analytic grad is 
 [[ 0.01177843 -0.01566314 -0.0352268 ]
 [ 0.00579484  0.00672643  0.02437915]
 [-0.01920392  0.01262194 -0.02132095]
 ...
 [-0.02205435 -0.02605468 -0.00065682]
 [-0.00275361 -0.00652928  0.02013339]
 [ 0.02479085 -0.00885974  0.02647551]]
numeric grad array is 
 [[ 0.01177843 -0.01566314 -0.0352268 ]
 [ 0.00579484  0.00672643  0.02437915]
 [-0.01920392  0.01262194 -0.02132095]
 ...
 [-0.02205435 -0.02605468 -0.00065682]
 [-0.00275361 -0.00652928  0.02013339]
 [ 0.02479085 -0.00885974  0.02647551]]
Gradient check passed!
Checking gradient for B1
CHECK GRADIENT
x is 
 [[ 0.001761   -0.00102397  0.0009029 ]]
analytic

True

In [28]:
# Last step: the predict function.
# TODO: implement predict for TwoLayerNet, then score it on the first 30 training samples.
# Question to think about: what accuracy value would we expect here?
# NOTE(review): no training step is visible before this cell and the model has
# 10 outputs, so a score near chance level (~0.1) seems plausible — confirm.
predictions = model_with_reg.predict(train_X[:30])
multiclass_accuracy(predictions, train_y[:30])

prediction shape is 
 (30,)


0.06666666666666667

In [7]:
# Scratch check: multiplying two equal-shaped arrays works element by element.
X = np.array([[1, -2, 3], [-1, 2, 0.1]])
Y = np.array([[1, 3, 3], [-1, 2, 0.1]])
print(np.multiply(X, Y))

# Despite its name, `zer` is a 3x4 array filled with ones.
zer = np.ones(shape=(3, 4))
print("zer\n", zer)

[[ 1.   -6.    9.  ]
 [ 1.    4.    0.01]]
zer
 [[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
