In [10]:
# As usual, a bit of setup

import time
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.fc_net import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cs231n.solver import Solver

%matplotlib inline
# Notebook-wide matplotlib defaults; every figure created after this cell picks them up.
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest' # default interpolation mode for images
plt.rcParams['image.cmap'] = 'gray' # default colormap for images

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  Each element's error is |x - y| / max(1e-8, |x| + |y|); the 1e-8 floor
  keeps the division finite when both entries are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Load the (preprocessed) CIFAR10 data.

data = get_CIFAR10_data()

# Print the shape of every array in the returned dict.
# items() and a single pre-formatted string behave the same on Python 2 and
# Python 3 (iteritems() and the print statement exist only on Python 2), and
# the two spaces in the format reproduce the original "name:  shape" layout.
for k, v in data.items():
  print('%s:  %s' % (k, v.shape))

X_val:  (1000, 3, 32, 32)
X_train:  (49000, 3, 32, 32)
X_test:  (1000, 3, 32, 32)
y_val:  (1000,)
y_train:  (49000,)
y_test:  (1000,)


<h1><center> Affine Layer (Forward) </center></h1>

In [12]:
## Toy problem sizes: two inputs, each of shape (4, 5, 6), mapped to 3 outputs.
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

## Number of scalars in one input, and the total element counts needed
## for the input batch and the weight matrix.
flat_dim = np.prod(input_shape)
input_size = num_inputs * flat_dim
weight_size = output_dim * flat_dim

## Deterministic, evenly spaced values so the result is reproducible.
x = np.linspace(-0.1, 0.5, num=input_size).reshape((num_inputs,) + input_shape)
W = np.linspace(-0.2, 0.3, num=weight_size).reshape(flat_dim, output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)

In [13]:
## Features per example once flattened: D = d_1 * d_2 * ..... * d_k
D = np.array(x.shape[1:]).prod()

## Flatten each example into one row, so x becomes (N * D).
x = np.reshape(x, (num_inputs, D))

## Forward pass of the fully connected layer: out = x W + b
out = np.dot(x, W) + b

<h1><center> Affine Layer (Backward) </center></h1>

In [14]:
## Random inputs, parameters and upstream gradient for the gradient check.
x = np.random.randn(10, 2, 3)
W = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

## Numerical gradients of the affine layer wrt each of its three arguments.
## Each lambda evaluates the layer at the array it is handed. The weight
## lambda previously ignored its parameter and read the global W, which is
## only correct if the checker perturbs W in place (presumably it does --
## verify against cs231n.gradient_check).
dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, W, b)[0], x, dout)
dW_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], W, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, W, b)[0], b, dout)

## Keep only the cache of the forward pass; the backward cell unpacks it.
_, cache = affine_forward(x, W, b)

In [15]:
## Unpack what the forward pass stored.
x, W, b = cache

## Number of inputs N (leading axis of x).
numInputs = x.shape[0]

## Features per example once flattened: D = d_1 * d_2 * ..... * d_k
D = np.array(x.shape[1:]).prod()

## Work on a separate (N * D) copy so x keeps its original shape.
xCopy = np.copy(x).reshape(numInputs, D)

## Derivative wrt biases: add up dout over the N inputs.
db = dout.sum(axis=0)

## Derivative wrt weights: x^T . dout
dW = np.dot(xCopy.T, dout)

## Derivative wrt input: dout . W^T, folded back into the shape of x.
dx = np.dot(dout, W.T).reshape(x.shape)

<h1><center> ReLU Forward </center></h1>

In [16]:
## Twelve evenly spaced values in [-0.5, 0.5], arranged as a 3 x 4 matrix.
x = np.linspace(-0.5, 0.5, 12).reshape((3, 4))

## ReLU, f(x) = max(0, x): negative entries become 0, the rest pass through.
out = np.maximum(x, 0)

<h1><center> ReLU Backward </center></h1>

In [17]:
## Random layer input and an upstream gradient of the same shape.
x = np.random.randn(10, 10)
dout = np.random.randn(x.shape[0], x.shape[1])

## BackProp through the ReLU gate: no gradient flows where the input was
## not positive, so those entries of dout are zeroed in place.
blocked = x <= 0
dout[blocked] = 0

<h1><center> Two Layer Net </center></h1>

In [18]:
## Problem sizes: N examples, D input features, H hidden units, C classes.
N, D, H, C = 3, 5, 50, 7
## NOTE: this random X is replaced by a deterministic one before the forward
## pass below, and y is not used anywhere in this cell.
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)

std = 1e-2
model = TwoLayerNet(inputDim = D, hiddenDim = H, numClasses = C, weightScale = std)

## Initialisation check: weight std within 10% of `std`, biases exactly zero.
## print(...) with a single string prints the same text on Python 2 and 3.
print('Testing initialization ... ')
W1_std = abs(model.params['W1'].std() - std)
b1 = model.params['b1']
W2_std = abs(model.params['W2'].std() - std)
b2 = model.params['b2']
assert W1_std < std / 10, 'First layer weights do not seem right'
assert np.all(b1 == 0), 'First layer biases do not seem right'
assert W2_std < std / 10, 'Second layer weights do not seem right'
assert np.all(b2 == 0), 'Second layer biases do not seem right'

## Forward-pass check: overwrite the parameters and the input with fixed,
## evenly spaced values so the scores can be compared with known numbers.
print('Testing test-time forward pass ... ')
model.params['W1'] = np.linspace(-0.7, 0.3, num=D*H).reshape(D, H)
model.params['b1'] = np.linspace(-0.1, 0.9, num=H)
model.params['W2'] = np.linspace(-0.3, 0.4, num=H*C).reshape(H, C)
model.params['b2'] = np.linspace(-0.9, 0.1, num=C)

X = np.linspace(-5.5, 4.5, num=N*D).reshape(D, N).T

## loss() is called without labels; its return value is treated as the scores.
scores = model.loss(X)
correct_scores = np.asarray(
  [[11.53165108,  12.2917344,   13.05181771,  13.81190102,  14.57198434, 15.33206765,  16.09215096],
   [12.05769098,  12.74614105,  13.43459113,  14.1230412,   14.81149128, 15.49994135,  16.18839143],
   [12.58373087,  13.20054771,  13.81736455,  14.43418138,  15.05099822, 15.66781506,  16.2846319 ]])
## Total absolute deviation from the reference scores must be negligible.
scores_diff = np.abs(scores - correct_scores).sum()
assert scores_diff < 1e-6, 'Problem with test-time forward pass'


Testing initialization ... 
Testing test-time forward pass ... 


<h1><center> Fully Connected Net </center></h1>

<h2><center> Parameter Initialisation </center></h2>

In [19]:
## Settings for the toy fully connected network.
params = {}
weightScale = 5e-2
inputDim = 15
hiddenDims = [20, 30]
numClasses = 10

## Width of every layer in order: input, hidden layers, output.
layerSizes = [inputDim] + hiddenDims + [numClasses]

## One (W, b) pair per consecutive pair of widths: weights are drawn from a
## zero-mean normal with std weightScale, biases start at zero.
for i, (fanIn, fanOut) in enumerate(zip(layerSizes[:-1], layerSizes[1:])):
    layerId = str(i + 1)
    params['W' + layerId] = np.random.normal(0, weightScale, (fanIn, fanOut))
    params['b' + layerId] = np.zeros(fanOut)


<h2><center> Forward Pass </center></h2>

In [24]:
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))
X = X.astype(np.float32)
dropout_param = {'mode': 'train', 'p': 0.5}

## Creating a dictionary to store the layer outputs and cache.
outputs = {}
cache = {}

## Index of the final affine layer, derived from the architecture rather than
## from whatever value a loop variable happens to hold after a loop.
lastLayer = len(hiddenDims) + 1

## Activation fed to the next layer. A separate name keeps X as the raw input,
## so the cell's input is not replaced by hidden activations.
layerInput = X

## Computing outputs and cache of hidden layers (affine -> ReLU -> dropout).
for i in range(len(hiddenDims)):

    layerName = 'hiddenLayer' + str(i + 1)
    dropName = 'hiddenLayerDrop' + str(i + 1)
    outputs[layerName], cache[layerName] = affine_relu_forward(layerInput, params['W' + str(i + 1)], params['b' + str(i + 1)])
    outputs[dropName], cache[dropName] = dropout_forward(outputs[layerName], dropout_param)
    layerInput = outputs[dropName]
    ## Progress trace; print(i) prints the same text on Python 2 and 3.
    print(i)

## Computing outputs and cache of the last fully connected layer.
outputs['lastFC'], cache['lastFC'] = affine_forward(layerInput, params['W' + str(lastLayer)], params['b' + str(lastLayer)])

## Updating scores.
scores = outputs['lastFC']

0
1


In [25]:
## Display every stored layer output (the full arrays are printed).
outputs

{'hiddenLayer1': array([[ 0.        ,  0.        ,  0.        ,  0.02607499,  0.07893228,
          0.        ,  0.        ,  0.        ,  0.07910739,  0.        ,
          0.05491234,  0.        ,  0.41380838,  0.214339  ,  0.15115765,
          0.03036752,  0.        ,  0.15327572,  0.31620402,  0.03212944],
        [ 0.        ,  0.04105449,  0.        ,  0.        ,  0.        ,
          0.21353382,  0.04882277,  0.17599411,  0.        ,  0.        ,
          0.03762548,  0.        ,  0.        ,  0.27596727,  0.        ,
          0.20526127,  0.29660331,  0.00753878,  0.0181373 ,  0.43455143]]),
 'hiddenLayer2': array([[ 0.02091677,  0.        ,  0.04722296,  0.0090995 ,  0.01197577,
          0.        ,  0.00886081,  0.        ,  0.        ,  0.00884545,
          0.04087364,  0.0593303 ,  0.02639567,  0.07914092,  0.        ,
          0.01660949,  0.06276618,  0.00429832,  0.05919623,  0.        ,
          0.06710403,  0.        ,  0.        ,  0.0068579 ,  0.        ,
  

<h2><center> Backward Pass </center></h2>

In [26]:
reg = 3.14
loss, grads = 0.0, {}

## Computing the loss and the gradient for the softmax layer.
loss, dscores = softmax_loss(scores, y)

## Index of the final affine layer, computed here from the architecture.
## Using the loop variable left behind by another cell would pick the wrong
## layer whenever a different loop had run in between.
lastLayer = len(hiddenDims) + 1
lastW = 'W' + str(lastLayer)
lastB = 'b' + str(lastLayer)

## Adding regularisation to the loss: 0.5 * reg * sum(W^2) for every layer.
for j in range(lastLayer):

    layerW = params['W' + str(j + 1)]
    loss += 0.5 * reg * np.sum(layerW * layerW)

## Performing backprop on the last fully connected layer.
dLastFC, grads[lastW], grads[lastB] = affine_backward(dscores, cache['lastFC'])
## Gradient of the regularisation term wrt the weights.
grads[lastW] += reg * params[lastW]

## Performing backprop on the hidden layers, from the last one to the first.
for i in range(len(hiddenDims), 0, -1):

    ## Progress trace; print(i) prints the same text on Python 2 and 3.
    print(i)
    dHiddenDropout = dropout_backward(dLastFC, cache['hiddenLayerDrop' + str(i)])
    dHiddenRelu, grads['W' + str(i)], grads['b' + str(i)] = affine_relu_backward(dHiddenDropout, cache['hiddenLayer' + str(i)])
    grads['W' + str(i)] += reg * params['W' + str(i)]
    ## The gradient wrt this layer's input is the upstream gradient of the layer below.
    dLastFC = dHiddenRelu

2
1


In [27]:
## Display the values cached by each layer's forward pass (the full arrays are printed).
cache

{'hiddenLayer1': ((array([[ 0.96554369, -0.27234837, -0.02399985,  0.10747438,  1.16306412,
            0.10552256, -1.51146781, -1.22684371,  0.62254721,  0.52552104,
            1.13797235, -0.98352355,  1.5097146 ,  0.95982569, -1.93638468],
          [ 0.90451193,  0.6059621 , -1.39523435,  0.13364638, -0.24734084,
           -1.60432684, -0.83816838,  0.00716738, -1.94305491, -1.63176   ,
           -2.07404089,  0.58184671,  0.27062485,  1.31253564, -1.37660038]], dtype=float32),
   array([[ 0.07497395,  0.03517791, -0.0034596 ,  0.00410609, -0.07889883,
            0.00258452,  0.04675758, -0.05330736, -0.02478993,  0.03356691,
            0.01943683,  0.09584876,  0.06801221,  0.01530044,  0.0243041 ,
           -0.02488814,  0.0080165 ,  0.02246509, -0.01017845,  0.00207934],
          [-0.07384992,  0.08577167, -0.00148549, -0.06220524,  0.03490603,
           -0.00235683,  0.02453902, -0.01424773,  0.04539472,  0.08552971,
            0.05857123,  0.03676285, -0.09900237, -0