In [2]:
# As usual, a bit of setup

import time
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.fc_net import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cs231n.solver import Solver

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

In [3]:
# Load the (preprocessed) CIFAR10 data.

data = get_CIFAR10_data()
for k, v in data.iteritems():
  print '%s: ' % k, v.shape

X_val:  (1000, 3, 32, 32)
X_train:  (49000, 3, 32, 32)
X_test:  (1000, 3, 32, 32)
y_val:  (1000,)
y_train:  (49000,)
y_test:  (1000,)


<h1><center> Affine Layer (Forward) </center></h1>

In [4]:
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)

x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
W = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)

In [5]:
## Defining D = d_1 * d_2 * ..... * d_k
D = np.prod(np.array(x.shape[1:]))

## Reshaping the input vector to dimensions (N * D).
x = x.reshape(num_inputs, D)

## Performing forward pass of fully connected layer.
out = x.dot(W) + b

<h1><center> Affine Layer (Backward) </h1></center>

In [6]:
x = np.random.randn(10, 2, 3)
W = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, W, b)[0], x, dout)
dW_num = eval_numerical_gradient_array(lambda w: affine_forward(x, W, b)[0], W, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, W, b)[0], b, dout)

_, cache = affine_forward(x, W, b)

In [7]:
x, W, b = cache

## Defining the number of inputs.
numInputs = x.shape[0]
    
## Defining D = d_1 * d_2 * ..... * d_k
D = np.prod(np.array(x.shape[1:]))

## Making a copy of x for performing vectorized operations.
xCopy = np.copy(x)

## Computing the derivative wrt Biases.
db = np.sum(dout, axis = 0)

## Reshaping the input vector to dimensions (N * D).
xCopy = xCopy.reshape(numInputs, D)

## Computing the derivative wrt Weights.
dW = xCopy.T.dot(dout)

## Computing the derivative wrt Input.
dx = dout.dot(W.T)

## Reshaping dx.
dx = dx.reshape(x.shape)

<h1><center> ReLU Forward </center></h1>

In [8]:
x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)

## Apply the ReLU function -> f : max(0,x).
out = np.clip(x, 0, None)

array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.04545455,  0.13636364],
       [ 0.22727273,  0.31818182,  0.40909091,  0.5       ]])

<h1><center> ReLU Backward </center></h1>

In [10]:
x = np.random.randn(10, 10)
dout = np.random.randn(*x.shape)

## BackProp through ReLU gate.
dout[x <= 0] = 0
dx

0

In [11]:
x

array([[ 0.39164295, -0.79769136,  0.28735596,  0.40347591, -0.10815237,
        -0.35446383, -0.57378113,  1.01145312, -1.28713829, -0.61616205],
       [-0.32359089, -0.2852897 ,  0.69647461,  2.40789562, -0.84624562,
        -0.71669625, -1.0242654 , -0.21732616,  1.26398013, -0.2608999 ],
       [ 0.90467173,  0.40028064, -1.29081234,  0.94578446, -0.81435546,
         2.09873751,  0.86239658,  0.04752972,  0.67315984, -1.70108358],
       [ 1.10433971,  0.91914915, -0.73090809,  1.16951822,  0.03076439,
        -0.57590263, -0.23044095, -0.4106192 ,  0.94546369,  1.01176449],
       [ 0.09124521,  0.07274917,  1.31220899, -0.02525266, -0.49917784,
         0.44070472, -0.39562847,  1.55814624,  0.47190073,  0.24475753],
       [ 0.95475616, -0.25437232, -1.37066828,  0.9024877 , -0.53680422,
         0.67381101, -0.5605206 , -0.26707372,  1.16534415,  0.32356847],
       [-0.35576358, -1.10485316,  0.31022024,  1.40305598,  0.50432478,
        -1.51177716,  0.51929776, -1.60126071

In [12]:
dout

array([[-0.18837948,  0.        ,  0.44918628, -0.53846802,  0.        ,
         0.        ,  0.        , -1.18585718,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.22388673, -1.00279747,  0.        ,
         0.        ,  0.        ,  0.        , -0.19097469,  0.        ],
       [-0.78067125,  1.43265362,  0.        ,  0.36087784,  0.        ,
         0.05604862,  0.77709261, -0.29198096,  1.85855462,  0.        ],
       [-2.06313609, -0.20062104,  0.        ,  0.42984789,  1.94284173,
         0.        ,  0.        ,  0.        , -0.29507396,  1.3694706 ],
       [ 0.44926258,  0.12248105,  0.97456951,  0.        ,  0.        ,
        -0.07185405,  0.        ,  0.86855631, -0.24081621, -0.50021633],
       [-1.37420825,  0.        ,  0.        ,  1.06609814,  0.        ,
        -0.68319251,  0.        ,  0.        ,  0.77295486, -0.78332149],
       [ 0.        ,  0.        , -0.23297831,  0.02078079, -1.3964227 ,
         0.        ,  0.01481958,  0.        