In [1]:
# As usual, a bit of setup

import time
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.fc_net import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cs231n.solver import Solver

# Render matplotlib figures inside the notebook output cells.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # default figure size for every plot
plt.rcParams['image.interpolation'] = 'nearest' # default interpolation mode for displayed images
plt.rcParams['image.cmap'] = 'gray' # default colormap for displayed images

# Auto-reload external modules so that edits to the imported cs231n .py files
# are picked up without restarting the kernel.
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  Each element's error is |x - y| / max(1e-8, |x| + |y|); the 1e-8 floor
  keeps the division defined when both entries are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

In [2]:
# Load the (preprocessed) CIFAR10 data and print the shape of every array in it.

data = get_CIFAR10_data()
# dict.items() and a single-argument print(...) work on both Python 2 and
# Python 3; the previous iteritems() / print-statement form was Python 2 only.
# The format string keeps the original two-space gap between name and shape.
for k, v in data.items():
  print('%s:  %s' % (k, v.shape))

X_val:  (1000, 3, 32, 32)
X_train:  (49000, 3, 32, 32)
X_test:  (1000, 3, 32, 32)
y_val:  (1000,)
y_train:  (49000,)
y_test:  (1000,)


<h1><center> Affine Layer (Forward) </center></h1>

In [3]:
## Dimensions of the toy problem: 2 examples of shape (4, 5, 6), 3 outputs.
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

## Total number of scalars needed for the input batch and the weight matrix.
input_size = np.prod(input_shape) * num_inputs
weight_size = np.prod(input_shape) * output_dim

## Deterministic, evenly spaced values for the input, weights and biases.
x = np.linspace(-0.1, 0.5, num=input_size).reshape((num_inputs,) + input_shape)
W = np.linspace(-0.2, 0.3, num=weight_size).reshape(-1, output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)

In [4]:
## Number of features per example: D = d_1 * d_2 * ... * d_k.
D = np.prod(x.shape[1:])

## Flatten every example into one row, giving an (N, D) matrix.
x = x.reshape(num_inputs, D)

## Forward pass of the fully connected layer: out = x W + b,
## with b broadcast across the rows.
out = np.dot(x, W) + b

<h1><center> Affine Layer (Backward) </center></h1>

In [5]:
## Random inputs, parameters and upstream gradient for the gradient check.
x = np.random.randn(10, 2, 3)
W = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

## Numerical gradients of the affine layer wrt x, W and b.
## Each lambda evaluates the layer at the array it is handed. The W lambda
## previously ignored its parameter `w` and read the outer `W` instead, which
## is only correct if the checker perturbs the array in place
## (NOTE(review): confirm against cs231n.gradient_check).
dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, W, b)[0], x, dout)
dW_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], W, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, W, b)[0], b, dout)

## Keep the forward-pass cache; the next cell unpacks it as (x, W, b).
_, cache = affine_forward(x, W, b)

In [6]:
## Unpack the values cached by the forward pass.
x, W, b = cache

## N = number of examples; D = d_1 * d_2 * ... * d_k features per example.
numInputs = x.shape[0]
D = np.prod(x.shape[1:])

## Bias gradient: sum the upstream gradient over the example axis.
db = dout.sum(axis=0)

## Flattened (N, D) copy of the input; x itself keeps its original shape.
xCopy = np.copy(x).reshape(numInputs, D)

## Weight gradient: x^T (D, N) times dout (N, M).
dW = np.dot(xCopy.T, dout)

## Input gradient: dout W^T, reshaped back to the shape of x.
dx = np.dot(dout, W.T).reshape(x.shape)

<h1><center> ReLU Forward </center></h1>

In [7]:
## Twelve evenly spaced values in [-0.5, 0.5], arranged as a 3 x 4 matrix.
x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)

## ReLU: element-wise max(0, x).
out = np.maximum(x, 0)

<h1><center> ReLU Backward </center></h1>

In [8]:
## Random layer input and an upstream gradient of the same shape.
x = np.random.randn(10, 10)
dout = np.random.randn(*x.shape)

## Backprop through the ReLU gate: zero the gradient (in place) wherever
## the input was not strictly positive.
dout[np.less_equal(x, 0)] = 0

<h1><center> Two Layer Net </center></h1>

In [28]:
## Sanity checks for TwoLayerNet: parameter initialization and the
## test-time (no labels) forward pass.
N, D, H, C = 3, 5, 50, 7
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)

std = 1e-2
model = TwoLayerNet(inputDim = D, hiddenDim = H, numClasses = C, weightScale = std)

## Single-argument print(...) works on both Python 2 and Python 3; the
## previous print statements were Python 2 only.
print('Testing initialization ... ')
## Weights should have a standard deviation within 10% of `std`;
## biases should be exactly zero.
W1_std = abs(model.params['W1'].std() - std)
b1 = model.params['b1']
W2_std = abs(model.params['W2'].std() - std)
b2 = model.params['b2']
assert W1_std < std / 10, 'First layer weights do not seem right'
assert np.all(b1 == 0), 'First layer biases do not seem right'
assert W2_std < std / 10, 'Second layer weights do not seem right'
assert np.all(b2 == 0), 'Second layer biases do not seem right'

print('Testing test-time forward pass ... ')
## Overwrite the random parameters with deterministic values so the scores
## can be compared against the reference numbers below.
model.params['W1'] = np.linspace(-0.7, 0.3, num=D*H).reshape(D, H)
model.params['b1'] = np.linspace(-0.1, 0.9, num=H)
model.params['W2'] = np.linspace(-0.3, 0.4, num=H*C).reshape(H, C)
model.params['b2'] = np.linspace(-0.9, 0.1, num=C)

## Deterministic (N, D) input; replaces the random X created above.
X = np.linspace(-5.5, 4.5, num=N*D).reshape(D, N).T

## Calling loss() without labels is compared against the (N, C) reference scores.
scores = model.loss(X)
correct_scores = np.asarray(
  [[11.53165108,  12.2917344,   13.05181771,  13.81190102,  14.57198434, 15.33206765,  16.09215096],
   [12.05769098,  12.74614105,  13.43459113,  14.1230412,   14.81149128, 15.49994135,  16.18839143],
   [12.58373087,  13.20054771,  13.81736455,  14.43418138,  15.05099822, 15.66781506,  16.2846319 ]])
## Total absolute deviation from the reference scores must be negligible.
scores_diff = np.abs(scores - correct_scores).sum()
assert scores_diff < 1e-6, 'Problem with test-time forward pass'


Testing initialization ... 
Testing test-time forward pass ... 
