In [1]:
# As usual, a bit of setup

import time
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.fc_net import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cs231n.solver import Solver

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  The error of each element is |x - y| / max(1e-8, |x| + |y|); the 1e-8
  floor keeps the ratio defined when both entries are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

In [2]:
# Load the (preprocessed) CIFAR10 data and list the shape of every entry.

data = get_CIFAR10_data()
for k, v in data.items():
  # A single pre-formatted string (instead of a Python-2 print statement with
  # two arguments) produces the same text under both Python 2 and Python 3.
  print('%s:  %s' % (k, v.shape))

X_val:  (1000, 3, 32, 32)
X_train:  (49000, 3, 32, 32)
X_test:  (1000, 3, 32, 32)
y_val:  (1000,)
y_train:  (49000,)
y_test:  (1000,)


<h1><center> Forward Pass </center></h1>

In [3]:
## Specifying the different inputs required for the batch normalization forward pass.

## Simulate the forward pass for a two-layer network:
## random inputs -> affine (W1) -> ReLU -> affine (W2).
## 'a' has shape (N, D3) and is the activation that will be batch-normalized.
N, D1, D2, D3 = 200, 50, 60, 3
X = np.random.randn(N, D1)
W1 = np.random.randn(D1, D2)
W2 = np.random.randn(D2, D3)
a = np.maximum(0, X.dot(W1)).dot(W2)

## Setting gamma = 1 and beta = 0, i.e. an identity scale-and-shift, so the
## batch-normalized output keeps zero mean and unit variance per feature.
gamma = np.ones(D3)
beta = np.zeros(D3)

## Additional parameters.
##   mode        : 'train' here.
##   eps         : small constant for numerical stability.
##   momentum    : weight given to the previous running statistics.
##   runningMean : running average of the per-feature mean, starts at zero.
##   runningVar  : running average of the per-feature variance, starts at zero.
batchNormDict = {'mode' : 'train',
                 'eps' : 1e-5,
                 'momentum' : 0.9,
                 'runningMean' : np.zeros(D3),
                 'runningVar' : np.zeros(D3)}

In [14]:
## Computing the mean and variance of the input along each dimension (feature).
sampleMean = np.mean(a, axis = 0)
sampleVariance = np.var(a, axis = 0)

## Normalizing the input: subtract the per-feature mean and divide by the
## per-feature standard deviation. 'eps' is added to the variance before the
## square root so the denominator never reaches zero.
eps = batchNormDict['eps']
out = (a - sampleMean) / np.sqrt(sampleVariance + eps)

## Scaling and Shifting the normalized data.
out = (gamma * out + beta)

## Updating the running mean and running variance as exponential moving
## averages of the batch statistics. NOTE: this mutates batchNormDict in place,
## so re-running this cell advances the running statistics by another step.
momentum = batchNormDict['momentum']
batchNormDict['runningMean'] = momentum * batchNormDict['runningMean'] + (1 - momentum) * sampleMean
batchNormDict['runningVar'] = momentum * batchNormDict['runningVar'] + (1 - momentum) * sampleVariance