In [1]:
# As usual, a bit of setup

import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.cnn import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver
from numpy import unravel_index

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  The error of each element is |x - y| / (|x| + |y|); the denominator is
  clamped from below at 1e-8 so that two zeros compare as equal instead of
  dividing by zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

In [2]:
# Load the (preprocessed) CIFAR10 data and report the shape of every entry.

data = get_CIFAR10_data()
# dict.items() and the single-argument print(...) form run unchanged on both
# Python 2 and Python 3. The two spaces after the colon reproduce what the
# former `print a, b` statement emitted.
for k, v in data.items():
  print('%s:  %s' % (k, v.shape))

X_val:  (1000, 3, 32, 32)
X_train:  (49000, 3, 32, 32)
X_test:  (1000, 3, 32, 32)
y_val:  (1000,)
y_train:  (49000,)
y_test:  (1000,)


In [3]:
## Deterministic toy input, filter bank and biases for the convolution
## forward pass (evenly spaced values, so results are reproducible).
x_shape, w_shape = (2, 3, 4, 4), (3, 3, 4, 4)

x = np.linspace(-0.1, 0.5, num = np.prod(x_shape))
x = x.reshape(x_shape)

w = np.linspace(-0.2, 0.3, num = np.prod(w_shape))
w = w.reshape(w_shape)

b = np.linspace(-0.1, 0.2, num = 3)

## Stride and zero-padding width used by the convolution.
convParam = dict(stride = 2, pad = 1)

In [4]:
## Read the stride and the zero-padding width out of the parameter dict.
stride = convParam['stride']
padWidth = convParam['pad']

## Zero-pad only the last two axes of x (padWidth on each side); the first
## two axes are left unpadded.
xPadded = np.pad(
    x,
    pad_width = ((0, 0), (0, 0), (padWidth, padWidth), (padWidth, padWidth)),
    mode = 'constant',
    constant_values = 0,
)
xPadded.shape

(2, 3, 6, 6)

In [5]:
## Input dimensions, read from x's axes: (size, depth, height, width).
inputSize = x.shape[0]
inputDepth = x.shape[1]
inputHeight = x.shape[-2]
inputWidth = x.shape[-1]

## Filter-bank dimensions, read from w's axes:
## (number of filters, depth, height, width).
numFilters = w.shape[0]
filterDepth = w.shape[1]
filterHeight = w.shape[-2]
filterWidth = w.shape[-1]

## Output dimensions. Floor division (//) keeps these integers on both
## Python 2 and Python 3, which range() and np.empty() require.
outputHeight = (inputHeight - filterHeight + 2 * padWidth) // stride + 1
outputWidth = (inputWidth - filterWidth + 2 * padWidth) // stride + 1
outputDepth = numFilters

## Allocate the output activation map (contents are uninitialised).
## Axis order is (size, depth, height, width) so that it matches the
## [l, k, i, j] indexing of the forward loop, where i runs over the output
## height and j over the output width.
outputActivationMap = np.empty([inputSize, outputDepth, outputHeight, outputWidth])
print(outputActivationMap.shape)

(2, 3, 2, 2)


In [6]:
## Naive convolution forward pass: for every input, filter and output
## position, multiply the matching window of the padded input with the
## filter element-wise, sum the products and add the filter's bias.
for l in range(inputSize):
    for k in range(numFilters):
        for i in range(outputHeight):
            rowStart = i * stride
            for j in range(outputWidth):
                colStart = j * stride

                ## Window of the padded input covered by the filter (all channels).
                xImageSlice = xPadded[l, :, rowStart : rowStart + filterHeight, colStart : colStart + filterWidth]

                ## Filter response at this position.
                outputActivationMap[l, k, i, j] = np.sum(xImageSlice * w[k]) + b[k]

In [7]:
## Random input, filters, biases and upstream gradient for the convolution
## backward pass. The draws are kept in their original order.
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2)
dOut = np.random.randn(4, 2, 5, 5)

## Stride and zero-padding width used by the convolution.
convParam = dict(stride = 1, pad = 1)

In [8]:
## Stride and zero-padding width, taken from the parameter dict.
stride = convParam['stride']
padWidth = convParam['pad']

## Surround the last two axes of x with padWidth zeros on every side; the
## first two axes get no padding.
xPadded = np.pad(
    x,
    pad_width = ((0, 0), (0, 0), (padWidth, padWidth), (padWidth, padWidth)),
    mode = 'constant',
    constant_values = 0,
)
xPadded.shape

(4, 3, 7, 7)

In [9]:
## Input dimensions, read from x's axes: (size, depth, height, width).
inputSize = x.shape[0]
inputDepth = x.shape[1]
inputHeight = x.shape[-2]
inputWidth = x.shape[-1]

## Filter-bank dimensions, read from w's axes:
## (number of filters, depth, height, width).
numFilters = w.shape[0]
filterDepth = w.shape[1]
filterHeight = w.shape[-2]
filterWidth = w.shape[-1]

## Output dimensions. Floor division (//) keeps these integers on both
## Python 2 and Python 3, which range() and np.empty() require.
outputHeight = (inputHeight - filterHeight + 2 * padWidth) // stride + 1
outputWidth = (inputWidth - filterWidth + 2 * padWidth) // stride + 1
outputDepth = numFilters

## Allocate the output activation map (contents are uninitialised).
## Height comes before width so the buffer agrees with the [n, k, i, j]
## indexing used by the loops, where i runs over rows and j over columns.
outputActivationMap = np.empty([inputSize, outputDepth, outputHeight, outputWidth])
print(outputActivationMap.shape)

(4, 2, 5, 5)


In [10]:
## Zero-filled gradient buffers, each with the shape and dtype of the array
## it is the gradient of (weights, biases, input, padded input).
dW, dB, dX, dXPadded = (np.zeros_like(arr) for arr in (w, b, x, xPadded))

In [11]:
## Shapes of the input, the filters, the biases and the output buffer.
## print(...) with a single argument behaves the same on Python 2 and 3.
print(x.shape)
print(w.shape)
print(b.shape)
print(outputActivationMap.shape)

(4, 3, 5, 5)
(2, 3, 3, 3)
(2,)
(4, 2, 5, 5)


In [12]:
## Naive convolution backward pass.
## The upstream gradient is dOut. outputActivationMap was only allocated with
## np.empty and never filled in this experiment, so it must not be used as
## the gradient source.
## dW, dB and dXPadded are accumulated with +=, so they must be all-zero
## before this cell runs (re-create the placeholders before re-running).
for n in range(inputSize):
    for k in range(numFilters):

        ## Bias gradient: sum of the upstream gradient over filter k's map.
        dB[k] += dOut[n, k].sum()

        for i in range(outputHeight):
            rowStart = i * stride
            for j in range(outputWidth):
                colStart = j * stride

                ## Window of the padded input that produced output (i, j).
                xImageSlice = xPadded[n, :, rowStart : rowStart + filterHeight, colStart : colStart + filterWidth]

                ## Upstream gradient of that single output value.
                dOutUpstream = dOut[n, k, i, j]

                ## Weight gradient: the window scaled by the upstream gradient.
                dW[k] += xImageSlice * dOutUpstream

                ## Input gradient: the filter scaled by the upstream gradient,
                ## scattered back onto the same window of the padded input.
                dXPadded[n, :, rowStart : rowStart + filterHeight, colStart : colStart + filterWidth] += w[k] * dOutUpstream

## Crop the zero padding so dX has the shape of x. Explicit end indices are
## used (rather than negative ones) so that padWidth == 0 also works.
dX = dXPadded[:, :, padWidth : padWidth + inputHeight, padWidth : padWidth + inputWidth]
            

In [None]:
## Testing notational backpropagation.

In [26]:
## Random input and filters for the un-padded, stride-2 experiment.
x = np.random.randn(4, 3, 4, 4)
w = np.random.randn(2, 3, 2, 2)
## One bias per filter. The forward loop of this experiment reads b[k], so b
## is defined here instead of relying on a value left over from another cell.
b = np.random.randn(w.shape[0])
convParam = {'stride': 2, 'pad': 0}

## Unpacking the convolutional parameters.
stride, padWidth = convParam['stride'], convParam['pad']

In [27]:
## Input dimensions, read from x's axes: (size, depth, height, width).
inputSize = x.shape[0]
inputDepth = x.shape[1]
inputHeight = x.shape[2]
inputWidth = x.shape[3]

## Filter-bank dimensions, read from w's axes:
## (number of filters, depth, height, width).
numFilters = w.shape[0]
filterDepth = w.shape[1]
filterHeight = w.shape[2]
filterWidth = w.shape[3]

## Output dimensions. Floor division (//) keeps the spatial sizes integers
## on both Python 2 and Python 3, which range() and np.empty() require.
outputSize = x.shape[0]
outputDepth = numFilters
outputHeight = (inputHeight - filterHeight + 2 * padWidth) // stride + 1
outputWidth = (inputWidth - filterWidth + 2 * padWidth) // stride + 1

## Allocate the output activation map (contents are uninitialised).
outputActivationMap = np.empty([outputSize, outputDepth, outputHeight, outputWidth])
print(outputActivationMap.shape)

(4, 2, 2, 2)


In [29]:
## Naive convolution forward pass on the un-padded input: every output value
## is the sum of the element-wise product of a filter with the input window
## it covers, plus that filter's bias.
for n in range(inputSize):
    for k in range(numFilters):
        for i in range(outputHeight):
            rowStart = i * stride
            for j in range(outputWidth):
                colStart = j * stride

                ## Window of the input covered by the filter (all channels).
                xImageSlice = x[n, :, rowStart : rowStart + filterHeight, colStart : colStart + filterWidth]

                ## Filter response at this position.
                outputActivationMap[n, k, i, j] = np.sum(xImageSlice * w[k]) + b[k]

In [32]:
## Zero-filled gradient buffers matching the weights and the input.
dW = np.zeros_like(w)
dX = np.zeros_like(x)

## Random upstream gradient. Its shape is taken from the forward output so
## the two stay in sync if the input or filter sizes are changed.
dOut = np.random.randn(*outputActivationMap.shape)

In [35]:
## Accumulate the weight gradient: every output position contributes the
## input window it was computed from, scaled by its upstream gradient.
for n in range(inputSize):
    for k in range(numFilters):
        for i in range(outputHeight):
            rowStart = i * stride
            for j in range(outputWidth):
                colStart = j * stride

                ## Window of the input that produced output (i, j).
                xImageSlice = x[n, :, rowStart : rowStart + filterHeight, colStart : colStart + filterWidth]

                ## Upstream gradient of that single output value.
                dOutUpstream = dOut[n, k, i, j]

                ## Weight gradient contribution of this position.
                dW[k] += xImageSlice * dOutUpstream

## The input-gradient step is deliberately left disabled in this cell:
#                 dXPadded[n, :, i * stride : i * stride + filterHeight, j * stride : j * stride + filterWidth] += w[k] * dOutUpstream

In [40]:
## Single weight: filter 0, channel 0, row 1, column 1.
W_fcij = w[0, 0, 1, 1]

## Visit every (input, filter, output row, output column) combination and
## extract the input window for it. Nothing else is computed yet.
for n in range(inputSize):
    for k in range(numFilters):
        for i in range(outputHeight):
            rowStart = i * stride
            for j in range(outputWidth):
                colStart = j * stride

                ## Window of the input for this output position.
                xImageSlice = x[n, :, rowStart : rowStart + filterHeight, colStart : colStart + filterWidth]

                ## Unfinished sketch of a per-weight gradient (not executed):
#                 xImageSliceVal = xImageSlice[, i, j]
#                 dOutUpstream = dOut[n, k, i, j]
#                 dW[k, ]

## Reference formulation kept for comparison (not executed):
# sub_xpad = x_pad[:, cprime, i:i + Hh * S:S, j:j + Hw * S:S]
# dw[fprime, cprime, i, j] = np.sum(dout[:, fprime, :, :] * sub_xpad)   

(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)
(3, 2, 2)


In [13]:
## Deterministic toy input for the max-pooling forward pass.
x_shape = (2, 3, 4, 4)
x = np.linspace(-0.3, 0.4, num = np.prod(x_shape))
x = x.reshape(x_shape)

## Pooling window size and stride.
poolParam = dict(poolWidth = 2, poolHeight = 2, stride = 2)

In [14]:
## Input dimensions, read from x's axes: (size, depth, height, width).
inputSize = x.shape[0]
inputDepth = x.shape[1]
inputHeight = x.shape[2]
inputWidth = x.shape[3]

## Pooling window size and stride.
poolWidth = poolParam['poolWidth']
poolHeight = poolParam['poolHeight']
poolStride = poolParam['stride']

## Output dimensions: the depth is carried over from the input. Floor
## division (//) keeps the spatial sizes integers on both Python 2 and
## Python 3, which range() and np.empty() require.
outputSize = x.shape[0]
outputDepth = inputDepth
outputHeight = (inputHeight - poolHeight) // poolStride + 1
outputWidth = (inputWidth - poolWidth) // poolStride + 1

## Allocate the output activation map (contents are uninitialised).
outputActivationMap = np.empty([outputSize, outputDepth, outputHeight, outputWidth])
print(outputActivationMap.shape)

(2, 3, 2, 2)


In [15]:
## Naive max pooling: each output value is the maximum of one pooling
## window of one channel of one input.
for n in range(inputSize):
    for k in range(outputDepth):
        for i in range(outputHeight):
            rowStart = i * poolStride
            for j in range(outputWidth):
                colStart = j * poolStride

                ## Pooling window for this channel and output position.
                xImageSlice = x[n, k, rowStart : rowStart + poolHeight, colStart : colStart + poolWidth]

                ## Keep only the largest value of the window.
                outputActivationMap[n, k, i, j] = np.amax(xImageSlice)
                
                
                
                    
                    

In [16]:
## Max pooling with the channel loop vectorised away: each step handles the
## pooling window of all channels at once.
for n in range(inputSize):
    for i in range(outputHeight):
        for j in range(outputWidth):

            ## Pooling window across every channel: (depth, poolHeight, poolWidth).
            xImageSlice = x[n, :, i * poolStride : i * poolStride + poolHeight, j * poolStride : j * poolStride + poolWidth]

            ## Reduce over the two spatial axes only, leaving one maximum per channel.
            channelMax = np.amax(xImageSlice, axis = (-1, -2))
            print(channelMax.shape)

            ## Store the per-channel maxima for this output position. Writing
            ## to the whole channel axis avoids depending on a channel index k,
            ## which this loop nest does not define.
            outputActivationMap[n, :, i, j] = channelMax

                
                
                    
                    

(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)


In [17]:
## Random input and upstream gradient for the max-pooling backward pass.
x = np.random.randn(3, 2, 8, 8)
dOut = np.random.randn(3, 2, 4, 4)
poolParam = {'poolHeight': 2, 'poolWidth': 2, 'stride': 2}

## Zero-filled buffer that will receive the gradient with respect to x.
dX = np.zeros_like(x)
## print(...) with a single argument behaves the same on Python 2 and 3.
print(dX.shape)

(3, 2, 8, 8)


In [18]:
## Input dimensions, read from x's axes: (size, depth, height, width).
inputSize = x.shape[0]
inputDepth = x.shape[1]
inputHeight = x.shape[2]
inputWidth = x.shape[3]

## Pooling window size and stride.
poolWidth = poolParam['poolWidth']
poolHeight = poolParam['poolHeight']
poolStride = poolParam['stride']

## Output dimensions: the depth is carried over from the input. Floor
## division (//) keeps the spatial sizes integers on both Python 2 and
## Python 3, which range() and np.empty() require.
outputSize = x.shape[0]
outputDepth = inputDepth
outputHeight = (inputHeight - poolHeight) // poolStride + 1
outputWidth = (inputWidth - poolWidth) // poolStride + 1

## Allocate the output activation map (contents are uninitialised).
outputActivationMap = np.empty([outputSize, outputDepth, outputHeight, outputWidth])
print(outputActivationMap.shape)

(3, 2, 4, 4)


In [19]:
## Naive max-pooling backward pass: the upstream gradient of every output
## value is routed to the position of the maximum inside its pooling window.
## Gradients are accumulated with += so that overlapping windows (stride
## smaller than the window) add up instead of overwriting each other. dX is
## therefore reset first, which keeps the cell safe to re-run.
dX[...] = 0

for n in range(inputSize):
    for k in range(outputDepth):
        for i in range(outputHeight):
            rowStart = i * poolStride
            for j in range(outputWidth):
                colStart = j * poolStride

                ## Pooling window for this channel and output position.
                xImageSlice = x[n, k, rowStart : rowStart + poolHeight, colStart : colStart + poolWidth]

                ## (row, column) of the maximum inside the window.
                maxElemIndex = unravel_index(xImageSlice.argmax(), xImageSlice.shape)

                ## Route the upstream gradient to the position of the maximum.
                dX[n, k, rowStart + maxElemIndex[0], colStart + maxElemIndex[1]] += dOut[n, k, i, j]

                ## Debug output: the window, the argmax and the gradient window.
                print(xImageSlice)
                print(maxElemIndex)
                print(dX[n, k, rowStart : rowStart + poolHeight, colStart : colStart + poolWidth])
                print('')
                
                



[[-1.08543999 -1.09020967]
 [-0.17474137 -0.27532043]]
(1, 0)
[[ 0.         0.       ]
 [-1.2048327  0.       ]]

[[-0.36336532 -0.03204642]
 [ 0.20629803 -0.34149567]]
(1, 0)
[[ 0.          0.        ]
 [ 0.07820395  0.        ]]

[[ 0.29361606  1.03376661]
 [-0.39174757  0.33624841]]
(0, 1)
[[ 0.          0.47180441]
 [ 0.          0.        ]]

[[-0.60328774 -0.18165842]
 [ 1.63789651 -2.75822584]]
(1, 0)
[[ 0.          0.        ]
 [-1.42832102  0.        ]]

[[ 1.96990793 -0.63375514]
 [-0.59396112 -0.07222808]]
(0, 0)
[[-0.45267485  0.        ]
 [ 0.          0.        ]]

[[ 1.36152283 -0.83681093]
 [ 0.98724    -1.15901322]]
(0, 0)
[[ 0.08220139  0.        ]
 [ 0.          0.        ]]

[[-2.43175474  0.13976704]
 [ 0.0238783  -2.55202933]]
(0, 1)
[[ 0.         -0.11236069]
 [ 0.          0.        ]]

[[-0.03377166 -1.17678004]
 [ 1.32294847 -0.2003593 ]]
(1, 0)
[[ 0.          0.        ]
 [-0.05807256  0.        ]]

[[ 0.27612015 -0.76507946]
 [-1.99476529  1.22765567]]
(1, 1

In [37]:
## Random mini-batch: 50 inputs of shape (3, 32, 32) and 50 integer labels
## drawn from 0..9.
x = np.random.randn(50, 3, 32, 32)
y = np.random.randint(10, size = 50)

## Architecture hyper-parameters and the scale of the initial weights.
numFilters, filterSize = 32, 7
hiddenDim, numClasses = 100, 10
weightScale = 1e-3

In [38]:
## Initialising the parameters.
W1 = np.random.normal(loc = 0, scale = weightScale, size = (numFilters, x.shape[1], filterSize, filterSize))
b1 = np.zeros(numFilters)

## Assuming the output of the convolutional layer is the same as the input and then reduced by a factor of 4 after applying 2*2 max pooling.
W2 = np.random.normal(loc = 0, scale = weightScale, size = ((numFilters * x.shape[2] * x.shape[3]) / 4, hiddenDim))
b2 = np.zeros(hiddenDim)

W3 = np.random.normal(loc = 0, scale = weightScale, size = (hiddenDim, numClasses))
b3 = np.zeros(numClasses)

# Pass convParam to the forward pass for the convolutional layer.
convParam = {'stride': 1, 'pad': (filterSize - 1) / 2}
poolParam = {'poolHeight': 2, 'poolWidth': 2, 'stride': 2}

print W1.shape
print " "
print W2.shape
print " "
print W3.shape

(32, 3, 7, 7)
 
(8192, 100)
 
(100, 10)


In [39]:
## Convolution, then ReLU, then max pooling, in one combined layer call.
maxPoolOut, maxPoolCache = conv_relu_pool_forward(x, W1, b1, convParam, poolParam)

## Flatten everything but the first axis so the fully connected layers can
## consume the result as one row per input.
maxPoolOut = maxPoolOut.reshape(maxPoolOut.shape[0], -1)

## Hidden layer: affine transformation followed by ReLU.
reLuOut, reLuCache = affine_relu_forward(maxPoolOut, W2, b2)

## Final affine transformation; its output is used as the class scores.
fcOut, fcCache = affine_forward(reLuOut, W3, b3)
scores = fcOut

In [54]:
## Gradients of the parameters, keyed by parameter name.
grads = {}

## Softmax layer (forward + backward): the loss and its gradient with
## respect to the scores.
loss, dScores = softmax_loss(scores, y)

## Backpropagating through the last fully connected layer.
dReluOut, grads['W3'], grads['b3'] = affine_backward(dScores, fcCache)

## Backpropagating through the hidden layer.
dMaxPoolOut, grads['W2'], grads['b2'] = affine_relu_backward(dReluOut, reLuCache)

## Restore the 4-D layout that was flattened before the hidden layer.
## Floor division (//) keeps the spatial sizes integers on both Python 2 and
## Python 3, which reshape() requires.
dMaxPoolOut = dMaxPoolOut.reshape(dMaxPoolOut.shape[0], numFilters, x.shape[2] // 2, x.shape[3] // 2)

## Backpropagating through the combined convolution / ReLU / pooling layer.
dX, grads['W1'], grads['b1'] = conv_relu_pool_backward(dMaxPoolOut, maxPoolCache)

In [55]:
## Sanity check: each gradient should have the shape of the array it belongs
## to. print(...) with a single argument behaves the same on Python 2 and 3.
print(maxPoolOut.shape)
print(dMaxPoolOut.shape)

print(W2.shape)
print(grads['W2'].shape)

print(b1.shape)
print(grads['b1'].shape)

(50, 8192)
(50, 32, 16, 16)
(8192, 100)
(8192, 100)
(32,)
(32,)


In [50]:
## Reshape the pooled-output gradient to (size, numFilters, height / 2, width / 2).
## Floor division (//) keeps the sizes integers on both Python 2 and Python 3.
dMaxPoolOut = dMaxPoolOut.reshape(dMaxPoolOut.shape[0], numFilters, x.shape[2] // 2, x.shape[3] // 2)
print(dMaxPoolOut.shape)

(50, 32, 16, 16)
