In [1]:
# As usual, a bit of setup

import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.cnn import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

In [2]:
# Load the (preprocessed) CIFAR10 data.

data = get_CIFAR10_data()
for k, v in data.iteritems():
  print '%s: ' % k, v.shape

X_val:  (1000, 3, 32, 32)
X_train:  (49000, 3, 32, 32)
X_test:  (1000, 3, 32, 32)
y_val:  (1000,)
y_train:  (49000,)
y_test:  (1000,)


In [3]:
x_shape = (2, 3, 4, 4)
w_shape = (3, 3, 4, 4)
x = np.linspace(-0.1, 0.5, num = np.prod(x_shape)).reshape(x_shape)
w = np.linspace(-0.2, 0.3, num = np.prod(w_shape)).reshape(w_shape)
b = np.linspace(-0.1, 0.2, num = 3)

convParam = {'stride': 2, 'pad': 1}

In [4]:
## Unpacking the convolutional parameters.
stride, padWidth = convParam['stride'], convParam['pad']

## Padding the input with zeroes.
xPadded = np.pad(x, pad_width = ((0, 0), (0, 0), (padWidth, padWidth), (padWidth, padWidth)), mode = 'constant', constant_values = 0)
xPadded.shape

(2, 3, 6, 6)

In [5]:
## Defining the input height, width, depth and inputSize.
inputHeight = x.shape[-2]
inputWidth = x.shape[-1]
inputDepth = x.shape[1]
inputSize = x.shape[0]

## Defining the filter height, width, depth and number of filters.
filterHeight = w.shape[-2]
filterWidth = w.shape[-1]
filerDepth = w.shape[1]
numFilters = w.shape[0]

## Defining the output height, width and depth.
outputHeight = ((inputHeight - filterHeight + 2 * padWidth) / stride + 1)
outputWidth = ((inputWidth - filterWidth + 2 * padWidth) / stride + 1)
outputDepth = numFilters

## Initializing the output activation map.
outputActivationMap = np.empty([inputSize, outputDepth, outputWidth, outputHeight])
print outputActivationMap.shape

(2, 3, 2, 2)


In [6]:
## Obtaining the necessary input slices over which
## the weight matrices will convolve.
for l in range(0, inputSize):

    for k in range(0, numFilters):

        for i in range(0, outputHeight):

            for j in range(0, outputWidth):

                ## Obtaining the input slice.
                xImageSlice = xPadded[l, :, i * stride : i * stride + filterHeight, j * stride : j * stride + filterWidth]
                
                ## Performing the dot product of the weight matrix with the image slice.
                outputActivationMap[l, k, i, j] = np.sum(xImageSlice * w[k]) + b[k]

In [7]:
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
dOut = np.random.randn(4, 2, 5, 5)
convParam = {'stride': 1, 'pad': 1}

In [8]:
## Unpacking the convolutional parameters.
stride, padWidth = convParam['stride'], convParam['pad']

## Padding the input with zeroes.
xPadded = np.pad(x, pad_width = ((0, 0), (0, 0), (padWidth, padWidth), (padWidth, padWidth)), mode = 'constant', constant_values = 0)
xPadded.shape

(4, 3, 7, 7)

In [9]:
## Defining the input height, width, depth and inputSize.
inputHeight = x.shape[-2]
inputWidth = x.shape[-1]
inputDepth = x.shape[1]
inputSize = x.shape[0]

## Defining the filter height, width, depth and number of filters.
filterHeight = w.shape[-2]
filterWidth = w.shape[-1]
filterDepth = w.shape[1]
numFilters = w.shape[0]

## Defining the output height, width and depth.
outputHeight = ((inputHeight - filterHeight + 2 * padWidth) / stride + 1)
outputWidth = ((inputWidth - filterWidth + 2 * padWidth) / stride + 1)
outputDepth = numFilters

## Initializing the output activation map.
outputActivationMap = np.empty([inputSize, outputDepth, outputWidth, outputHeight])
print outputActivationMap.shape

(4, 2, 5, 5)


In [10]:
## Create placeholders for the gradients.
dW = np.zeros_like(w)
dB = np.zeros_like(b)
dX = np.zeros_like(x)
dXPadded = np.zeros_like(xPadded)

In [11]:
print x.shape
print w.shape
print b.shape
print outputActivationMap.shape

(4, 3, 5, 5)
(2, 3, 3, 3)
(2,)
(4, 2, 5, 5)


In [13]:
for n in range(0, inputSize):

    for k in range(0, numFilters):
        
        ## Computing the gradient with respect to biases.
        dB[k] += outputActivationMap[n, k].sum()

        for i in range(0, outputHeight):

            for j in range(0, outputWidth):

                ## Obtaining the relevant slice of the input.
                xImageSlice = xPadded[n, :, i * stride : i * stride + filterHeight, j * stride : j * stride + filterWidth]
                
                ## Obtaining the upstream gradient of the corresponding activation map.
                dOutUpstream = outputActivationMap[n, k, i, j]
                
                ## Computing the gradient with respect to the weights.
                dW[k] += xImageSlice * dOutUpstream
                
                ## Computing the gradient with respect to the input.
                dXPadded[n, :, i * stride : i * stride + filterHeight, j * stride : j * stride + filterWidth] += w[k] * dOutUpstream
            
                
            
                
                     
                
                
                
                
                
                
                

In [15]:
x_shape = (2, 3, 4, 4)
x = np.linspace(-0.3, 0.4, num=np.prod(x_shape)).reshape(x_shape)
poolParam = {'poolWidth': 2, 'poolHeight': 2, 'stride': 2}

In [16]:
## Defining the input size, depth, height and width.
inputSize = x.shape[0]
inputDepth = x.shape[1]
inputHeight = x.shape[2]
inputWidth = x.shape[3]

## Unpacking the pooling parameters.
poolWidth, poolHeight, poolStride = poolParam['poolWidth'], poolParam['poolHeight'], poolParam['stride']

## Defining the output size, depth, height and width.
outputSize = x.shape[0]
outputDepth = inputDepth
outputHeight = ((inputHeight - poolHeight) / poolStride) + 1
outputWidth = ((inputWidth - poolWidth) / poolStride) + 1

## Initializing the output activation map.
outputActivationMap = np.empty([outputSize, outputDepth, outputHeight, outputWidth])
print outputActivationMap.shape

(2, 3, 2, 2)


In [22]:
## Performing the pooling operation.
for n in range(0, inputSize):
    for k in range(0, outputDepth):
        for i in range(0, outputHeight):
            for j in range(0, outputWidth):
                
                ## Obtaining the relevant slice.
                xImageSlice = x[n, k, i * poolStride : i * poolStride + poolHeight, j * poolStride : j * poolStride + poolWidth]
                    
                
                ## Filling in the correct values in the output placeholder.
                outputActivationMap[n, k, i, j] = np.amax(xImageSlice)
                
                
                
                    
                    

(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)
(2, 2)


In [26]:
## Performing the pooling operation.
for n in range(0, inputSize):
    for i in range(0, outputHeight):
        for j in range(0, outputWidth):

            ## Obtaining the relevant slice.
            xImageSlice = x[n, :, i * poolStride : i * poolStride + poolHeight, j * poolStride : j * poolStride + poolWidth]
            a = np.amax(xImageSlice, axis=(-1, -2))
            print a.shape
            


            ## Filling in the correct values in the output placeholder.
            outputActivationMap[n, k, i, j] = np.amax(xImageSlice)

                
                
                    
                    

(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
