Create Input Volume

In [3]:
import numpy as np

In [10]:
a = np.array([
 [1],
 [2]
])
b = np.array([
 [3,4],
 [5,6]
])
c = np.array([
 [1,2]
])

In [11]:
a + c

array([[2, 3],
       [3, 4]])

In [9]:
# Multiply two arrays 
x = [1,2,3]
y = [2,3,4]
    
def one():
    product = []
    for i in range(len(x)):
        product.append(x[i]*y[i])
    return product

# Linear algebra version
x = np.array([1,2,3])
y = np.array([2,3,4])

def two():
    return x * y    
%timeit one()
%timeit two()

The slowest run took 15.98 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 1.01 µs per loop
The slowest run took 23.56 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 347 ns per loop


In [70]:
#A depth column (or a fibre) at position (x,y) would be the activations X[x,y,:].
#A depth slice, or equivalently an activation map at depth d would be the activations X[:,:,d].

# Dimensions
# 11x11x4
# 4 channels/depth
# Filters = 1
# Filter size = 5x5
# Padding = 0
# Stride = 2

# Input 
X = np.random.randn(11,11,4)
print (X.shape)

# Weights
# Filter Size by Input Depth
# F * F * D
W1 = np.random.randn(5,5,4)
print (W1.shape)

# Bias
# Number of Depth Filters
# NumOfFilters * DepthOfInput
# 1 * 4 = 4 
B1 = np.random.randn(1,1,4)
print (B1.shape)

(11, 11, 4)
(5, 5, 4)
(1, 1, 4)


**Depth Slices**

In [71]:
# 1st Depth Slice
print ("1st Depth Slice")
X[:,:,:1].shape

print ("2nd Depth Slice")
X[:,:,1:2].shape

print ("3rd Depth Slice")
X[:,:,2:3].shape

print ("4th Depth Slice")
X[:,:,3:4].shape

1st Depth Slice
2nd Depth Slice
3rd Depth Slice
4th Depth Slice


(11, 11, 1)

**Sliding Filter Windows**

In [72]:
# First Filter, Upper Right Corner, 4 Depth Slices
print(X[:5,:5,:].shape)
print(X[:5,:5,:])
'''
First Window, all depth slices

So there are 5 rows, representing the filter width
In each row, there are 5 sub-rows representing the height
In each sub row, there are 4 columns representing the depth
We could have done, Depth by Width by Height, which would
have been more intuitive in my opinion, but this is how
it works since Depth is the last dimensions (Tensorflow)

[[[-1.45264122  0.29666966 -0.06727711 -1.83462739]
  [ 0.16060204  0.62596754 -1.12441172  0.42163557]
  [-0.84911239 -0.33899799 -0.28254783 -0.83739401]
  [-0.73294962  0.04195234 -0.04209224  2.87056823]
  [ 1.07981908  0.02380982 -1.9079376  -1.09054984]]

 [[-0.96693238 -1.43035947 -0.49384204  0.12650547]
  [ 0.46947879 -0.22102563  0.27685126  0.31438639]
  [ 1.08138615  0.02188208  0.45469774 -1.50532378]
  [ 1.01474361 -0.04212126 -0.71322167 -0.40965913]
  [ 0.69554548  2.62535066 -0.14347536  0.81848298]]
...
...
'''

(5, 5, 4)
[[[ 0.37877936 -0.43238156  0.31634215  1.19851896]
  [-1.46536883 -0.05500301 -0.44545083  1.17320452]
  [-0.65487911  0.36444339 -0.84210964 -0.46013371]
  [-0.85203774  0.81055294 -0.63282726 -0.19706205]
  [-1.48224433  1.21343735 -0.56536372 -2.06195958]]

 [[-0.04332273 -0.86220008  0.36636353 -1.10321606]
  [-0.78537115 -0.52092361 -1.31145543 -0.24886194]
  [-0.03227117  1.71295751 -1.66877298 -0.1234166 ]
  [ 0.64094085 -1.31539994  2.0979342  -0.01453015]
  [-0.00944743 -0.9252115  -0.27129851 -0.14455828]]

 [[-0.77466012  0.23092348 -0.61110858  0.04590287]
  [-0.8717773  -0.48514814  0.7743041  -0.67191022]
  [ 1.5070127   0.56600474  1.62005846  0.29596331]
  [ 1.15608133 -0.33384353 -0.89846361  1.82731601]
  [-1.00022004  0.78804887  0.65300209  3.02545549]]

 [[-1.03840368 -0.28882909 -0.59497663  1.70930694]
  [ 0.213791   -0.7559728   0.72087329 -0.34563537]
  [-0.4464405   0.02983258 -0.76741205 -0.17863479]
  [ 0.30866326 -1.17647071  1.6251852  -1.406152

'\nFirst Window, all depth slices\n\nSo there are 5 rows, representing the filter width\nIn each row, there are 5 sub-rows representing the height\nIn each sub row, there are 4 columns representing the depth\nWe could have done, Depth by Width by Height, which would\nhave been more intuitive in my opinion, but this is how\nit works since Depth is the last dimensions (Tensorflow)\n\n[[[-1.45264122  0.29666966 -0.06727711 -1.83462739]\n  [ 0.16060204  0.62596754 -1.12441172  0.42163557]\n  [-0.84911239 -0.33899799 -0.28254783 -0.83739401]\n  [-0.73294962  0.04195234 -0.04209224  2.87056823]\n  [ 1.07981908  0.02380982 -1.9079376  -1.09054984]]\n\n [[-0.96693238 -1.43035947 -0.49384204  0.12650547]\n  [ 0.46947879 -0.22102563  0.27685126  0.31438639]\n  [ 1.08138615  0.02188208  0.45469774 -1.50532378]\n  [ 1.01474361 -0.04212126 -0.71322167 -0.40965913]\n  [ 0.69554548  2.62535066 -0.14347536  0.81848298]]\n...\n...\n'

**More Intuitive Way To Represent the Input Volumn**

In [73]:
# A more intuitive way IMO
# Is to put depth 1st, so you have 
# 4 rows, each representing a depth
# And in each row you have nice 5x5 blocks
# representing the 2D filter, but alas
altX = np.random.randn(4,5,5)
altX

array([[[-0.9354668 ,  1.24713028, -0.28894276, -1.37239906, -0.06841833],
        [ 0.26292663, -0.40062423,  0.08003994, -0.15398875,  0.06420082],
        [ 1.98004688, -0.37700905, -0.07935733,  0.23104072, -0.73004913],
        [-1.10400234, -1.11343016,  1.38677329, -0.20441012, -0.16422174],
        [ 1.06526889, -0.13390513, -1.29845698, -0.28856199, -0.63418128]],

       [[-1.42000722,  0.29177472,  0.08194025,  1.18807784, -0.23926971],
        [ 0.35644881, -0.41413377, -0.12095747, -1.22334306, -0.33085206],
        [ 0.05110886, -0.46412856, -0.18325071,  0.73363567,  1.20141043],
        [-0.00912162, -0.25989276, -0.64762697, -0.36672697,  0.8012283 ],
        [-1.25612565,  0.42790086,  0.05321961, -0.72041118,  1.15892234]],

       [[ 0.13220687, -1.35726052,  1.80272648,  0.84437472, -0.49977229],
        [-0.96561949, -0.58216472, -1.25999572,  0.01327518,  0.39385971],
        [-0.0732223 , -0.6119556 ,  2.33214992,  0.12138975,  0.9279072 ],
        [ 0.0537622 ,

**Conv Layer Volume Generated**

In [84]:
# Output Volume - ConvLayer1

# Input Dimensions
# 11x11x4
# 4 channels/depth
# Filters = 1
# Filter size = 5x5
# Padding = 0
# Stride = 2

# Output Dimensions
# * Width = ((InputWidth − FilterWidth + 2*Padding) / Stride) +1
# * Height = ((InputHeight − FilterHeight + 2*Padding) / Stride) +1 
# * Depth = Number of filters
# W = ((11-5 + 2*0) / 2) + 1 = 6/2 + 1 = 4
# H = ((11-5 + 2*0) / 2) + 1 = 6/2 + 1 = 4
# D = 1
# Output Volume = (4,4,1)

# Dimensions
# Input = (11, 11, 4)
# Weights = (5, 5, 4)
# Output = (4,4,1)
# Depth Slices = # of filters * depth of input
# 1 * 4 = 4
# Bias = (1,4) 4

# Volume Initialization
V = np.zeros((4,4,1))

# He is moving the filter around and 
# multiplying by the weight
V[0,0,0] = np.sum(X[:5,:5,:] * W1) + B1[0][0][0]  #First bias value
V[1,0,0] = np.sum(X[2:7,:5,:] * W1) + B1[0][0][0]  
V[2,0,0] = np.sum(X[4:9,:5,:] * W1) + B1[0][0][0]  
V[3,0,0] = np.sum(X[6:11,:5,:] * W1) + B1[0][0][0]  

# What happens?
# We just slid the 1st filter over all the windows (4)
# Creating our output matrix of 4x4x1

print ("Depth Layer 1")
print(V)

# What's next?
# We repeat the process for the remaining 3 depth layers
# Let's do depth layer 2 (which has its own depth slice - unique filter + depth combo)
V[0,1,0] = np.sum(X[:5,:5,1:2] * W1) + B1[0][0][1]  #2nd bias value
V[1,1,0] = np.sum(X[2:7,:5,1:2] * W1) + B1[0][0][1]  
V[2,1,0] = np.sum(X[4:9,:5,1:2] * W1) + B1[0][0][1]  
V[3,1,0] = np.sum(X[6:11,:5,1:2] * W1) + B1[0][0][1]  

print ("Depth Layer 2")
print (V)

# Depth Layer 3
V[0,2,0] = np.sum(X[:5,:5,2:3] * W1) + B1[0][0][2]  #3rd bias value
V[1,2,0] = np.sum(X[2:7,:5,2:3] * W1) + B1[0][0][2]  
V[2,2,0] = np.sum(X[4:9,:5,2:3] * W1) + B1[0][0][2]  
V[3,2,0] = np.sum(X[6:11,:5,2:3] * W1) + B1[0][0][2]  

print ("Depth Layer 3")
print (V)

Depth Layer 1
[[[ -1.69101763]
  [  0.        ]
  [  0.        ]
  [  0.        ]]

 [[-10.52381755]
  [  0.        ]
  [  0.        ]
  [  0.        ]]

 [[  7.52697107]
  [  0.        ]
  [  0.        ]
  [  0.        ]]

 [[  8.00450175]
  [  0.        ]
  [  0.        ]
  [  0.        ]]]
Depth Layer 2
[[[ -1.69101763]
  [ -2.35912223]
  [  0.        ]
  [  0.        ]]

 [[-10.52381755]
  [  9.98922895]
  [  0.        ]
  [  0.        ]]

 [[  7.52697107]
  [-24.57512209]
  [  0.        ]
  [  0.        ]]

 [[  8.00450175]
  [ -6.08714045]
  [  0.        ]
  [  0.        ]]]
Depth Layer 3
[[[ -1.69101763]
  [ -2.35912223]
  [  6.76344072]
  [  0.        ]]

 [[-10.52381755]
  [  9.98922895]
  [ -4.44500367]
  [  0.        ]]

 [[  7.52697107]
  [-24.57512209]
  [ 26.10634608]
  [  0.        ]]

 [[  8.00450175]
  [ -6.08714045]
  [ 15.15776558]
  [  0.        ]]]
