# 2.3 The Gears of Neural Networks - Tensor Operations

## 2.3.1 Element-wise Operations

In [1]:
# Consider the below function a naive implementation of relu
# This is essentially what a Dense layer with activation='relu' would be doing.
# e.g. keras.layers.dense(512, activation='relu')
# e.g. output = relu(dot(W, input) + b)
# We have a dot product of two tensors (W, input), an addition of 2D tensor with vector b, and relu which is just max(x, 0)

def naive_relu(x):
    assert len(x.shape) == 2
    
    x = x.copy
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            x[i, j] = max(x[i, j], 0)
    return x

In [2]:
# Can do the same with addition, and other basic operations

def naive_add(x, y):
    assert len(x.shape) == 2
    assert x.shape == y.shape
    
    x = x.copy
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            x[i, j] += y[i, j]
    return x

In [6]:
# Fortunately these exist as well optimized operations in Python and packages like Numpy

import numpy as np

# element-wise addition, equivalent to above
def add(x, y):
    return x + y

# element-wise relu, equivalent to above
def relu(x):
    return np.maximum(x, 0)

## 2.3.2 Broadcasting

In [29]:
# With our Dense layer before we added a 2D tensor to a vector, but our naive add only supports 2d tensors of the same shape.
# How do we adjust for different tensor shapes?
# We can broadcast, if possible, to resolve this by adding dimensions to the smaller tensor to match the larger one
# then we repeat the tensor alongside these new axes to match the full shape of larger tensors
# new_Y[i, :] == old_y after this is done

def naive_add_matrix_and_vector(x, y):
    assert len(x.shape) == 2
    assert len(y.shape) == 1
    # Vector must be as long as the matrix is wide for addition to be feasible
    assert x.shape[1] == y.shape[0]
    
    z = np.zeros(x.shape[0])
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            z[i] += x[i, j] * y[j]
    return z
    

In [30]:
x = np.random.rand(3, 2)
print(x)

[[0.93518481 0.26755541]
 [0.88230601 0.98446822]
 [0.233801   0.99898316]]


In [31]:
y = np.random.rand(2,)
print(y)

[0.62701281 0.8203303 ]


In [32]:
z = np.zeros(x.shape[0])
print(z)

[0. 0. 0.]


In [33]:
z = naive_add_matrix_and_vector(x, y)
print(z)

[0.80585666 1.36080628 0.96609238]


In [34]:
# Generally you can apply two tensor element-wise operations if one tensor has shape
# (a, b, ... n, n+1, ... m) and the other has shape (n, n+1, ... m)
# The broadcasting will automatically happen for axes a through n-1
# e.g. below
x = np.random.random((64, 3, 32, 10))
y = np.random.random((32, 10))

z = np.maximum(x, y)
print(z.shape)

(64, 3, 32, 10)


## 2.3.3 Tensor Dot

In [40]:
# Also called tensor product, not the same as element-wise product though
x = np.random.random((32, 10))
y = np.random.random((10, 32))

z = np.dot(x, y)
print(z[:3])

[[2.68506263 3.31406718 3.78618941 2.91334889 2.89658944 2.68711702
  2.72080266 3.91639288 3.03898946 3.51045239 2.99665752 2.69715989
  2.77307728 3.40303366 3.46792519 2.7364031  2.65233852 2.9082524
  2.83816384 2.72600704 3.223352   2.32999656 3.80403233 2.28046711
  2.54219896 2.75399334 3.13919208 2.8065087  2.09730971 3.44830624
  2.01912892 1.97303338]
 [2.67441819 3.03880549 3.20581212 2.13772117 3.43112122 2.26780449
  2.97495391 3.41085141 2.66971216 3.15754697 3.17190367 2.46242681
  2.99725517 3.56114378 3.11727866 2.75202033 1.77377784 2.5536478
  2.64962949 2.14115705 3.06736666 2.17089724 3.28569394 2.05690939
  2.46163852 2.22251122 2.832659   2.1932295  2.17501999 3.23738712
  1.83474276 1.54459735]
 [2.56344727 3.03318593 3.48801441 2.82725123 3.29654152 2.05170162
  3.14051    3.46050501 2.9664575  3.52196908 3.71356589 2.71625683
  2.8165592  3.55829951 2.76154499 2.20191054 2.20997216 2.92111193
  2.53164638 3.16500459 3.0233204  2.49809725 3.30720446 2.64084507


In [41]:
def naive_vector_dot(x, y):
    # vectors must be the same size to be used in dot product
    assert len(x.shape) == 1
    assert len(y.shape) == 1
    assert x.shape[0] == y.shape[0]
    
    z = 0
    for i in range(x.shape[0]):
        z += x[i] * y[i]
    return z # returns a scalar

x = np.random.random((10))
y = np.random.random((10))

z = naive_vector_dot(x, y)
print(z)

2.8965211159609283


In [48]:
# Can also do dot product of matrix x and vector y

def naive_matrix_vector_dot(x, y):
    assert len(x.shape) == 2
    assert len(y.shape) == 1
    
    # Vector length must be same as matrix width
    assert x.shape[1] == y.shape[0]
    
    # Answer has shape equal to number of rows in matrix x
    z = np.zeros(x.shape[0])
    for i in range(x.shape[0]):
        # can also do z[i] = naive_vector_dot(x[i, :], y)
        for j in range(x.shape[1]):
            z[i] += x[i, j] * y[j]
    return z
    

x = np.random.random((3, 5))
y = np.random.random((5))

print(x.shape, y.shape)

(3, 5) (5,)


In [49]:
z = naive_matrix_vector_dot(x, y)   
print(z)

[1.74861648 1.40270977 1.1569119 ]


In [57]:
# When a tensor's ndim > 1, note that dot is no longer symmetric (x . y != y . x)
# You can take the dot product of two matrixes x and y if and only if x.shape[1] = y.shape[0] (x_num_cols == y_num_rows)
# The results is a matrix with shape (x.shape[0], y.shape[1])

def naive_matrix_dot(x, y):
    assert len(x.shape) == 2
    assert len(y.shape) == 2    
    
    # x_rows == y_cols
    assert x.shape[1] == y.shape[0]
    z = np.zeros((x.shape[0], y.shape[1]))
    for i in range(x.shape[0]):
        for j in range(y.shape[1]):
            row_x = x[i, :]
            col_y = y[:, j]
            
            z[i, j] = naive_vector_dot(row_x, col_y)
    return z

x = np.random.random((2, 3))
y = np.random.random((3, 4))

print(x.shape, x)

(2, 3) [[0.50479263 0.90559633 0.85328296]
 [0.0825802  0.53431785 0.6635519 ]]


In [58]:
print(y.shape, y)

(3, 4) [[0.74244733 0.34011211 0.14474552 0.81383989]
 [0.20085238 0.25385649 0.51022188 0.33615547]
 [0.87909284 0.32736118 0.00525729 0.96755869]]


In [59]:
z = naive_matrix_dot(x, y)
print(z.shape, z)

(2, 4) [[1.30678806 0.68090931 0.53960749 1.54084288]
 [0.75195419 0.38094772 0.28806226 0.88884634]]


## 2.3.4 Tensor Reshaping

In [60]:
x = np.array([[0, 1], [2, 3], [4, 5]])
x.shape

(3, 2)

In [64]:
x.reshape(6,1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5]])

In [65]:
x.reshape(2, 3)

array([[0, 1, 2],
       [3, 4, 5]])

In [66]:
# Transposing, exchanging rows and columns so that x[i, :] becomes x[:, i]
x = np.array([[0, 1], [2, 3], [4, 5]])
x

array([[0, 1],
       [2, 3],
       [4, 5]])

In [67]:
np.transpose(x)

array([[0, 2, 4],
       [1, 3, 5]])