<a href="https://colab.research.google.com/github/Joana-Mansa/2021-Phonebook/blob/master/first_neural_net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# FIRST NEURAL NETWORK (using Numpy)

Building Neural Networks from scratch in 9 steps

Data pre-processing (steps 1-4)
1. Initialisation (import the libraries)
2. Data generation
3. Train-test splitting
4. Data standardization
5. Neural network construction
6. Forward Propagation
7. Back-Propagation
8. Iterative optimisation
9. Testing






# Data Pre-Processing

# 1. Initialising

In [1]:
#Initialization
import numpy as np
np.random.seed(42) # seed NumPy's global pseudo-random generator so every run draws the same numbers

# 2. Generating the data

In [3]:
# Layout: one row per feature, one column per data point.
X_num_row, X_num_col = 2, 10000
# np.random.rand samples uniformly from [0, 1); scale the samples to [0, 100).
X_raw = 100 * np.random.rand(X_num_row, X_num_col)
X_raw

array([[72.9998311 , 18.45119956, 34.66396944, ...,  1.94558628,
        40.10048425, 25.73979791],
       [63.81445684, 45.92924535, 96.44985249, ..., 26.95694318,
        43.43197722, 48.74236985]])

In [5]:
# For inputs a and b (the two feature rows) the targets are a+b, a-b and |a-b|,
# stacked as three rows with one column per data point.
feature_a = X_raw[0,:]
feature_b = X_raw[1,:]
y_raw = np.concatenate((
    [feature_a + feature_b],
    [feature_a - feature_b],
    [np.abs(feature_a - feature_b)],
))

y_num_row, y_num_col = y_raw.shape
y_raw.shape


(3, 10000)

# 3. Splitting train-test dataset

In [12]:
# The first `train_ratio` fraction of the columns is used for training and the
# remaining columns for testing; the columns are taken in their existing order.
train_ratio = 0.7
num_train_datum = int(train_ratio*X_num_col)

X_raw_train, X_raw_test = X_raw[:, :num_train_datum], X_raw[:, num_train_datum:]
y_raw_train, y_raw_test = y_raw[:, :num_train_datum], y_raw[:, num_train_datum:]

# 4. Standardizing the data

In [17]:
class scaler:
  """Holds the mean and standard deviation used to (un)standardize one row of data."""

  def __init__(self, mean, std):
    # Both values are stored as given; no validation or conversion is done.
    self.mean, self.std = mean, std

def get_scaler(row):
  """Return a `scaler` holding the mean and standard deviation (np.std) of `row`."""
  return scaler(np.mean(row), np.std(row))

def standardize(data, scaler):
  """Shift `data` by `scaler.mean` and divide it by `scaler.std`.

  A spread of exactly zero (a constant row) is treated as 1, so the result is
  the centred data instead of NaN/inf from a division by zero.

  Parameters:
    data: number or array to standardize.
    scaler: object exposing `mean` and `std` attributes.

  Returns:
    The standardized value(s).
  """
  std = scaler.std
  # `std == 0` is True (counted as 1) exactly where the spread is zero, so this
  # replaces a zero spread by one and leaves every non-zero spread unchanged.
  safe_std = std + (std == 0)
  return (data - scaler.mean) / safe_std

def unstandardize(data, scaler):
  """Undo the standardization: multiply `data` by `scaler.std`, then add `scaler.mean`."""
  rescaled = data * scaler.std
  return rescaled + scaler.mean

In [18]:
# Fit one scaler per row using the training split only, then standardize the
# training split with those scalers.

X_scalers = [get_scaler(feature) for feature in X_raw_train]
X_train = np.array([standardize(feature, sc) for feature, sc in zip(X_raw_train, X_scalers)])

y_scalers = [get_scaler(target) for target in y_raw_train]
y_train = np.array([standardize(target, sc) for target, sc in zip(y_raw_train, y_scalers)])

In [66]:
# Standardize the test split with the scalers fitted on the training split,
# so no statistics of the test data are used.

X_test = np.array([standardize(feature, sc) for feature, sc in zip(X_raw_test, X_scalers)])
y_test = np.array([standardize(target, sc) for target, sc in zip(y_raw_test, y_scalers)])

In [67]:
# Check that the training data has been standardized: per-row mean ~ 0, std ~ 1.

print([feature.mean() for feature in X_train]) #should be close to 0
print([feature.std() for feature in X_train]) #should be close to 1

print([target.mean() for target in y_train]) #should be close to 0
print([target.std() for target in y_train]) # should be close to 1

[-1.6240976817373719e-16, -6.293378516732316e-17]
[1.0, 0.9999999999999999]
[1.4210854715202004e-17, -5.075305255429287e-18, -6.496390726949488e-17]
[1.0, 1.0, 1.0]


# 5. Constructing a neural net

In [68]:
class layer:
  """One layer of the network: its position, its dimensions and its activation name.

  Every layer except the input layer (layer_index 0) also gets a weight matrix
  `W` of shape (output_dim, input_dim) and a bias column `b` of shape
  (output_dim, 1). The input layer has neither attribute.
  """

  def __init__(self, layer_index, is_output, input_dim, output_dim, activation):
    self.layer_index, self.is_output = layer_index, is_output  # index 0 is the input layer
    self.input_dim, self.output_dim = input_dim, output_dim
    self.activation = activation

    if layer_index == 0:
      return  # the input layer carries no trainable values

    # Standard-normal draws scaled by sqrt(2 / input_dim), the factor used by
    # He initialisation; the biases are drawn with the same scale.
    scale = np.sqrt(2/input_dim)
    self.W = scale * np.random.randn(output_dim, input_dim)
    self.b = scale * np.random.randn(output_dim, 1)

In [69]:
# change layers_dim to configure your own neural net!
# Entries are the layer widths: input layer, hidden layers, output layer.
layers_dim = [X_num_row, 4, 4, y_num_row]
neural_net = []

# Build the layers in order: the input layer has no incoming dimension, hidden
# layers use 'relu', and the final layer is the linear output layer.
last_index = len(layers_dim) - 1
for layer_index, width in enumerate(layers_dim):
  if layer_index == 0:
    new_layer = layer(layer_index, False, 0, width, 'irrelevant')
  else:
    is_last = layer_index == last_index
    new_layer = layer(layer_index, is_last, layers_dim[layer_index-1], width,
                      activation='linear' if is_last else 'relu')
  neural_net.append(new_layer)
    




In [70]:
# Crude overfitting guard: count the trainable values (weights and biases) and
# refuse to continue when there are at least as many of them as data points.

# Expected count from the layer widths: (inputs + 1 bias) * outputs per layer.
pred_n_param = sum((fan_in + 1) * fan_out for fan_in, fan_out in zip(layers_dim[:-1], layers_dim[1:]))
# Actual count from the arrays held by every non-input layer.
act_n_param = sum(built.W.size + built.b.size for built in neural_net[1:])
print(f'Predicted number of hyperparameters: {pred_n_param}')
print(f'Actual number of hyperparameters: {act_n_param}')
print(f'Number of data: {X_num_col}')

if X_num_col <= act_n_param:
  raise Exception('It will overfit')

Predicted number of hyperparameters: 47
Actual number of hyperparameters: 47
Number of data: 10000


# 6. Performing Forward propagation

In [71]:
def activation(input_, act_func):
  """Apply the activation named `act_func` to `input_`.

  'relu' returns the element-wise maximum of `input_` and zero; 'linear'
  returns `input_` itself (the same object, not a copy). Any other name raises
  an Exception.
  """
  if act_func == 'linear':
    return input_
  if act_func == 'relu':
    floor = np.zeros(input_.shape)
    return np.maximum(input_, floor)
  raise Exception('Activation function is not defined.')

In [72]:
def forward_prop(input_vec, layers_dim=layers_dim, neural_net=neural_net):
  """Run `input_vec` through the network and return the last layer's activation.

  For each non-input layer this stores the pre-activation `Z = W . A_prev + b`
  and the activation `A` on the layer object; backward_prop reads those stored
  values afterwards.

  Parameters:
    input_vec: input array, stored as the activation `A` of layer 0.
    layers_dim: list of layer widths; only its length is used here.
    neural_net: list of layer objects, one per entry of `layers_dim`.

  NOTE: the defaults for `layers_dim` and `neural_net` are evaluated once, when
  this function is defined, so rebinding those names later does not affect
  calls that rely on the defaults.
  """
  neural_net[0].A = input_vec #Define A in input layer for for-loop convenience
  last_index = len(layers_dim) - 1
  for layer_index in range(1, last_index + 1):
    previous, current = neural_net[layer_index-1], neural_net[layer_index]
    current.Z = np.add(np.dot(current.W, previous.A), current.b)
    current.A = activation(current.Z, current.activation)
  # Index the last layer explicitly rather than through the loop variable,
  # which is unbound when the loop body never runs (input layer only).
  return neural_net[last_index].A

In [73]:
# Smoke test: the network output must have the same shape as the targets.
y_hat_check = forward_prop(X_train)
y_hat_check.shape == y_train.shape #should be true

True

# 7. Perform back propagation

In [74]:
def get_loss(y, y_hat, metric='mse'):
    """Return the scalar loss reported for predictions `y_hat` against targets `y`.

    For metric 'mse' the element-wise half squared error is computed, the L2
    norm of each column (one column per sample) is taken, and those norms are
    averaged. Any other metric raises an Exception.

    Parameters:
        y: 2-D array of targets, one column per sample.
        y_hat: predictions with the same shape as `y`.
        metric: name of the loss; only 'mse' is defined.

    NOTE(review): this is the mean of per-sample norms of the squared errors,
    not the mean of their sums, so it is not literally the function whose
    derivative get_dZ_from_loss returns - confirm that this is intended.
    """
    if metric == 'mse':
        individual_loss = 0.5 * (y_hat - y) ** 2
        # One vectorised call computes every column norm; no Python-level loop
        # over the samples is needed.
        per_sample = np.linalg.norm(individual_loss, 2, axis=0)
        return np.mean(per_sample)
    else:
        raise Exception('Loss metric is not defined.')

def get_dZ_from_loss(y, y_hat, metric):
    """Return the output-layer error term for the given loss metric.

    For 'mse' this is the residual `y_hat - y`; any other metric raises an
    Exception.
    """
    if metric != 'mse':
        raise Exception('Loss metric is not defined.')
    return y_hat - y

def get_dactivation(A, act_func):
    """Return the element-wise derivative of the named activation, evaluated from `A`.

    'relu' gives 1 for positive entries of `A` and 0 for zero or negative
    entries; 'linear' gives an array of ones with the shape of `A`. Any other
    name raises an Exception.
    """
    if act_func == 'linear':
        return np.ones(A.shape)
    if act_func == 'relu':
        signs = np.sign(A)  # -1, 0 or +1 per entry
        # Clamp at zero so that only strictly positive entries keep a slope of 1.
        return np.maximum(signs, np.zeros(A.shape))
    raise Exception('Activation function is not defined.')

In [75]:
def backward_prop(y, y_hat, metric='mse', layers_dim=layers_dim, neural_net=neural_net, num_train_datum=None):
    """Compute the gradients `dW` and `db` for every non-input layer and store them on it.

    Layers are visited from the output layer back to layer 1. The output layer's
    error term comes from get_dZ_from_loss; each earlier layer's error term is
    the next layer's error propagated through that layer's weights and
    multiplied by this layer's activation derivative. It relies on the `A` and
    `W` attributes already present on the layers (forward_prop sets `A`).

    Parameters:
        y: 2-D array of targets, one column per sample.
        y_hat: network output for the same samples.
        metric: loss name forwarded to get_dZ_from_loss.
        layers_dim: list of layer widths; only its length is used here.
        neural_net: list of layer objects, one per entry of `layers_dim`.
        num_train_datum: number of samples the gradients are averaged over.
            Defaults to the number of columns of `y`, so the scaling follows
            the batch actually passed in rather than a count frozen when the
            function was defined.

    Returns:
        None; the results are written to `neural_net[i].dW` and `neural_net[i].db`.
    """
    if num_train_datum is None:
        num_train_datum = y.shape[1]

    output_index = len(layers_dim) - 1
    for layer_index in range(output_index, 0, -1):
        current = neural_net[layer_index]
        if layer_index == output_index: # if output layer
            dZ = get_dZ_from_loss(y, y_hat, metric)
        else:
            # `dZ` on the right-hand side still holds the error term of layer
            # layer_index + 1 from the previous iteration.
            dZ = np.multiply(np.dot(neural_net[layer_index+1].W.T, dZ),
                             get_dactivation(current.A, current.activation))

        current.dW = np.dot(dZ, neural_net[layer_index-1].A.T) / num_train_datum
        current.db = np.sum(dZ, axis=1, keepdims=True) / num_train_datum

# 8. Optimize Iteratively (Gradient Descent)

In [76]:
# Full-batch gradient descent: every epoch uses the whole training set.
learning_rate = 0.01
max_epoch = 1000000

for epoch in range(1, max_epoch+1):
    y_hat_train = forward_prop(X_train)   # predictions with the current weights
    backward_prop(y_train, y_hat_train)   # stores dW and db on each layer

    # Step every non-input layer against its gradient.
    for trainable in neural_net[1:]:
        trainable.W = trainable.W - learning_rate * trainable.dW
        trainable.b = trainable.b - learning_rate * trainable.db

    if epoch % 100000 != 0:
        continue
    # Progress report; the loss uses the predictions made before this epoch's update.
    print(f'{get_loss(y_train, y_hat_train):.4f}')

0.3360
0.3349
0.3349
0.3349
0.3349
0.3349
0.3349
0.3349
0.3349
0.3349


In [77]:
# Loss on the held-out test split, using the trained weights.
y_hat_test = forward_prop(X_test)
get_loss(y_test, y_hat_test)

0.3469461475376623

In [82]:
def predict(X_raw_any):
    """Predict targets, in original units, for raw (unstandardized) inputs.

    `X_raw_any` is indexed as a 2-D array with one row per input feature. Each
    row is standardized with the matching entry of `X_scalers`, the network is
    evaluated, and each output row is mapped back with the matching entry of
    `y_scalers`.
    """
    X_any = np.array([standardize(X_raw_any[row,:], sc) for row, sc in enumerate(X_scalers)])
    y_hat = forward_prop(X_any)
    return np.array([unstandardize(y_hat[row,:], sc) for row, sc in enumerate(y_scalers)])

In [83]:
# Four (a, b) input pairs, transposed so that each feature is a row.
sample_pairs = np.array([[30,70],[70,30],[3,5],[888,122]])
predict(sample_pairs.T)



array([[105.90035702,  96.82669242,   4.66089373, 112.74597954],
       [-14.31562862,  26.18649919, -16.53548493,  93.73617085],
       [ 25.37979978,  47.86301628,  10.27389566,  94.33391677]])