# Question 6

In [40]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn

# Obtain the training and validation data
training = pd.read_csv('train_data.csv').to_numpy()
x_train = training[:,0:2]
y_train = training[:,2]
print("Training Data")
print(training[0:10])

print(20*'-')

validation = pd.read_csv('validate_data.csv').to_numpy()
x_val = validation[:,0:2]
y_val = validation[:,2]
print("Validation Data")
print(validation[0:10])

# Perform data normalization
def zero_mean_unit_var(x):
    x_mean = np.mean(x, axis=0)
    x_std = np.std(x, axis=0)
    x = (x-x_mean)/x_std
    return x

def min_max(x):
    x_min = np.min(x, axis=0)
    x_max = np.max(x, axis=0)
    x = (x-x_min)/(x_max-x_min)
    return x

print (20*'-')
print (min_max(x_train))

# Activation function: Sigmoid
def activation(x):
    return 1/(1+np.exp(-x))

# Derivative of the activation function
def activation_derivative(x):
    return activation(x)*(1-activation(x)) 

def relu(x):
    return np.maximum(0,x)

# Initialize the weights and biases
def initialize_weights_biases( n_input, n_hidden, n_output ):
    # Weights
    w1 = np.random.randn(n_input, n_hidden)
    w2 = np.random.randn(n_hidden, n_output)
    # Biases
    b1 = np.random.randn(n_hidden)
    b2 = np.random.randn(n_output)
    return w1, w2, b1, b2
    

Training Data
[[2.91721124e+03 3.28952253e+03 0.00000000e+00]
 [1.88893772e+03 7.81528356e+02 0.00000000e+00]
 [4.18852141e+03 1.55447626e+03 0.00000000e+00]
 [8.14555534e+03 9.80406673e+03 0.00000000e+00]
 [9.58448898e+03 6.17633719e+03 0.00000000e+00]
 [4.03963376e+03 1.67607143e+02 0.00000000e+00]
 [2.39902078e+03 6.25189167e+03 1.00000000e+00]
 [5.89987684e+03 7.73251478e+03 0.00000000e+00]
 [1.83251779e+03 1.30073027e+03 0.00000000e+00]
 [1.61228897e+03 6.42678176e+03 1.00000000e+00]]
--------------------
Validation Data
[[6.81791883e+03 7.86357780e+03 0.00000000e+00]
 [6.87936668e+03 3.90264518e+03 1.00000000e+00]
 [8.93210750e+02 6.13757242e+03 1.00000000e+00]
 [9.02868713e+03 7.44265682e+03 0.00000000e+00]
 [3.86222856e+03 7.46752602e+02 0.00000000e+00]
 [7.54177783e+03 6.94448705e+02 1.00000000e+00]
 [1.58208746e+03 7.46484261e+03 1.00000000e+00]
 [5.78893578e+02 7.18144291e+03 0.00000000e+00]
 [5.61728665e+03 4.28307909e+03 1.00000000e+00]
 [2.96496435e+03 3.66421328e+03 0.00

In [4]:
# Compute the mean squared error loss using the predicted op and the true y
def compute_loss(pred_op, y_T):
     return np.mean((pred_op-y_T)**2)/ np.size(y_T)

#Compute the cross entropy loss using the predicted op and the true y
def CE_loss(pred_op, y_T):
    return -np.mean(y_T*np.log(pred_op) + (1-y_T)*np.log(1-pred_op))

In [5]:
# TODO: Validate the code below. Not sure if batching is done correctly.
# Author: Jesse (GCP).

def make_batches(x_train, y_train, batch_size):
    x_batches = []
    y_batches = []

    for i in range(0, len(x_train), batch_size):
        x_batches.append(x_train[i:i+batch_size])
        y_batches.append(y_train[i:i+batch_size])

    return x_batches, y_batches

In [6]:

# Implement the forward pass for backpropagation of the MLP
def forward_pass(X, W1, W2, W3, b1, b2, b3):
    # Calculate the activations of the first hidden layer
    h1 = relu(np.dot(X, W1) + b1)
    
    # Calculate the activations of the second hidden layer
    h2 = relu(np.dot(h1, W2) + b2)
    
    # Calculate the activations of the output layer
    h3 = activation(np.dot(h2, W3) + b3)
    
    # Return the activations
    return h1, h2, h3


In [7]:
# TO DO 
# Implement the backward pass for backpropagation of the MLP
def backward_pass(x, y, h1, h2, h3, W1, W2, W3, b1, b2, b3, loss_fn):  
    # Calculate the loss function
    loss = loss_fn(h3, y)
    
    # Calculate the gradients of the loss function in regards to the output layer
    dL3 = h3 - y
    dW3 = np.dot(h2.T, dL3)
    db3 = np.sum(dL3, axis=0)
        
    # Calculate the gradients of the loss function in regards to the first hidden layer
    dL1 = np.dot(dL1, W2.T)
    dW1 = np.dot(x.T, dL1)
    db1 = np.sum(dL1, axis=0)
    
    # Calculate the gradients of the loss function in regards to the second hidden layer
    dL2 = np.dot(dL2, W3.T)
    dW2 = np.dot(h1.T, dL2)
    db2 = np.sum(dL2, axis=0)
    
    # Return the gradients in a dictionary format
    return {'dW3': dW3, 'db3': db3, 'dW2': dW2, 'db2': db2, 'dW1': dW1, 'db1': db1, 'loss': loss}

In [30]:
def update_weights(weights, grads, lr):
  """
  weights: dictionary of weights
  grads: dictionary of gradients
  lr: learning rate
  """

  # Update the weights and biases using the gradients
  weights['W1'] -= lr * grads['dW1']
  weights['b1'] -= lr * grads['db1']
  weights['W2'] -= lr * grads['dW2']
  weights['b2'] -= lr * grads['db2']  
  weights['W3'] -= lr * grads['dW3']
  weights['b3'] -= lr * grads['db3']  


  return

In [35]:
# To Do
# define you stopping criteria for training
def stopping_criteria():
  stop = False
  return stop

In [36]:
# TO DO
# Define the hyperparameters. Add any additional hyperparameters you might need
lr = 0.1  # learning rate
batch_size = np.size(x_train) # batch size
num_epochs = 200 # number of epochs

In [37]:
def train(x_train, y_train, lr, batch_size, num_epochs):
  """
  Train the MLP.
  """

  n_input = 2 # number of input features
  n_hidden = 2 # number of neurons in the hidden layers
  n_output = 1 # number of output features

  # Initialize the weights and biases
  W1, b1, W2, b2 = initialize_weights_biases(n_input, n_hidden, n_output)

  # Initialize the weights dictionary
  weights = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

  # Make batches
  x_batches, y_batches = make_batches(x_train, y_train, batch_size)

  while not stopping_criteria():
      # get a batch from the data
      index = np.random.randint(0, len(x_batches))
      x_batch, y_batch = x_batches[index], y_batches[index]

      # forward the batch through the network layers
      predicted_op = forward_pass(x_batch, weights)

      # compute the loss
      loss = compute_loss(predicted_op, y_batch)

      # perform backward pass
      grads = backward_pass(x_batch, y_batch, predicted_op, weights, loss_fn)

      # update the weights of the network
      update_weights(weights, grads, lr)


In [38]:
# Train the network with the selected hyperparameters
train(x_train, y_train, lr, batch_size, num_epochs)

# Print data from the network


TypeError: forward_pass() missing 5 required positional arguments: 'W2', 'W3', 'b1', 'b2', and 'b3'