# Lab 2 Assigment

Import relevant modules

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader
import scipy.io
import numpy as np
from sklearn.model_selection import train_test_split
import os
import time

In [None]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Read dataset from path
pathname = '/content/drive/MyDrive/CZ3005 Lab 2/OQC.mat'
mat = scipy.io.loadmat(pathname)

In [None]:
#Print Data 
full_dataset = mat['data']
full_dataset

array([[0.39052473, 0.14779758, 0.1563981 , ..., 0.07962529, 0.92246927,
        2.        ],
       [0.39052473, 0.14779758, 0.1563981 , ..., 0.05367681, 0.92148159,
        2.        ],
       [0.5084938 , 0.14779758, 0.1563981 , ..., 0.05611241, 0.9204939 ,
        2.        ],
       ...,
       [0.54001505, 0.00982888, 0.77251185, ..., 0.05611241, 0.93135813,
        0.        ],
       [0.60664394, 0.00982888, 0.77251185, ..., 0.05658079, 0.93086416,
        0.        ],
       [0.7189506 , 0.00982888, 0.77251185, ..., 0.05658079, 0.93086416,
        0.        ]])

In [None]:
print("Size of full dataset: ",len(full_dataset))

Size of full dataset:  2952


In [None]:
x, y = full_dataset, range(len(full_dataset))

In [None]:
train, test =train_test_split(x,test_size=0.3)

In [None]:
def splitXY(dataset):
  a0 = 0 #Normal
  a1 = 0 #Weaving
  a2 = 0 #Short Forming

  x = [] #The 48 parameters
  y = [] #Represents the state of molding

  for dataline in dataset:
    i = len(dataline)-1
    if (dataline[i]==0):
      a0+=1
    elif (dataline[i]==1):
      a1+=1
    elif (dataline[i]==2):
      a2+=1
    x.append(dataline[:-1])
    y.append(dataline[i])
  return x,y

In [None]:
x_train, y_train = splitXY(train)
x_test, y_test = splitXY(test)

In [None]:
x_train, y_train, x_test, y_test = map(torch.tensor, (x_train, y_train, x_test, y_test))

In [None]:
#Convert data types from double to float
x_train = x_train.float()
y_train = y_train.long()


x_test = x_test.float()
y_test = y_test.long()

In [None]:
print("Train Set: ", x_train.shape, y_train.shape)
print("Test Set: ", x_test.shape, y_test.shape)

Train Set:  torch.Size([2066, 48]) torch.Size([2066])
Test Set:  torch.Size([886, 48]) torch.Size([886])


## Task 1

You are asked to build a three-layer feed-forward neural network to solve the monitoring problem of injection molding machine. 

Your implementation must be in Pytorch and executable in Google Colab environments. 

The proportion of training and testing samples is 70:30 where your model must deliver the  smallest  testing  error  possible.    

In  that  case,  you  need  to  select  the  number  of  nodes  of  hidden layers,  the  number  of  epochs,  the  learning  rates,  the  mini-batch  size,  etc.  that  lead  to  the  smallest testing error. 

In this assignment, you have to use the SGD optimizer as exemplified in the lab materials under the mini-batch update fashion. 

The evaluation metric here is the classification error. 

No feature selection is allowed here. 

In [None]:
EPOCHS = 1000
LR = 0.4
BATCH_SIZE = 64

input_dim = 48
hidden_dim = 128
output_dim = 3
HIDDEN_LAYERS = 3

loss_fn = F.cross_entropy

In [None]:
class ourDataset(Dataset):
  def __init__(self,x,y):
    self.x = x
    self.y = y
    self.length = self.x.shape[0]  
  def __getitem__(self,idx):
    return self.x[idx],self.y[idx]  
  def __len__(self):
    return self.length

In [None]:
device = torch.device("cuda")

x_train=x_train.to(device)
y_train=y_train.to(device)
x_test=x_test.to(device)
y_test=y_test.to(device)

In [None]:
train_dataset = ourDataset(x_train, y_train)
test_dataset = ourDataset(x_test, y_test)

In [None]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=BATCH_SIZE, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=BATCH_SIZE, 
                                          shuffle=False)

In [None]:
class FFNetwork(nn.Module):
  def __init__(self, input_dim, hidden_dim, output_dim, hidden_layers=3):
    super().__init__()
    torch.manual_seed(0)
    self.layers = []
    self.layers.append(nn.Linear(input_dim, hidden_dim))
    self.layers.append(nn.ReLU())
    for i in range(hidden_layers-1):
      self.layers.append(nn.Linear(hidden_dim, hidden_dim))
      self.layers.append(nn.ReLU())
    self.layers.append(nn.Linear(hidden_dim, output_dim))
    self.layers.append(nn.Softmax())

    self.net = nn.Sequential(*self.layers)

  def forward(self, X):
    return self.net(X)

  def predict(self, X):
    Y_pred = self.forward(X)
    return Y_pred

In [None]:
def accuracy(y_hat, y):
  pred = torch.argmax(y_hat, dim=1)
  return (pred == y).float().mean()

In [None]:
def fit(x, y, model, opt, loss_fn, epochs = 1000):  
  start = time.time()
  losses = []
  for epoch in range(epochs):
    for i,(x_value,y_value) in enumerate(train_loader):
      loss = loss_fn(model(x_value), y_value)
      losses.append(loss.item())
      loss.backward()
      opt.step()
      opt.zero_grad()
    if (epoch%100==0):
      print("[%d/%d] mean_loss : %0.2f training_acc : %0.2f" %(epoch, epochs, np.mean(losses), accuracy(model(x),y_train)))
  end = time.time()
  timeTaken = end - start
  print("Execution Time: ", "{:.2f}".format(timeTaken),"s")
  return loss.item()

In [None]:
fn = FFNetwork(input_dim, hidden_dim, output_dim)
fn.to(device)

FFNetwork(
  (net): Sequential(
    (0): Linear(in_features=48, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=3, bias=True)
    (7): Softmax(dim=None)
  )
)

In [None]:
opt = optim.SGD(fn.parameters(), lr=LR)

In [None]:
print('Final loss', fit(x_train, y_train, fn, opt, loss_fn, EPOCHS))

  input = module(input)


[0/1500] mean_loss : 0.55 training_acc : 1.00
[100/1500] mean_loss : 0.55 training_acc : 1.00
[200/1500] mean_loss : 0.55 training_acc : 1.00
[300/1500] mean_loss : 0.55 training_acc : 1.00
[400/1500] mean_loss : 0.55 training_acc : 1.00
[500/1500] mean_loss : 0.55 training_acc : 1.00
[600/1500] mean_loss : 0.55 training_acc : 1.00
[700/1500] mean_loss : 0.55 training_acc : 1.00
[800/1500] mean_loss : 0.55 training_acc : 1.00
[900/1500] mean_loss : 0.55 training_acc : 1.00
[1000/1500] mean_loss : 0.55 training_acc : 1.00
[1100/1500] mean_loss : 0.55 training_acc : 1.00
[1200/1500] mean_loss : 0.55 training_acc : 1.00
[1300/1500] mean_loss : 0.55 training_acc : 1.00
[1400/1500] mean_loss : 0.55 training_acc : 1.00
Execution Time:  165.70 s
Final loss 0.5514451265335083


In [None]:
y_pred_train = fn.predict(x_train)
y_pred_val = fn.predict(x_test)
accuracy_train = accuracy(y_pred_train, y_train)
accuracy_val = accuracy(y_pred_val, y_test)

  input = module(input)


In [None]:
print("Training accuracy", (accuracy_train))
print("Validation accuracy",(accuracy_val))

Training accuracy tensor(0.9589, device='cuda:0')
Validation accuracy tensor(0.9673, device='cuda:0')


# Task 2

You are asked to study the effect of network structure: hidden nodes, hidden layers to the classification performance.  
That  is,  you  try  different  network  configurations  and  understand  the  patterns. 
Your experiments have to be well-documented in your Jupyter notebook file and your report. 
It has to cover different aspects of network configurations such as shallow network, wide network, deep network etc.

### Effect of number of Hidden Layers 

In [None]:
HIDDEN_LAYERS = 5

In [None]:
fn_hl = FFNetwork(input_dim, hidden_dim, output_dim, HIDDEN_LAYERS)
fn_hl.to(device)

FFNetwork(
  (net): Sequential(
    (0): Linear(in_features=48, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=128, bias=True)
    (9): ReLU()
    (10): Linear(in_features=128, out_features=3, bias=True)
    (11): Softmax(dim=None)
  )
)

In [None]:
opt_hl = optim.SGD(fn_hl.parameters(), lr=LR)

In [None]:
print('Final loss', fit(x_train, y_train, fn_hl, opt_hl, loss_fn, EPOCHS))

  input = module(input)


[0/1000] mean_loss : 1.10 training_acc : 0.36
[100/1000] mean_loss : 0.65 training_acc : 0.96
[200/1000] mean_loss : 0.62 training_acc : 0.96
[300/1000] mean_loss : 0.61 training_acc : 0.96
[400/1000] mean_loss : 0.60 training_acc : 0.96
[500/1000] mean_loss : 0.60 training_acc : 0.96
[600/1000] mean_loss : 0.60 training_acc : 0.96
[700/1000] mean_loss : 0.60 training_acc : 0.96
[800/1000] mean_loss : 0.60 training_acc : 0.96
[900/1000] mean_loss : 0.60 training_acc : 0.96
Final loss 0.5514453053474426


In [None]:
y_pred_train = fn_hl.predict(x_train)
y_pred_val = fn_hl.predict(x_test)
accuracy_train = accuracy(y_pred_train, y_train)
accuracy_val = accuracy(y_pred_val, y_test)

  input = module(input)


In [None]:
print("Training accuracy", (accuracy_train))
print("Validation accuracy",(accuracy_val))

Training accuracy tensor(0.9637, device='cuda:0')
Validation accuracy tensor(0.9560, device='cuda:0')


From our tests through the different hidden layers, we find that having 5 hidden layers gives the optimal validation and training accuracy.



### Effect of Size of Hidden Nodes

In [None]:
hidden_dim = 128

In [None]:
fn_hn = FFNetwork(input_dim, hidden_dim, output_dim, HIDDEN_LAYERS)
fn_hn.to(device)

FFNetwork(
  (net): Sequential(
    (0): Linear(in_features=48, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=128, bias=True)
    (9): ReLU()
    (10): Linear(in_features=128, out_features=3, bias=True)
    (11): Softmax(dim=None)
  )
)

In [None]:
opt_hn = optim.SGD(fn_hn.parameters(), lr=LR)

In [None]:
print('Final loss', fit(x_train, y_train, fn_hn, opt_hn, loss_fn, EPOCHS))

  input = module(input)


[0/1000] mean_loss : 1.10 training_acc : 0.36
[100/1000] mean_loss : 0.65 training_acc : 0.96
[200/1000] mean_loss : 0.62 training_acc : 0.96
[300/1000] mean_loss : 0.61 training_acc : 0.96
[400/1000] mean_loss : 0.60 training_acc : 0.96
[500/1000] mean_loss : 0.60 training_acc : 0.96
[600/1000] mean_loss : 0.60 training_acc : 0.96
[700/1000] mean_loss : 0.60 training_acc : 0.96
[800/1000] mean_loss : 0.60 training_acc : 0.96
[900/1000] mean_loss : 0.60 training_acc : 0.96
Final loss 0.551445722579956


In [None]:
y_pred_train = fn_hn.predict(x_train)
y_pred_val = fn_hn.predict(x_test)
accuracy_train = accuracy(y_pred_train, y_train)
accuracy_val = accuracy(y_pred_val, y_test)

  input = module(input)


In [None]:
print("Training accuracy", (accuracy_train))
print("Validation accuracy",(accuracy_val))

Training accuracy tensor(0.9637, device='cuda:0')
Validation accuracy tensor(0.9560, device='cuda:0')


We find that with having 128 as the number of hidden nodes is the most optimal in getting the highest validation and training accuracy.

# Task 3

You are asked to study the effect of learning rates. 

As with Task 2, your experiments have to be well-documented. 
You need to give correct conclusion and give suggestion how learning rates should be set.  

This  includes  possible  adaptive  learning  rates  where  the  value  increases  or  decreases  as  the increase of epochs. 

### Effects of Learning Rates

In [None]:
LR = 0.5

In [None]:
fn_lr = FFNetwork(input_dim, hidden_dim, output_dim, HIDDEN_LAYERS)
fn_lr.to(device)

FFNetwork(
  (net): Sequential(
    (0): Linear(in_features=48, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=128, bias=True)
    (9): ReLU()
    (10): Linear(in_features=128, out_features=3, bias=True)
    (11): Softmax(dim=None)
  )
)

In [None]:
opt_lr = optim.SGD(fn_lr.parameters(), lr=LR)

In [None]:
print('Final loss', fit(x_train, y_train, fn_lr, opt_lr, loss_fn, EPOCHS))

  input = module(input)


[0/1000] mean_loss : 1.10 training_acc : 0.36
[100/1000] mean_loss : 0.65 training_acc : 0.96
[200/1000] mean_loss : 0.62 training_acc : 0.96
[300/1000] mean_loss : 0.61 training_acc : 0.96
[400/1000] mean_loss : 0.60 training_acc : 0.96
[500/1000] mean_loss : 0.60 training_acc : 0.96
[600/1000] mean_loss : 0.60 training_acc : 0.96
[700/1000] mean_loss : 0.60 training_acc : 0.96
[800/1000] mean_loss : 0.60 training_acc : 0.96
[900/1000] mean_loss : 0.60 training_acc : 0.96
Execution Time:  148.32 s
Final loss 0.5514464974403381


In [None]:
y_pred_train = fn_lr.predict(x_train)
y_pred_val = fn_lr.predict(x_test)
accuracy_train = accuracy(y_pred_train, y_train)
accuracy_val = accuracy(y_pred_val, y_test)

  input = module(input)


In [None]:
print("Training accuracy", (accuracy_train))
print("Validation accuracy",(accuracy_val))

Training accuracy tensor(0.9589, device='cuda:0')
Validation accuracy tensor(0.9673, device='cuda:0')


We find that learning rate of 0.3 is optimal for finding the optimal validation accuracy and training accuracy.

### Effects of Number of Epochs

In [None]:
EPOCHS = 1000

In [None]:
fn_ep = FFNetwork(input_dim, hidden_dim, output_dim, HIDDEN_LAYERS)
fn_ep.to(device)

FFNetwork(
  (net): Sequential(
    (0): Linear(in_features=48, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=128, bias=True)
    (9): ReLU()
    (10): Linear(in_features=128, out_features=3, bias=True)
    (11): Softmax(dim=None)
  )
)

In [None]:
opt_ep = optim.SGD(fn_ep.parameters(), lr=LR)

In [None]:
print('Final loss', fit(x_train, y_train, fn_ep, opt_ep, loss_fn, EPOCHS))

  input = module(input)


[0/1250] mean_loss : 1.10 training_acc : 0.37
[100/1250] mean_loss : 0.65 training_acc : 0.96
[200/1250] mean_loss : 0.62 training_acc : 0.96
[300/1250] mean_loss : 0.61 training_acc : 0.96
[400/1250] mean_loss : 0.61 training_acc : 0.96
[500/1250] mean_loss : 0.60 training_acc : 0.96
[600/1250] mean_loss : 0.60 training_acc : 0.96
[700/1250] mean_loss : 0.60 training_acc : 0.96
[800/1250] mean_loss : 0.60 training_acc : 0.96
[900/1250] mean_loss : 0.60 training_acc : 0.96
[1000/1250] mean_loss : 0.60 training_acc : 0.96
[1100/1250] mean_loss : 0.60 training_acc : 0.96
[1200/1250] mean_loss : 0.60 training_acc : 0.96
Execution Time:  182.56 s
Final loss 0.5514468550682068


In [None]:
y_pred_train = fn_ep.predict(x_train)
y_pred_val = fn_ep.predict(x_test)
accuracy_train = accuracy(y_pred_train, y_train)
accuracy_val = accuracy(y_pred_val, y_test)

  input = module(input)


In [None]:
print("Training accuracy", (accuracy_train))
print("Validation accuracy",(accuracy_val))

Training accuracy tensor(0.9608, device='cuda:0')
Validation accuracy tensor(0.9628, device='cuda:0')


We have found that 1000 is the optimal number of epochs.


# Task 4

You are asked to study the effect of mini-batch size. 

You can set mini-batch size to be 1 (stochastic gradient  descent),  N  (batch  gradient  descent)  or  any  other  size.  

The  most  important  aspect  is  to  be conclusive with your finding. 

The mini-batch size really depends on the problem size. 

In [None]:
BATCH_SIZE = 256

In [None]:
fn_bs = FFNetwork(input_dim, hidden_dim, output_dim, HIDDEN_LAYERS)
fn_bs.to(device)

FFNetwork(
  (net): Sequential(
    (0): Linear(in_features=48, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=128, bias=True)
    (9): ReLU()
    (10): Linear(in_features=128, out_features=3, bias=True)
    (11): Softmax(dim=None)
  )
)

In [None]:
opt_bs = optim.SGD(fn_bs.parameters(), lr=LR)

In [None]:
print('Final loss', fit(x_train, y_train, fn_bs, opt_bs, loss_fn, EPOCHS))

  input = module(input)


[0/1000] mean_loss : 1.10 training_acc : 0.37
[100/1000] mean_loss : 0.65 training_acc : 0.96
[200/1000] mean_loss : 0.62 training_acc : 0.96
[300/1000] mean_loss : 0.61 training_acc : 0.96
[400/1000] mean_loss : 0.61 training_acc : 0.96
[500/1000] mean_loss : 0.60 training_acc : 0.96
[600/1000] mean_loss : 0.60 training_acc : 0.96
[700/1000] mean_loss : 0.60 training_acc : 0.96
[800/1000] mean_loss : 0.60 training_acc : 0.96
[900/1000] mean_loss : 0.60 training_acc : 0.96
Execution Time:  145.53 s
Final loss 0.5514452457427979


In [None]:
y_pred_train = fn_bs.predict(x_train)
y_pred_val = fn_bs.predict(x_test)
accuracy_train = accuracy(y_pred_train, y_train)
accuracy_val = accuracy(y_pred_val, y_test)

  input = module(input)


In [None]:
print("Training accuracy", (accuracy_train))
print("Validation accuracy",(accuracy_val))

Training accuracy tensor(0.9613, device='cuda:0')
Validation accuracy tensor(0.9616, device='cuda:0')


We find that a batch size of 256 is the optimal input.

### Optimal Parameters

In [None]:
HIDDEN_LAYERS = 5
hidden_dim = 128

LR = 0.3
EPOCHS = 1000

BATCH_SIZE = 256