In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import pandas as pd
import scipy.io
import os
import numpy as np
from sklearn.model_selection import train_test_split
import time
import math

from collections import defaultdict
import plotly.graph_objects as go
from tqdm import tqdm

# Exploratory Data Analysis

In [2]:
def reset_random_seeds(seed=1):
    '''
    Sets all necessary seed for reproduceability.
    '''
    os.environ['PYTHONHASHSEED']=str(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

reset_random_seeds()

In [3]:
FILE_PATH = '../../data/'

# Open table of column names text file, get the column names
f = open(FILE_PATH+'col_names.txt')
cols = f.read()
cols = [' '.join(c.split()[1:]) for c in cols.split('\n')] + ['LABEL']

# Read dataset and convert to dataframe
data = scipy.io.loadmat(FILE_PATH+'OQC.mat')
df = pd.DataFrame(data['data'], columns=cols)
# Shuffle pandas dataframe
df = df.sample(frac=1, random_state=1).reset_index(drop=True)

print("Unique labels:", df['LABEL'].unique())
df.head()

Unique labels: [1. 0. 2.]


Unnamed: 0,"SCREW VOLUME, ACTUAL VALUE","MATERIAL CUSHION, ACTUAL VALUE","DOSAGE TIME, ACTUAL VALUE","CYCLE TIME, ACTUAL VALUE","MOULD HEATING CIRCUIT 1, ACTUAL VALUE","MOULD HEATING CIRCUIT 2, ACTUAL VALUE","MOULD HEATING CIRCUIT 3, ACTUAL VALUE","MOULD HEATING CIRCUIT 4, ACTUAL VALUE","MOULD HEATING CIRCUIT 5, ACTUAL VALUE","MOULD HEATING CIRCUIT 6, ACTUAL VALUE",...,"DOSAGE TORQUE, ACTUAL VALUE","DOSAGE ROTATIONAL SPEED, ACTUAL VALUE","HYDRAULIC ACCUMULATOR PRESSURE, ACTUAL VALUE","CHARGE PRESSURE OF ACCUMULATOR, MEASURED VALUE","MOULD-ENTRY TIME, ACTUAL VALUE","PART REMOVAL TIME, ACTUAL VALUE","MAXIMUM INJECTION PRESSURE, ACTUAL VALUE","BACK PRESSURE, ACTUAL","CLAMPING FORCE, ACTUAL",LABEL
0,0.267648,0.080815,0.620853,0.000113,0.666667,0.666667,0.666667,0.714286,0.5,0.785714,...,0.682513,0.037255,0.032894,0.0,0.0,0.0,0.196721,0.196721,0.925926,1.0
1,1.0,0.121951,0.772512,0.001732,0.222222,0.333333,0.222222,0.428571,0.25,0.5,...,0.696095,0.037255,0.166076,1.0,3.8e-05,0.125,0.026042,0.026042,0.006914,1.0
2,0.473009,0.065162,0.819905,0.000113,0.666667,0.444444,0.444444,0.857143,0.5,0.214286,...,0.879457,0.994118,0.595073,0.0,7.5e-05,0.25,0.057892,0.056674,0.927901,0.0
3,1.0,0.106662,0.535545,0.001016,0.333333,0.333333,0.333333,0.285714,0.5,0.785714,...,0.70798,0.037255,0.483439,1.0,0.000113,0.375,0.025761,0.025761,0.013333,1.0
4,0.300113,0.030579,0.649289,0.002409,0.666667,0.888889,0.555556,1.0,0.25,0.5,...,0.772496,0.052941,0.680918,1.0,3.8e-05,0.125,0.058361,0.058361,0.912099,0.0


In [4]:
# Find if there are any duplicated rows. If there are, make sure they dont belong in both
# train and test set.
duplicated_idx = [i for i,k in enumerate(df.duplicated()) if k]
df.iloc[duplicated_idx]

Unnamed: 0,"SCREW VOLUME, ACTUAL VALUE","MATERIAL CUSHION, ACTUAL VALUE","DOSAGE TIME, ACTUAL VALUE","CYCLE TIME, ACTUAL VALUE","MOULD HEATING CIRCUIT 1, ACTUAL VALUE","MOULD HEATING CIRCUIT 2, ACTUAL VALUE","MOULD HEATING CIRCUIT 3, ACTUAL VALUE","MOULD HEATING CIRCUIT 4, ACTUAL VALUE","MOULD HEATING CIRCUIT 5, ACTUAL VALUE","MOULD HEATING CIRCUIT 6, ACTUAL VALUE",...,"DOSAGE TORQUE, ACTUAL VALUE","DOSAGE ROTATIONAL SPEED, ACTUAL VALUE","HYDRAULIC ACCUMULATOR PRESSURE, ACTUAL VALUE","CHARGE PRESSURE OF ACCUMULATOR, MEASURED VALUE","MOULD-ENTRY TIME, ACTUAL VALUE","PART REMOVAL TIME, ACTUAL VALUE","MAXIMUM INJECTION PRESSURE, ACTUAL VALUE","BACK PRESSURE, ACTUAL","CLAMPING FORCE, ACTUAL",LABEL


In [5]:
# Input shape
np.array(df.iloc[0]).shape

(49,)

In [6]:
device= torch.device("cuda")
# device= torch.device("cpu")
print(device)

cuda


In [7]:
# Count number of each classes. If heavily inbalanced we need to do stratified split. Otherwise
# just splitting by index should be fine.
print(df['LABEL'].value_counts())
Y = df.iloc[:, -1:]
X = df.iloc[:, :-1]
# Train test split with stratification sampling
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=1, shuffle=True, stratify=Y)

# x_train = X.iloc[:int(0.7*len(X))]
# y_train = Y.iloc[:int(0.7*len(Y))]
# x_test = X.iloc[int(0.7*len(X)):]
# y_test = Y.iloc[int(0.7*len(Y)):]


# convert to torch tensors with relevant dtypes.
train_data = torch.tensor(x_train.values).type('torch.cuda.DoubleTensor')
test_data = torch.tensor(x_test.values).type('torch.cuda.DoubleTensor')

train_label = torch.tensor(y_train.values).type('torch.cuda.LongTensor').reshape(len(y_train))
test_label = torch.tensor(y_test.values).type('torch.cuda.LongTensor').reshape(len(y_test))

1.0    1074
0.0    1008
2.0     870
Name: LABEL, dtype: int64


In [8]:
print("Train Labels Distributions: ", y_train.value_counts())
print("Test Labels Distribution: ", y_test.value_counts())

Train Labels Distributions:  LABEL
1.0      752
0.0      705
2.0      609
dtype: int64
Test Labels Distribution:  LABEL
1.0      322
0.0      303
2.0      261
dtype: int64


# Functions

In [9]:
def get_error( scores , labels ):

    bs=scores.size(0)
    predicted_labels = scores.argmax(dim=1)
    indicator = (predicted_labels == labels)
    num_matches=indicator.sum()
    
    return 1-num_matches.float()/bs  

In [10]:
def display_num_param(net):
    nb_param = 0
    for param in net.parameters():
        nb_param += param.numel()
    print('There are {} ({:.2f} million) parameters in this neural network'.format(
        nb_param, nb_param/1e6)
         )

In [11]:
class three_layer_net(nn.Module):

    def __init__(self, input_size, hidden_size1, hidden_size2,  output_size):
        super(three_layer_net , self).__init__()

        self.layer1 = nn.Linear(input_size, hidden_size1, bias=False)
        self.layer2 = nn.Linear(hidden_size1, hidden_size2, bias=False)
        self.layer3 = nn.Linear(hidden_size2, output_size, bias=False)        
        
    def forward(self, x):
        
        y       = self.layer1(x)
        y_hat   = F.relu(y)
        z       = self.layer2(y_hat)
        z_hat   = F.relu(z)
        scores  = self.layer3(z_hat)
        
        return scores

In [12]:
class twelve_layer_net(nn.Module):

    def __init__(self, input_size, hidden_size,  output_size):
        super(twelve_layer_net , self).__init__()

        self.layer1 = nn.Linear(input_size, hidden_size[0], bias=False)
        self.layer2 = nn.Linear(hidden_size[0], hidden_size[1], bias=False)
        self.layer3 = nn.Linear(hidden_size[1], hidden_size[2], bias=False)   

        self.layer4 = nn.Linear(hidden_size[2], hidden_size[3], bias=False)
        self.layer5 = nn.Linear(hidden_size[3], hidden_size[4], bias=False)
        self.layer6 = nn.Linear(hidden_size[4], hidden_size[5], bias=False)    

        self.layer7 = nn.Linear(hidden_size[5], hidden_size[6], bias=False)
        self.layer8 = nn.Linear(hidden_size[6], hidden_size[7], bias=False)
        self.layer9 = nn.Linear(hidden_size[7], hidden_size[8], bias=False)  

        self.layer10 = nn.Linear(hidden_size[8], hidden_size[9], bias=False)
        self.layer11 = nn.Linear(hidden_size[9], hidden_size[10], bias=False)
        self.layer12 = nn.Linear(hidden_size[10], output_size, bias=False)

    def forward(self, x):
        y1       = self.layer1(x)
        y_hat1   = F.relu(y1)

        y2       = self.layer2(y_hat1)
        y_hat2   = F.relu(y2)

        y3       = self.layer3(y_hat2)
        y_hat3   = F.relu(y3)

        y4       = self.layer4(y_hat3)
        y_hat4   = F.relu(y4)

        y5       = self.layer5(y_hat4)
        y_hat5   = F.relu(y5)

        y6       = self.layer6(y_hat5)
        y_hat6   = F.relu(y6)

        y7       = self.layer7(y_hat6)
        y_hat7   = F.relu(y7)

        y8       = self.layer8(y_hat7)
        y_hat8   = F.relu(y8)

        y9       = self.layer9(y_hat8)
        y_hat9   = F.relu(y9)

        y10       = self.layer10(y_hat9)
        y_hat10   = F.relu(y10)

        y11       = self.layer11(y_hat10)
        y_hat11   = F.relu(y11)

        scores  = self.layer12(y_hat11)
        
        return scores

In [22]:
class ten_layer_net(nn.Module):

    def __init__(self, input_size, hidden_size,  output_size):
        super(ten_layer_net , self).__init__()

        self.layer1 = nn.Linear(input_size, hidden_size[0], bias=False)
        self.layer2 = nn.Linear(hidden_size[0], hidden_size[1], bias=False)
        self.layer3 = nn.Linear(hidden_size[1], hidden_size[2], bias=False)   
        self.layer4 = nn.Linear(hidden_size[2], hidden_size[3], bias=False)

        self.layer5 = nn.Linear(hidden_size[3], hidden_size[4], bias=False)
        self.layer6 = nn.Linear(hidden_size[4], hidden_size[5], bias=False)    
        self.layer7 = nn.Linear(hidden_size[5], hidden_size[6], bias=False)
        self.layer8 = nn.Linear(hidden_size[6], hidden_size[7], bias=False)
        
        self.layer9 = nn.Linear(hidden_size[7], hidden_size[8], bias=False)
        self.layer10 = nn.Linear(hidden_size[8], output_size, bias=False)

    def forward(self, x):
        y1       = self.layer1(x)
        y_hat1   = F.relu(y1)

        y2       = self.layer2(y_hat1)
        y_hat2   = F.relu(y2)

        y3       = self.layer3(y_hat2)
        y_hat3   = F.relu(y3)

        y4       = self.layer4(y_hat3)
        y_hat4   = F.relu(y4)

        y5       = self.layer5(y_hat4)
        y_hat5   = F.relu(y5)

        y6       = self.layer6(y_hat5)
        y_hat6   = F.relu(y6)

        y7       = self.layer7(y_hat6)
        y_hat7   = F.relu(y7)

        y8       = self.layer8(y_hat7)
        y_hat8   = F.relu(y8)

        y9       = self.layer9(y_hat8)
        y_hat9   = F.relu(y9)

        scores  = self.layer10(y_hat9)
        
        return scores

In [14]:
def eval(model, batch_size, trainable, lr):
    running_error = 0
    running_loss = 0

    data_size = len(train_data) if trainable else len(test_data)
    data = train_data if trainable else test_data
    label = train_label if trainable else test_label

    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    for num_batches, count in enumerate(range(math.ceil(data_size/batch_size))):
        if trainable:
            # Set the gradients to zeros
            optimizer.zero_grad()
        
        # Get minibatch data
        minibatch_data = data[count*bs:(count+1)*bs]
        minibatch_label = label[count*bs:(count+1)*bs]

        # Load data to GPU
        minibatch_data = minibatch_data.to(device)
        minibatch_label = minibatch_label.to(device)

        # Record gradients on forward pass if we are training
        if trainable:
            minibatch_data.requires_grad_()
        
        # Pass the data through the model
        scores = model(minibatch_data.float())

        # Compute loss and error
        loss = criterion(scores, minibatch_label)
        

        # Perform backward pass if we are training
        if trainable:
            loss.backward()
            optimizer.step()
            
        error = get_error(scores.detach(), minibatch_label)
        running_loss += loss.detach().item()
        running_error += error.item()

    # Compute loss, error and time taken for this epoch
    total_loss = running_loss/(num_batches+1)
    total_error = running_error/(num_batches+1)

    return total_loss, total_error


In [15]:
def fit(model, batch_size, num_epochs, lr, decay, verbose=0):

    history = defaultdict(list)
    for epoch in range(1, num_epochs+1):
        start = time.time()
        train_loss, train_error = eval(model, batch_size, trainable=True, lr=lr)
        test_loss, test_error = eval(model, batch_size, trainable=False, lr=lr)

        history['train_error'].append(train_error)
        history['train_loss'].append(train_loss)
        history['test_error'].append(test_error)
        history['test_loss'].append(test_loss)
        history['time'].append(time.time()-start)

        if verbose:
            if epoch%verbose==0:
                print(f" Epoch {epoch}, Training Error: {train_error}, Training Loss: {train_loss}, Test Error: {test_error}, Test Loss: {test_loss}, Time Elapsed: {time.time()-start}")

        lr = lr * decay

    return history


In [16]:
def plot_line(title, y_axis_name, x_axis_name, **kwargs):
    fig = go.Figure()
    for name, x in kwargs.items():
    # Create traces
        fig.add_trace(go.Scatter(y=x,
                            mode='lines',
                            name=' '.join(name.split('_'))))
        fig.add_trace(go.Scatter(y=[min(x)], x=[x.index(min(x))], mode='markers', name='Min. Point '+' '.join(name.split('_')), marker=dict(size=[10])))

    fig.update_layout(title=title,
                   xaxis_title=x_axis_name,
                   yaxis_title=y_axis_name)
    return fig

# Task 1

You are asked to build a three-layer feed-forward neural network to solve the monitoring problem of
injection molding machine. Your implementation must be in Pytorch and executable in Google Colab
environments. The proportion of training and testing samples is 70:30 where your model must deliver
the smallest testing error possible. In that case, you need to select the number of nodes of hidden
layers, the number of epochs, the learning rates, the mini-batch size, etc. that lead to the smallest
testing error. In this assignment, you have to use the SGD optimizer as exemplified in the lab materials
under the mini-batch update fashion. The evaluation metric here is the classification error. No feature
selection is allowed here.

In [19]:
task1_network = three_layer_net(48,500,500,3)
task1_network = task1_network.to(device)

display_num_param(task1_network)
print(task1_network)

criterion = nn.CrossEntropyLoss()

bs = 200
history = fit(task1_network, bs, num_epochs=2000, lr=0.01, decay=1, verbose=20)


There are 275500 (0.28 million) parameters in this neural network
three_layer_net(
  (layer1): Linear(in_features=48, out_features=500, bias=False)
  (layer2): Linear(in_features=500, out_features=500, bias=False)
  (layer3): Linear(in_features=500, out_features=3, bias=False)
)
 Epoch 20, Training Error: 0.3385537320917303, Training Loss: 1.0208500840447166, Test Error: 0.31276745796203614, Test Loss: 1.0155786514282226, Time Elapsed: 0.020943641662597656
 Epoch 40, Training Error: 0.18202480402859775, Training Loss: 0.8661066564646634, Test Error: 0.15593024492263793, Test Loss: 0.851167869567871, Time Elapsed: 0.0219271183013916
 Epoch 60, Training Error: 0.0878099419853904, Training Loss: 0.6299230347980153, Test Error: 0.07862794399261475, Test Loss: 0.6089390516281128, Time Elapsed: 0.01795172691345215
 Epoch 80, Training Error: 0.08506889776750044, Training Loss: 0.4522665955803611, Test Error: 0.08795351982116699, Test Loss: 0.4325199484825134, Time Elapsed: 0.02092766761779785

In [20]:
fig1 = plot_line('Task 1 Train/Test Errors', 'Error', 'Epochs', Train_Error=history['train_error'], Test_Error=history['test_error'])
fig2 = plot_line('Task 1 Train/Test Loss', 'Loss', 'Epochs', Train_Loss=history['train_loss'], Test_Loss=history['test_loss'])

fig1.show(), fig2.show()

(None, None)

In [17]:
# find any duplicates in the x_train and x_test dataframes
np.array([x for x in set(tuple(x) for x in x_train.values) & set(tuple(x) for x in x_test.values)])

array([], dtype=float64)

In [18]:
# find any duplicates in the test_data tensors and the train_data tensors
for test in tqdm(test_data):
    for train in train_data:
        if (test == train).all():
            print(test)
            break

  2%|▏         | 15/886 [00:02<02:34,  5.63it/s]


KeyboardInterrupt: 

# Task 2

In [24]:
reset_random_seeds()

deep_lean_model = twelve_layer_net(48, [5,5,5,5,4,4,4,4,3,3,3], 3)
deep_lean_model = deep_lean_model.to(device)

print(deep_lean_model)
display_num_param(deep_lean_model)

criterion = nn.CrossEntropyLoss()
optimizer=torch.optim.SGD( deep_lean_model.to(device).parameters() , lr=0.1 )
bs = 200

history_deep_lean_model = fit(deep_lean_model, bs, num_epochs=2000, lr=0.01, decay=1, verbose=20)

twelve_layer_net(
  (layer1): Linear(in_features=48, out_features=5, bias=False)
  (layer2): Linear(in_features=5, out_features=5, bias=False)
  (layer3): Linear(in_features=5, out_features=5, bias=False)
  (layer4): Linear(in_features=5, out_features=5, bias=False)
  (layer5): Linear(in_features=5, out_features=4, bias=False)
  (layer6): Linear(in_features=4, out_features=4, bias=False)
  (layer7): Linear(in_features=4, out_features=4, bias=False)
  (layer8): Linear(in_features=4, out_features=4, bias=False)
  (layer9): Linear(in_features=4, out_features=3, bias=False)
  (layer10): Linear(in_features=3, out_features=3, bias=False)
  (layer11): Linear(in_features=3, out_features=3, bias=False)
  (layer12): Linear(in_features=3, out_features=3, bias=False)
)
There are 422 (0.00 million) parameters in this neural network
 Epoch 20, Training Error: 0.6601652882315896, Training Loss: 1.098612286827781, Test Error: 0.6572325706481934, Test Loss: 1.098612380027771, Time Elapsed: 0.0388391017

In [26]:
reset_random_seeds()

deep_wide_model = twelve_layer_net(48, [500]*4 + [400]*4 + [300]*4, 3)
deep_wide_model = deep_wide_model.to(device)

print(deep_wide_model)
display_num_param(deep_wide_model)


criterion = nn.CrossEntropyLoss()
optimizer=torch.optim.SGD( deep_wide_model.to(device).parameters() , lr=0.01 )
bs = 200

history_deep_wide_model = fit(deep_wide_model, bs, num_epochs=2000, lr=0.01, decay=1, verbose=20)

twelve_layer_net(
  (layer1): Linear(in_features=48, out_features=500, bias=False)
  (layer2): Linear(in_features=500, out_features=500, bias=False)
  (layer3): Linear(in_features=500, out_features=500, bias=False)
  (layer4): Linear(in_features=500, out_features=500, bias=False)
  (layer5): Linear(in_features=500, out_features=400, bias=False)
  (layer6): Linear(in_features=400, out_features=400, bias=False)
  (layer7): Linear(in_features=400, out_features=400, bias=False)
  (layer8): Linear(in_features=400, out_features=400, bias=False)
  (layer9): Linear(in_features=400, out_features=300, bias=False)
  (layer10): Linear(in_features=300, out_features=300, bias=False)
  (layer11): Linear(in_features=300, out_features=300, bias=False)
  (layer12): Linear(in_features=300, out_features=3, bias=False)
)
There are 1754900 (1.75 million) parameters in this neural network
 Epoch 20, Training Error: 0.7038016644391146, Training Loss: 1.0986131646416404, Test Error: 0.7032093167304992, Test Lo

In [27]:
reset_random_seeds()

shallow_lean_model = three_layer_net(48, 4, 3, 3)
shallow_lean_model = shallow_lean_model.to(device)

print(shallow_lean_model)
display_num_param(shallow_lean_model)


criterion = nn.CrossEntropyLoss()
optimizer=torch.optim.SGD( shallow_lean_model.to(device).parameters() , lr=0.01 )
bs = 200

history_shallow_lean_model = fit(shallow_lean_model, bs, num_epochs=2000, lr=0.01, decay=1, verbose=20)

three_layer_net(
  (layer1): Linear(in_features=48, out_features=4, bias=False)
  (layer2): Linear(in_features=4, out_features=3, bias=False)
  (layer3): Linear(in_features=3, out_features=3, bias=False)
)
There are 213 (0.00 million) parameters in this neural network
 Epoch 20, Training Error: 0.6314738284457814, Training Loss: 1.0828279581936924, Test Error: 0.6458837270736695, Test Loss: 1.0834231853485108, Time Elapsed: 0.017208337783813477
 Epoch 40, Training Error: 0.5732369477098639, Training Loss: 1.064452669837258, Test Error: 0.5543023467063903, Test Loss: 1.0645866632461547, Time Elapsed: 0.016403913497924805
 Epoch 60, Training Error: 0.6506198427893899, Training Loss: 1.0311125083403154, Test Error: 0.6429069876670838, Test Loss: 1.030606198310852, Time Elapsed: 0.019893646240234375
 Epoch 80, Training Error: 0.6601652882315896, Training Loss: 0.9788507168943231, Test Error: 0.6572325706481934, Test Loss: 0.9771213412284852, Time Elapsed: 0.01896381378173828
 Epoch 100, Tr

In [28]:
fig3 = plot_line('Task 1 Train/Test Errors', 'Error', 'Epochs', 
                    Shallow_Wide_Train_Error=history['train_error'], Shallow_Wide_Test_Error=history['test_error'], 
                    Shallow_Lean_Train_Error=history_shallow_lean_model['train_error'], Shallow_Lean_Test_Error=history_shallow_lean_model['test_error'],
                    Deep_Wide_Train_Error=history_deep_wide_model['train_error'], Deep_Wide_Test_Error=history_deep_wide_model['test_error'],
                    Deep_Lean_Train_Error=history_deep_lean_model['train_error'], Deep_Lean_Test_error=history_deep_lean_model['test_error'])

fig3.show()

# Task 3

# Scrapbook

In [37]:
reset_random_seeds()
a = ten_layer_net(48, [500]*10,3)
a = a.to(device)
print(a)

criterion = nn.CrossEntropyLoss()
optimizer=torch.optim.SGD( a.to(device).parameters() , lr=0.01 )
bs = 200

history = fit(a, bs, num_epochs=2000, lr=0.01, decay=1, verbose=20)

ten_layer_net(
  (layer1): Linear(in_features=48, out_features=500, bias=False)
  (layer2): Linear(in_features=500, out_features=500, bias=False)
  (layer3): Linear(in_features=500, out_features=500, bias=False)
  (layer4): Linear(in_features=500, out_features=500, bias=False)
  (layer5): Linear(in_features=500, out_features=500, bias=False)
  (layer6): Linear(in_features=500, out_features=500, bias=False)
  (layer7): Linear(in_features=500, out_features=500, bias=False)
  (layer8): Linear(in_features=500, out_features=500, bias=False)
  (layer9): Linear(in_features=500, out_features=500, bias=False)
  (layer10): Linear(in_features=500, out_features=3, bias=False)
)
 Epoch 20, Training Error: 0.665165283463218, Training Loss: 1.0986132513393054, Test Error: 0.6499069809913636, Test Loss: 1.0986135244369506, Time Elapsed: 0.03766751289367676
 Epoch 40, Training Error: 0.6628925583579324, Training Loss: 1.0986116040836682, Test Error: 0.6489069819450378, Test Loss: 1.0986119508743286, Ti

In [38]:
fig1 = plot_line('Task 1 Train/Test Errors', 'Error', 'Epochs', Train_Error=history['train_error'], Test_Error=history['test_error'])
fig2 = plot_line('Task 1 Train/Test Loss', 'Loss', 'Epochs', Train_Loss=history['train_loss'], Test_Loss=history['test_loss'])

fig1.show(), fig2.show()

(None, None)

In [61]:
fig1 = plot_line('Task 1 Train/Test Errors', 'Error', 'Epochs', Train_Error=history['train_error'], Test_Error=history['test_error'])
fig2 = plot_line('Task 1 Train/Test Loss', 'Loss', 'Epochs', Train_Loss=history['train_loss'], Test_Loss=history['test_loss'])

fig1.show(), fig2.show()

(None, None)

In [None]:
# def eval_on_test_set(bs, model=None):
#     running_error= 0
#     num_batches= 0
#     running_loss = 0
#     for count in range(math.ceil(len(test_data)/bs)):

#         # extract the minibatch
#         test_minibatch_data =  test_data[count*bs:(count+1)*bs]
#         test_minibatch_label =  test_label[count*bs:(count+1)*bs]

#         # send them to the gpu
#         test_minibatch_data=test_minibatch_data.to(device)
#         test_minibatch_label=test_minibatch_label.to(device)

#         # feed it to the network
#         scores=task1_network(test_minibatch_data.float()) 

#         # compute the error made on this batch

#         # Get loss
#         loss =  criterion( scores , test_minibatch_label) 
#         running_loss += loss.detach().item()

#         error = get_error( scores , test_minibatch_label)
#         # add it to the running error
#         running_error += error.item()
#         num_batches+=1

#     # compute error rate on the full test set
#     total_error = running_error/num_batches
#     total_loss = running_loss/num_batches
#     print(f'error rate on test set = {total_error*100}%, test loss = {total_loss*100}')

In [None]:
# def display_num_param(net):
#     nb_param = 0
#     for param in net.parameters():
#         nb_param += param.numel()
#     print('There are {} ({:.2f} million) parameters in this neural network'.format(
#         nb_param, nb_param/1e6)
#          )