In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import pandas as pd
import scipy.io
import os
import numpy as np
from sklearn.model_selection import train_test_split

# Exploratory Data Analysis

In [2]:
def reset_random_seeds(seed=1):
    '''
    Seeds every random number generator used for reproducibility.

    Covers Python's built-in `random`, NumPy's legacy global generator and
    PyTorch (which drives weight initialisation of the networks).

    Parameters
    ----------
    seed : int, default 1
        Value passed to each generator.
    '''
    # Local import: `random` is not part of the notebook's import cell.
    import random

    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this only
    # affects subprocesses launched afterwards, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    # Without this, nn.Linear weights differ on every kernel restart.
    torch.manual_seed(seed)

reset_random_seeds()

In [4]:
FILE_PATH = '../data/'

# Read the table of column names. The context manager closes the file
# handle as soon as the text has been read (the bare open() leaked it).
with open(FILE_PATH+'col_names.txt') as f:
    cols = f.read()
# Drop the first whitespace-separated token of every line (presumably an
# index column -- verify against col_names.txt) and keep the rest as the
# column name; the label column is appended last.
cols = [' '.join(c.split()[1:]) for c in cols.split('\n')] + ['LABEL']

# Read dataset and convert to dataframe
data = scipy.io.loadmat(FILE_PATH+'OQC.mat')
df = pd.DataFrame(data['data'], columns=cols)

print("Unique labels:", df['LABEL'].unique())
df.head()

Unique labels: [2. 0. 1.]


Unnamed: 0,"SCREW VOLUME, ACTUAL VALUE","MATERIAL CUSHION, ACTUAL VALUE","DOSAGE TIME, ACTUAL VALUE","CYCLE TIME, ACTUAL VALUE","MOULD HEATING CIRCUIT 1, ACTUAL VALUE","MOULD HEATING CIRCUIT 2, ACTUAL VALUE","MOULD HEATING CIRCUIT 3, ACTUAL VALUE","MOULD HEATING CIRCUIT 4, ACTUAL VALUE","MOULD HEATING CIRCUIT 5, ACTUAL VALUE","MOULD HEATING CIRCUIT 6, ACTUAL VALUE",...,"DOSAGE TORQUE, ACTUAL VALUE","DOSAGE ROTATIONAL SPEED, ACTUAL VALUE","HYDRAULIC ACCUMULATOR PRESSURE, ACTUAL VALUE","CHARGE PRESSURE OF ACCUMULATOR, MEASURED VALUE","MOULD-ENTRY TIME, ACTUAL VALUE","PART REMOVAL TIME, ACTUAL VALUE","MAXIMUM INJECTION PRESSURE, ACTUAL VALUE","BACK PRESSURE, ACTUAL","CLAMPING FORCE, ACTUAL",LABEL
0,0.390525,0.147798,0.156398,0.00256,0.555556,0.777778,0.555556,0.714286,0.5,0.5,...,0.719864,0.037255,0.329627,1.0,3.8e-05,0.125,0.07897,0.079625,0.922469,2.0
1,0.390525,0.147798,0.156398,0.00256,0.444444,0.666667,0.555556,0.714286,0.25,0.5,...,0.719864,0.072549,0.328711,1.0,3.8e-05,0.125,0.053677,0.053677,0.921482,2.0
2,0.508494,0.147798,0.156398,0.00256,0.444444,0.666667,0.555556,0.714286,0.25,0.5,...,0.842105,0.994118,0.326533,1.0,3.8e-05,0.125,0.056112,0.056112,0.920494,2.0
3,0.577954,0.147798,0.156398,0.00256,0.444444,0.666667,0.444444,0.571429,0.5,0.5,...,0.877759,0.994118,0.326533,1.0,3.8e-05,0.125,0.056206,0.057424,0.920494,2.0
4,0.64949,0.147798,0.156398,0.00256,0.555556,0.666667,0.444444,0.571429,0.5,0.5,...,0.874363,0.996078,0.32321,1.0,3.8e-05,0.125,0.057705,0.056112,0.92,2.0


In [5]:
# Length of one full dataframe row. This counts every column, including the
# trailing LABEL column, so the number of input features is one less.
np.array(df.iloc[0, :]).shape

(49,)

In [7]:
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs end-to-end on CPU-only runtimes.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [8]:
# Show how many samples each class has.
print(df['LABEL'].value_counts())

# 70:30 split, stratified on the label so both parts keep the class balance.
train, test = train_test_split(
    df,
    test_size=0.3,
    train_size=0.7,
    random_state=1,
    shuffle=True,
    stratify=df['LABEL'],
)

# Everything except the last column is a feature; the last column (LABEL) is
# kept as a single-column dataframe.
x_train, x_test = train.iloc[:, :-1], test.iloc[:, :-1]
y_train, y_test = train.iloc[:, -1:], test.iloc[:, -1:]


1.0    1074
0.0    1008
2.0     870
Name: LABEL, dtype: int64


# Functions

In [9]:
class three_layer_net(nn.Module):
    """Feed-forward network: linear -> ReLU -> linear -> ReLU -> linear.

    All three linear layers are created without bias terms. The forward pass
    returns the raw scores of the last layer (no softmax is applied).
    """

    def __init__(self, input_size, hidden_size1, hidden_size2,  output_size):
        super().__init__()

        # Created in this order so that parameter registration and weight
        # initialisation happen in the sequence layer1, layer2, layer3.
        self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size1, bias=False)
        self.layer2 = nn.Linear(in_features=hidden_size1, out_features=hidden_size2, bias=False)
        self.layer3 = nn.Linear(in_features=hidden_size2, out_features=output_size, bias=False)

    def forward(self, x):
        """Return the output-layer scores for input `x`."""
        first_hidden = F.relu(self.layer1(x))
        second_hidden = F.relu(self.layer2(first_hidden))
        return self.layer3(second_hidden)

In [10]:
def eval_on_test_set(bs, test_inputs=None, test_targets=None, model=None,
                     error_fn=None, target_device=None):
    """Evaluate the classification error over the whole test set in mini-batches.

    The previous version hard-coded 10000 test samples and a 3072-wide input,
    so it failed on any other dataset and on a final batch smaller than `bs`.
    Sizes are now taken from the data itself.

    Parameters
    ----------
    bs : int
        Mini-batch size; must be positive.
    test_inputs, test_targets : tensor, optional
        Test samples and labels. Default to the notebook-level `test_data`
        and `test_label`.
    model : callable, optional
        Maps a (batch, features) tensor to scores. Defaults to the global `net`.
    error_fn : callable, optional
        Called as `error_fn(scores, labels)`; its result must support
        `.item()`. Assumed to return the mean error of the batch -- confirm
        against `get_error`, the default.
    target_device : torch.device, optional
        Device each mini-batch is moved to. Defaults to the global `device`.

    Returns
    -------
    float
        Error rate on the full test set, as a fraction.

    Raises
    ------
    ValueError
        If `bs` is not positive or the test set is empty.
    """
    # Fall back to the notebook globals the original implementation relied on.
    inputs_all = test_data if test_inputs is None else test_inputs
    targets_all = test_label if test_targets is None else test_targets
    model = net if model is None else model
    error_fn = get_error if error_fn is None else error_fn
    target_device = device if target_device is None else target_device

    if bs <= 0:
        raise ValueError("bs must be a positive integer")
    num_samples = len(inputs_all)
    if num_samples == 0:
        raise ValueError("test set is empty")

    weighted_error = 0.0
    # Evaluation only: no gradients are needed.
    with torch.no_grad():
        for start in range(0, num_samples, bs):
            # extract the minibatch and send it to the device
            minibatch_data = inputs_all[start:start+bs].to(target_device)
            minibatch_label = targets_all[start:start+bs].to(target_device)
            # the last batch may hold fewer than `bs` samples
            batch_len = minibatch_data.size(0)
            # flatten each sample to a vector, whatever its width
            inputs = minibatch_data.reshape(batch_len, -1)
            # feed it to the network
            scores = model(inputs)
            # compute the error made on this batch
            error = error_fn(scores, minibatch_label)
            # weight by batch size so a short final batch is not over-counted
            weighted_error += error.item() * batch_len
    # compute error rate on the full test set
    total_error = weighted_error / num_samples
    print( 'error rate on test set =', total_error*100 ,'percent')
    return total_error

# Task 1

You are asked to build a three-layer feed-forward neural network to solve the monitoring problem of
an injection molding machine. Your implementation must be in PyTorch and executable in the Google Colab
environment. The proportion of training to testing samples is 70:30, and your model must deliver
the smallest testing error possible. To that end, you need to select the number of nodes in the hidden
layers, the number of epochs, the learning rate, the mini-batch size, etc. that lead to the smallest
testing error. In this assignment, you have to use the SGD optimizer with mini-batch updates, as
exemplified in the lab materials. The evaluation metric here is the classification error. No feature
selection is allowed here.

In [54]:
# Three layer feed-forward neural network
# 70:30 Train-Test Split, minimize Test error

# Grid search for number of nodes in hidden layer, epochs, learning rates, mini-batch etc.
# SGD optimizer
# Mini-batch update
# TODO: decide on the loss function (cross-entropy is the usual choice for multi-class scores)

In [62]:
# The input width must equal the number of feature columns. A full dataframe
# row has 49 entries, but the last one is LABEL, which x_train excludes, so
# hard-coding 49 makes the first linear layer reject the feature tensors.
# 3 outputs = one score per class (labels 0, 1, 2).
task1_network = three_layer_net(x_train.shape[1], 500, 500, 3)
task1_network = task1_network.to(device)