# Lab4
## Author : Jian Gao

In [1]:
import torch
import pandas as pd
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

## Define Multilevel Perceptron with 2 layer class

In [2]:
class MLP2(nn.Module):
    def __init__(self,input_dim, hid_dim, output_dim):
        self.input_dim=input_dim
        self.hid_dim=hid_dim
        self.output_dim=output_dim
        # initialze the superclass
        super(MLP2, self).__init__()
        # this will create a linear layer with input_dim x hid_dim parameters
        # in addition to a bias unit with hid_dim parameters
        self.lin1 = nn.Linear(input_dim, hid_dim)
        # same thing here except hid_dim x output_dim
        self.lin2 = nn.Linear(hid_dim, output_dim)
    
    # this is where the meat of the action is
    def forward(self, x):
        x = self.lin1(x)  # linear combination of inputs
        x = torch.sigmoid(x) # then through sigmoid - output of first layer
        x = self.lin2(x) # linear combination of hidden units
        return x

In [3]:
def weights_init(model):
    for m in model.modules():
        if isinstance(m, nn.Linear):
            # initialize the weight tensor, here we use a normal distribution
            m.weight.data.normal_(0, 1)


### Define Train function, and print loss every 1000 iteration

In [4]:
def train(data, target, epoch, inputDim, hiddenDim, outputDim):
    model = MLP2(inputDim, hiddenDim, outputDim)
    weights_init(model)
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.0006, momentum=0.9)
    for i in range(epoch):
        x_var = Variable(data, requires_grad=False)
        y_var = Variable(target, requires_grad=False)
        optimizer.zero_grad()
        y_hat = model(x_var.float())
        y_var = y_var.long()
        loss = loss_func(y_hat, y_var)
        loss.backward()
        optimizer.step()

        if i % 1000 == 0:
            print("Epoch: {0}, Loss: {1}, ".format(i, loss.data.numpy()))
    return model

### Define Test function, and return the prediction accuracy

In [5]:
def test(test_data, test_target, trained_model):
    x_var = Variable(test_data, requires_grad=False)
    y_var = Variable(test_target, requires_grad=False)
    y_hat = trained_model(x_var.float())
    y_var = y_var.long()
    all_prediction_index = []
    prediction = y_hat.detach().numpy()
    for i in range (prediction.shape[0]):
        all_prediction_index.append(np.where(prediction[i]==max(prediction[i])))
    acc=0
    
    for x in range (len(all_prediction_index)):
        
        if all_prediction_index[x][0][0] == int (y_var[x]):
            acc +=1
    return acc/len(all_prediction_index)
        

### Load data and get train and test datasets and targets. Also change vowel targets into integers that represent each target

In [6]:
url='https://raw.githubusercontent.com/efosler/cse5522data/master/vowelfmts.csv'
df=pd.read_csv(url)
targets = {'iy':0, 'ih':1, 'ey':2, 'eh':3, 'ah':4, 'ao':5, 'ow':6, 'uw':7, 'ax':8 }
for i in range (len(df['vowel'])):
    df.at[i, 'vowel'] = targets[df['vowel'][i]]
train_data, test_data, train_targets, test_targets = train_test_split(df[['f1','f2']],df[['vowel']])
train_data = np.array(train_data).astype(float)
scaler = preprocessing.StandardScaler().fit(train_data)
train_data = scaler.transform(train_data)   
test_data = np.array(test_data).astype(float)
test_data = scaler.transform(test_data)   
train_targets = np.array(train_targets).astype(int)
test_targets = np.array(test_targets).astype(int)
list_test_targets=[]
list_train_targets=[]
for i in range (len(test_targets)):
    list_test_targets.append(test_targets[i][0])
for x in range (len(train_targets)):
    list_train_targets.append(train_targets[x][0])
data_train = torch.tensor(train_data)
data_test = torch.tensor(test_data)
target_train = torch.tensor(list_train_targets)
target_test = torch.tensor(list_test_targets)

target_train

tensor([6, 0, 3,  ..., 5, 6, 7])

## Part 1

### Training Model

In [7]:
inputDimension = 2
hiddenDimension = 100
outputDimension = 9
model1 = train(data_train,target_train,50000,inputDimension, hiddenDimension, outputDimension)

Epoch: 0, Loss: 9.85786247253418, 
Epoch: 1000, Loss: 1.430095911026001, 
Epoch: 2000, Loss: 1.3031542301177979, 
Epoch: 3000, Loss: 1.2640380859375, 
Epoch: 4000, Loss: 1.240235686302185, 
Epoch: 5000, Loss: 1.2225170135498047, 
Epoch: 6000, Loss: 1.208197832107544, 
Epoch: 7000, Loss: 1.1960877180099487, 
Epoch: 8000, Loss: 1.185548186302185, 
Epoch: 9000, Loss: 1.1761884689331055, 
Epoch: 10000, Loss: 1.167763590812683, 
Epoch: 11000, Loss: 1.1600896120071411, 
Epoch: 12000, Loss: 1.1530475616455078, 
Epoch: 13000, Loss: 1.1465362310409546, 
Epoch: 14000, Loss: 1.1404814720153809, 
Epoch: 15000, Loss: 1.1348258256912231, 
Epoch: 16000, Loss: 1.1295231580734253, 
Epoch: 17000, Loss: 1.12453293800354, 
Epoch: 18000, Loss: 1.1198209524154663, 
Epoch: 19000, Loss: 1.1153600215911865, 
Epoch: 20000, Loss: 1.1111290454864502, 
Epoch: 21000, Loss: 1.1071083545684814, 
Epoch: 22000, Loss: 1.1032763719558716, 
Epoch: 23000, Loss: 1.0996222496032715, 
Epoch: 24000, Loss: 1.0961283445358276, 


### Test model on test dataset and targets

In [10]:
accuracy = test(data_test,target_test,model1)
print ("Accuracy: ", accuracy)

Accuracy:  0.6160493827160494


## Part 2
### Let hidden units of MLP model be 200 this time.

In [11]:
hiddenDimension2 = 200
model2 = train(data_train,target_train,50000,inputDimension, hiddenDimension2, outputDimension)

Epoch: 0, Loss: 8.498006820678711, 
Epoch: 1000, Loss: 1.2580021619796753, 
Epoch: 2000, Loss: 1.208699345588684, 
Epoch: 3000, Loss: 1.181119441986084, 
Epoch: 4000, Loss: 1.1609309911727905, 
Epoch: 5000, Loss: 1.1447937488555908, 
Epoch: 6000, Loss: 1.1313188076019287, 
Epoch: 7000, Loss: 1.119780421257019, 
Epoch: 8000, Loss: 1.1097279787063599, 
Epoch: 9000, Loss: 1.1008610725402832, 
Epoch: 10000, Loss: 1.0929547548294067, 
Epoch: 11000, Loss: 1.0858397483825684, 
Epoch: 12000, Loss: 1.0793893337249756, 
Epoch: 13000, Loss: 1.0735032558441162, 
Epoch: 14000, Loss: 1.0680941343307495, 
Epoch: 15000, Loss: 1.0631028413772583, 
Epoch: 16000, Loss: 1.0584731101989746, 
Epoch: 17000, Loss: 1.054162621498108, 
Epoch: 18000, Loss: 1.0501347780227661, 
Epoch: 19000, Loss: 1.0463553667068481, 
Epoch: 20000, Loss: 1.0428001880645752, 
Epoch: 21000, Loss: 1.0394481420516968, 
Epoch: 22000, Loss: 1.0362788438796997, 
Epoch: 23000, Loss: 1.0332729816436768, 
Epoch: 24000, Loss: 1.030421495437

In [12]:
accuracy = test(data_test,target_test,model2)
print ("Accuracy: ", accuracy)

Accuracy:  0.6407407407407407


### Build a three layer multilevel perceptron with same 100 hidden units each layer. It is the second variation of the model

In [13]:
class MLP3(nn.Module):
    def __init__(self,input_dim, hid_dim1, hid_dim2,output_dim):
        self.input_dim=input_dim
        self.hid_dim1=hid_dim1
        self.hid_dim2=hid_dim2
        self.output_dim=output_dim
        # initialze the superclass
        super(MLP3, self).__init__()
        # this will create a linear layer with input_dim x hid_dim parameters
        # in addition to a bias unit with hid_dim parameters
        self.lin1 = nn.Linear(input_dim, hid_dim1)
        self.lin2 = nn.Linear(hid_dim1, hid_dim2)
        # same thing here except hid_dim x output_dim
        self.lin3 = nn.Linear(hid_dim2, output_dim)
    
    # this is where the meat of the action is
    def forward(self, x):
        x = self.lin1(x)  # linear combination of inputs
        x = torch.sigmoid(x) # then through sigmoid - output of first layer
        x = self.lin2(x) # linear combination of hidden units
        x = torch.sigmoid(x)
        x = self.lin3(x)
        return x

In [14]:
def trainWithMLP3(data, target, epoch, inputDim, hiddenDim1, hiddenDim2, outputDim):
    model = MLP3(inputDim, hiddenDim1, hiddenDim2, outputDim)
    weights_init(model)
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.0006, momentum=0.9)
    for i in range(epoch):
        x_var = Variable(data, requires_grad=False)
        y_var = Variable(target, requires_grad=False)
        optimizer.zero_grad()
        y_hat = model(x_var.float())
        y_var = y_var.long()
        loss = loss_func(y_hat, y_var)
        loss.backward()
        optimizer.step()

        if i % 1000 == 0:
            print("Epoch: {0}, Loss: {1}, ".format(i, loss.data.numpy()))
    return model

In [16]:
inputDimension = 2
hiddenDimension1 = 100
hiddenDimenstion2 = 100
outputDimension = 9
model3 = trainWithMLP3(data_train,target_train,50000,inputDimension, hiddenDimension1, hiddenDimension2, outputDimension)

Epoch: 0, Loss: 11.70850658416748, 
Epoch: 1000, Loss: 1.065604567527771, 
Epoch: 2000, Loss: 1.0116064548492432, 
Epoch: 3000, Loss: 0.988967776298523, 
Epoch: 4000, Loss: 0.9745367169380188, 
Epoch: 5000, Loss: 0.9638038873672485, 
Epoch: 6000, Loss: 0.9551545977592468, 
Epoch: 7000, Loss: 0.9479348659515381, 
Epoch: 8000, Loss: 0.9418618679046631, 
Epoch: 9000, Loss: 0.9366999864578247, 
Epoch: 10000, Loss: 0.9322539567947388, 
Epoch: 11000, Loss: 0.9284011125564575, 
Epoch: 12000, Loss: 0.9250258803367615, 
Epoch: 13000, Loss: 0.9220333099365234, 
Epoch: 14000, Loss: 0.9193418622016907, 
Epoch: 15000, Loss: 0.916888952255249, 
Epoch: 16000, Loss: 0.9146325588226318, 
Epoch: 17000, Loss: 0.9125417470932007, 
Epoch: 18000, Loss: 0.9105932116508484, 
Epoch: 19000, Loss: 0.9087673425674438, 
Epoch: 20000, Loss: 0.9070479869842529, 
Epoch: 21000, Loss: 0.9054270386695862, 
Epoch: 22000, Loss: 0.9038897752761841, 
Epoch: 23000, Loss: 0.9024304747581482, 
Epoch: 24000, Loss: 0.90103995800

In [17]:
accuracy = test(data_test,target_test,model3)
print ("Accuracy: ", accuracy)

Accuracy:  0.662962962962963


## Result of three different model:
    First model: Loss: 1.0397157669067383, Accuracy:  0.6160493827160494
    Second model: Loss: 0.9858912229537964, Accuracy:  0.6407407407407407
    Third model: Loss: 0.8781473636627197, Accuracy:  0.662962962962963
    
    First model is the model in Part 2, 2 layer Perceptron with 100 hidden units.
    Second model is the model with a 2 layer Perceptron with 200 hidden units.
    Third model is the model with a 3 layer Perceptron with 100 hidden units.

## <font color = "red"> Answer to question: </font> Create a short writeup explaining (1) why you chose the variations, and (2) how it affected performance on the test set.
    Since the input dimension is always 2 ("f1", "f2") and output dimension is always 9 (There are 9 vowels), so the things we can change in model is limited. Changing hidden layers, hidden units or functions. Here are reasons why I chose the variations.
    (1) Compare the second model with the first model, I added 100 hidden units. In the text book, it said that appropriate hidden neurons with increase the performance of model. I wanted to try the affect of hidden units in this dataset. So I added 100 hidden units. 
    
        Compare the third model with the first model, I added a hidden layer and kept the same number of hidden units. As I know in deeplearning, increasing the number of hidden layers might improve the accuracy or might not, it really depends on the complexity of the problem that you are trying to solve. I wanted to see the result of adding just one layer in this model, and whether it improved the performance or not.
      
    (2) Compare the second model with the first model, the loss in training droped 0.05 and the accuracy in test set increased 0.03. So, adding 100 hidden units increased the performance of the model.
    
        Compare the third model with the first model, the loss in trainning droped 0.16 and the accuracy in test set increased 0.05. So, adding 1 hidden layer increase the performance of the model, and the it affect more than just adding 100 hidden units.