In [None]:
# Colab-only setup: mount Google Drive so the project folder is reachable from the runtime.
from google.colab import drive
drive.mount('/content/drive/')
# Folder prefix for every data file read below; the trailing slash matters because
# file names are appended with plain string concatenation.
path='/content/drive/MyDrive/team8/task1/'

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os, numpy as np
from sklearn.utils import shuffle
from numpy import random

In [None]:
# Single seed for the notebook. This call seeds PyTorch only; NumPy's generator is
# seeded separately in the weights-initialization cell.
seed = 32
torch.manual_seed(seed)

In [None]:
# Sanity check: show which files are present in the project folder.
os.listdir(path)

In [None]:
def _read_split(file_name):
  """Load one header-less CSV file from the project folder as a DataFrame."""
  return pd.read_csv(path+file_name, header = None)

# Features and labels of each split are stored in separate header-less files.
X_train, y_train = _read_split('train_data.csv'), _read_split('train_label.csv')
X_test, y_test = _read_split('test_data.csv'), _read_split('test_label.csv')
X_valid = _read_split('val_data.csv')
# Only the validation labels are reduced to a Series (their last column) at load time.
y_valid = _read_split('val_label.csv').iloc[:,-1]

In [None]:
# Quick look at the dimensions (rows, columns) of the test feature table.
X_test.shape

In [None]:
# Shuffle features and labels together so that every row keeps its own label.
combined_data = pd.concat([X_train, y_train], axis=1)
# random_state ties the permutation to the notebook seed. Training is done one
# sample at a time, so without it the sample order (and therefore every
# optimizer comparison below) would differ on each execution.
shuffled_data = shuffle(combined_data, random_state=seed).reset_index(drop=True)
# The label is the last column of the combined frame; everything before it is a feature.
X_train = shuffled_data.iloc[:, :-1]
y_train = shuffled_data.iloc[:, -1]

In [None]:
# Convert the validation split to tensors once, so train() can evaluate it in a
# single forward pass. The guards keep this cell re-runnable: after the first
# execution the names already hold tensors, and `.values` on a tensor is a
# method that torch.from_numpy cannot consume.
if not torch.is_tensor(X_valid):
  X_valid = torch.from_numpy(X_valid.values).float()
if not torch.is_tensor(y_valid):
  y_valid = torch.from_numpy(y_valid.values).long()

In [None]:
class Net(nn.Module):
  """Fully connected 36 -> 20 -> 10 -> 5 network with tanh hidden activations."""

  def __init__(self):
    super().__init__()
    # Two hidden layers followed by a 5-way output layer.
    self.hl1 = nn.Linear(36, 20)
    self.hl2 = nn.Linear(20, 10)
    self.ol = nn.Linear(10, 5)

  def forward(self, x):
    """Return raw (pre-softmax) class scores as a 2-D tensor with 5 columns.

    The final reshape means a single 1-D sample comes back with shape (1, 5),
    which is the layout nn.CrossEntropyLoss expects alongside a length-1 target.
    """
    hidden = x
    for layer in (self.hl1, self.hl2):
      hidden = F.tanh(layer(hidden))
    return self.ol(hidden).reshape(-1, 5)

In [None]:
def _split_confusion(model, features, labels):
  """Confusion matrix of `model` on one data split, from a single batched forward pass."""
  inputs = torch.as_tensor(np.asarray(features), dtype=torch.float32)
  # Labels may be a Series or a DataFrame; flatten either to a 1-D integer vector.
  # (A DataFrame is assumed to have a single column, as the per-row `.item()`
  # in the previous implementation also required.)
  targets = np.asarray(labels).reshape(-1).astype(int)
  logits = model(inputs)
  predictions = logits.argmax(dim=1).cpu().numpy()
  # Listing every class id keeps the matrix square and aligned with the class
  # ids even when a class never occurs in this split and is never predicted.
  return confusion_matrix(targets, predictions, labels=list(range(logits.shape[1])))


def conf_matrix(model):
  """Compute confusion matrices of `model` on the training and test splits.

  Reads the notebook-level `X_train`/`y_train` and `X_test`/`y_test`.

  Args:
    model: module mapping a float32 batch of feature rows to per-class scores
      of shape (n_rows, n_classes).

  Returns:
    (cf_train, cf_test): two square arrays with one row/column per class
    (rows are true classes, columns are predicted classes).
  """
  # Inference only: no gradients are needed.
  with torch.no_grad():
    cf_train = _split_confusion(model, X_train, y_train)
    cf_test = _split_confusion(model, X_test, y_test)

  return cf_train, cf_test

### Weights initialization

In [None]:
# Seed NumPy so the same starting weights are drawn on every run.
random.seed(seed)

# (inputs, outputs) of each linear layer, listed in the order the draws are made.
_layer_shapes = [(36, 20), (20, 10), (10, 5)]

# Zero-mean normal draws with standard deviation 1/sqrt(inputs); each matrix is
# laid out as (outputs, inputs).
weights_layer1, weights_layer2, weights_layer3 = [
  random.normal(loc=0, scale=1/(n_in**0.5), size=(n_out, n_in))
  for n_in, n_out in _layer_shapes
]

# Lookup keyed by a layer's input size.
weights_dict = {
  n_in: matrix
  for (n_in, _), matrix in zip(_layer_shapes, (weights_layer1, weights_layer2, weights_layer3))
}

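`weights_init_normal` takes a module and, if it is a linear layer, overwrites its weights with the pre-sampled normally distributed values stored in `weights_dict` (looked up by the layer's input size) and sets its bias to zero.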

In [None]:
def weights_init_normal(m):
  """Load the pre-sampled weights into a linear layer and zero its bias.

  Intended for `model.apply(...)`, which calls it on every sub-module;
  anything that is not an `nn.Linear` is left untouched.

  Args:
    m: the module visited by `apply`.

  Raises:
    KeyError: no matrix is stored in `weights_dict` for the layer's input size.
    ValueError: the stored matrix does not have the layer's weight shape.
  """
  # isinstance instead of a class-name substring test: 'Linear' also occurs in
  # names such as 'Bilinear', which has no `in_features`.
  if not isinstance(m, nn.Linear):
    return

  fan_in = m.in_features
  if fan_in not in weights_dict:
    raise KeyError(f'no pre-sampled weights for a layer with {fan_in} inputs')

  new_weight = torch.as_tensor(weights_dict[fan_in], dtype=m.weight.dtype)
  # copy_ would broadcast a wrongly shaped matrix, so check the shape explicitly.
  if tuple(new_weight.shape) != tuple(m.weight.shape):
    raise ValueError(
      f'pre-sampled weights have shape {tuple(new_weight.shape)}, '
      f'expected {tuple(m.weight.shape)}')

  # Write in place under no_grad so the existing parameters (and any optimizer
  # already holding them) stay valid.
  with torch.no_grad():
    m.weight.copy_(new_weight)
    if m.bias is not None:  # layers built with bias=False have nothing to reset
      m.bias.zero_()

In [None]:
# Step size passed to every optimizer constructed below.
learning_rate = 0.01
# Loss used by train() for both the per-sample updates and the validation pass.
loss_func = nn.CrossEntropyLoss()
# train() stops once the average training loss changes by no more than this
# amount between two consecutive epochs.
threshold = 0.002

In [None]:
def train(model, optimizer, tol=None, max_epochs=None):
  """Train `model` one sample at a time until the average epoch loss plateaus.

  Reads the notebook-level `X_train`, `y_train`, `X_valid`, `y_valid`,
  `loss_func` and (when `tol` is not given) `threshold`.

  Args:
    model: network to train; called on one 1-D float32 sample at a time and
      expected to return scores of shape (1, n_classes).
    optimizer: optimizer already bound to `model.parameters()`.
    tol: stop once |previous average loss - current average loss| is no longer
      greater than this. Defaults to the global `threshold`.
    max_epochs: optional hard cap on the number of epochs; None means no cap.

  Returns:
    (avg_error_dict, validation_loss): two dicts keyed by 1-based epoch number,
    holding the average training loss and the validation loss as floats.

  Raises:
    ValueError: the training set is empty.
  """
  tol = threshold if tol is None else tol

  # Convert the training data once. Pulling rows out of the DataFrame with .iloc
  # for every sample of every epoch is loop-invariant work.
  features = torch.as_tensor(np.asarray(X_train), dtype=torch.float32)
  targets = torch.as_tensor(np.asarray(y_train)).reshape(-1).long()
  n_samples = len(features)
  if n_samples == 0:
    raise ValueError('cannot train on an empty training set')

  model.train()
  avg_error_dict = {}
  validation_loss = {}
  prev_epoch_loss = float('inf')
  delta_avg_loss = float('inf')  # guarantees that the first epoch runs
  epoch = 1
  # `abs(delta) > tol` (rather than `<=` with a break) also stops on a NaN loss.
  while abs(delta_avg_loss) > tol and (max_epochs is None or epoch <= max_epochs):
    epoch_loss = 0.0
    for i in range(n_samples):
      optimizer.zero_grad()  # clear gradients left over from the previous step
      y_hat = model(features[i])
      loss = loss_func(y_hat, targets[i].reshape(1))
      loss.backward()  # gradient of the loss w.r.t. the model parameters
      optimizer.step()  # parameter update
      epoch_loss += loss.item()
      if i%100==0:
        print(f'EPOCH: {epoch} LOSS: {loss.item()}')

    # Validation: one batched forward pass, in eval mode and without gradients.
    model.eval()
    with torch.no_grad():
      # .item() stores a plain float rather than a tensor in the history.
      valid_loss = loss_func(model(X_valid), y_valid).item()
    model.train()
    validation_loss[epoch] = valid_loss

    avg_epoch_loss = epoch_loss/n_samples
    print('\n'+'-'*100+'\n')
    print(f'Total Loss: {epoch_loss}, Average Loss: {avg_epoch_loss} Average Validation Loss: {valid_loss}')
    print('\n'+'-'*100+'\n')

    delta_avg_loss = prev_epoch_loss-avg_epoch_loss
    prev_epoch_loss = avg_epoch_loss
    avg_error_dict[epoch] = avg_epoch_loss
    epoch += 1
  return avg_error_dict, validation_loss

## Delta

In [None]:
# Fresh float32 network, reset to the shared pre-sampled starting weights.
model = Net().to(torch.float32).apply(weights_init_normal)

# Plain stochastic gradient descent (no momentum).
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
avg_error_dict, validation_loss = train(model, optimizer)

In [None]:
# Epoch numbers are plotted as strings, which makes the x axis categorical.
epoch_labels = [str(epoch_no) for epoch_no in avg_error_dict]
plt.plot(epoch_labels, list(avg_error_dict.values()), label='Average training loss')
# float() accepts both plain numbers and 0-dim tensors, so this works whichever
# of the two train() stored for the validation loss.
plt.plot([str(epoch_no) for epoch_no in validation_loss],
         [float(loss) for loss in validation_loss.values()],
         label='Average validation loss')
plt.xticks(epoch_labels[::3])  # label every third epoch only
plt.legend()
plt.grid(True)  # a real boolean; the former string 'true' only worked by being truthy
plt.xlabel('Epochs')
plt.ylabel('Average Loss')
plt.title('Average Loss vs. Epochs for Delta optimizer')
plt.show()

In [None]:
cf_train, cf_test = conf_matrix(model)

# fmt='d' prints the integer counts in full; the default annotation format
# ('.2g') switches to scientific notation for counts of 100 or more.
sns.heatmap(cf_train, annot=True, fmt='d')
plt.title('Confusion Matrix for Training Data')
plt.show()

sns.heatmap(cf_test, annot=True, fmt='d')
plt.title('Confusion Matrix for Test Data')
plt.show()

# 49.4 %

## Generalized Delta

In [None]:
# Fresh float32 network, reset to the shared pre-sampled starting weights.
model=Net()
model.to(torch.float32)
model.apply(weights_init_normal)
# NOTE(review): `th` is not read anywhere in the visible notebook; train() stops on the
# global `threshold`. Presumably a tighter tolerance was intended for this run — confirm.
th = 1e-4
# SGD with a momentum term of 0.9.
optimizer=optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
avg_error_dict, validation_loss = train(model, optimizer)

In [None]:
# Epoch numbers are plotted as strings, which makes the x axis categorical.
epoch_labels = [str(epoch_no) for epoch_no in avg_error_dict]
plt.plot(epoch_labels, list(avg_error_dict.values()), label='Average training loss')
# float() accepts both plain numbers and 0-dim tensors, so this works whichever
# of the two train() stored for the validation loss.
plt.plot([str(epoch_no) for epoch_no in validation_loss],
         [float(loss) for loss in validation_loss.values()],
         label='Average validation loss')
plt.xticks(epoch_labels)  # label every epoch
plt.legend()
plt.xlabel('Epochs')
plt.ylabel('Average Loss')
plt.title('Average Loss vs. Epochs for Generalized Delta optimizer')
plt.grid(True)  # a real boolean; the former string 'true' only worked by being truthy
plt.show()  # render explicitly, like the other loss-curve cells

In [None]:
cf_train, cf_test = conf_matrix(model)

# fmt='d' prints the integer counts in full; the default annotation format
# ('.2g') switches to scientific notation for counts of 100 or more.
sns.heatmap(cf_train, annot=True, fmt='d')
plt.title('Confusion Matrix for Training Data')
plt.show()

sns.heatmap(cf_test, annot=True, fmt='d')
plt.title('Confusion Matrix for Test Data')
plt.show()

## AdaGrad

In [None]:
# Fresh float32 network, reset to the shared pre-sampled starting weights.
model = Net().to(torch.float32).apply(weights_init_normal)

# Adagrad with the shared learning rate and otherwise default settings.
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)
avg_error_dict, validation_loss = train(model, optimizer)

In [None]:
# plt.plot(list(map(str, list(avg_error_dict.keys()))), list(avg_error_dict.values()))
# plt.xlabel('Epochs')
# plt.ylabel('Average Loss')
# plt.title('Average Loss vs. Epochs for AdaGrad optimizer')

In [None]:
# Epoch numbers are plotted as strings, which makes the x axis categorical.
epoch_labels = [str(epoch_no) for epoch_no in avg_error_dict]
plt.plot(epoch_labels, list(avg_error_dict.values()), label='Average training loss')
# float() accepts both plain numbers and 0-dim tensors, so this works whichever
# of the two train() stored for the validation loss.
plt.plot([str(epoch_no) for epoch_no in validation_loss],
         [float(loss) for loss in validation_loss.values()],
         label='Average validation loss')
plt.xticks(epoch_labels[::3])  # label every third epoch only
plt.legend()
plt.grid(True)  # a real boolean; the former string 'true' only worked by being truthy
plt.xlabel('Epochs')
plt.ylabel('Average Loss')
plt.title('Average Loss vs. Epochs for AdaGrad')
plt.show()

In [None]:
cf_train, cf_test = conf_matrix(model)

# fmt='d' prints the integer counts in full; the default annotation format
# ('.2g') switches to scientific notation for counts of 100 or more.
sns.heatmap(cf_train, annot=True, fmt='d')
plt.title('Confusion Matrix for Training Data')
plt.show()

sns.heatmap(cf_test, annot=True, fmt='d')
plt.title('Confusion Matrix for Test Data')
plt.show()

## RMSprop

In [None]:
# Fresh float32 network, reset to the shared pre-sampled starting weights.
model = Net().to(torch.float32).apply(weights_init_normal)

# RMSprop with the shared learning rate and otherwise default settings.
optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
avg_error_dict, validation_loss = train(model, optimizer)

In [None]:
# Epoch numbers are plotted as strings, which makes the x axis categorical.
epoch_labels = [str(epoch_no) for epoch_no in avg_error_dict]
plt.plot(epoch_labels, list(avg_error_dict.values()), label='Average training loss')
# float() accepts both plain numbers and 0-dim tensors, so this works whichever
# of the two train() stored for the validation loss.
plt.plot([str(epoch_no) for epoch_no in validation_loss],
         [float(loss) for loss in validation_loss.values()],
         label='Average validation loss')
plt.xticks(epoch_labels)  # label every epoch
plt.legend()
plt.grid(True)  # a real boolean; the former string 'true' only worked by being truthy
plt.xlabel('Epochs')
plt.ylabel('Average Loss')
plt.title('Average Loss vs. Epochs for RMSprop optimizer')
plt.show()

In [None]:
cf_train, cf_test = conf_matrix(model)

# fmt='d' prints the integer counts in full; the default annotation format
# ('.2g') switches to scientific notation for counts of 100 or more.
sns.heatmap(cf_train, annot=True, fmt='d')
plt.title('Confusion Matrix for Training Data')
plt.show()

sns.heatmap(cf_test, annot=True, fmt='d')
plt.title('Confusion Matrix for Test Data')
plt.show()

## AdaM

In [None]:
# Fresh float32 network, reset to the shared pre-sampled starting weights.
model = Net().to(torch.float32).apply(weights_init_normal)

# Adam with the shared learning rate and otherwise default settings.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
avg_error_dict, validation_loss = train(model, optimizer)

In [None]:
# Epoch numbers are plotted as strings, which makes the x axis categorical.
epoch_labels = [str(epoch_no) for epoch_no in avg_error_dict]
plt.plot(epoch_labels, list(avg_error_dict.values()), label='Average training loss')
# float() accepts both plain numbers and 0-dim tensors, so this works whichever
# of the two train() stored for the validation loss.
plt.plot([str(epoch_no) for epoch_no in validation_loss],
         [float(loss) for loss in validation_loss.values()],
         label='Average validation loss')
plt.xticks(epoch_labels[::2])  # label every second epoch only
plt.legend()
plt.grid(True)  # a real boolean; the former string 'true' only worked by being truthy
plt.xlabel('Epochs')
plt.ylabel('Average Loss')
plt.title('Average Loss vs. Epochs for AdaM optimizer')
plt.show()

In [None]:
cf_train, cf_test = conf_matrix(model)

# fmt='d' prints the integer counts in full; the default annotation format
# ('.2g') switches to scientific notation for counts of 100 or more.
sns.heatmap(cf_train, annot=True, fmt='d')
plt.title('Confusion Matrix for Training Data')
plt.show()

sns.heatmap(cf_test, annot=True, fmt='d')
plt.title('Confusion Matrix for Test Data')
plt.show()