

# Multi-class Classification
### Introduction

In the second part of this assignment, you will implement, train and evaluate several single-layer neural networks to predict the correct category of a previously unobserved plant given certain attributes of the plant as input. We will employ the Iris Data Set for this task. The dataset contains 3 classes of 50 instances each, where each class refers to a type of iris plant. The goal is to predict the class of iris plant (Iris Setosa, Iris Versicolour or Iris Virginica) using its 4 attributes: sepal length, sepal width, petal length and petal width. This is an exceedingly simple domain; however, it is still a useful exercise for developing a better understanding of linear, logistic and softmax regression methods from the perspective of neural networks.



Before starting, we first import the pandas, NumPy and PyTorch libraries.

In [None]:
import pandas as pd
import torch
import numpy as np
import random
from torch import nn
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer

### Load Dataset

The code for reading the `iris.data` file is already implemented for you in the following code cell. Furthermore, in the code below we map the textual categories (Iris Setosa, Iris Versicolour or Iris Virginica) to numerical class labels (0, 1 or 2) and split the dataset into training (80%) and test (20%) sets. The input features are also normalised to have zero mean and unit standard deviation.

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the dataset
# cell below reads iris.data from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive') 

Mounted at /content/drive


In [None]:
# Read the raw Iris table. header=None means the first row is treated as data,
# so the column names are assigned explicitly afterwards.
df = pd.read_csv('/content/drive/MyDrive/DS405B/data/iris.data', index_col=None, header=None)
df.columns = ['x1', 'x2', 'x3', 'x4', 'y']

# Species name -> integer class label.
d = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
df['y'] = df['y'].map(d)

# Features and targets as tensors.
feature_columns = ['x1', 'x2', 'x3', 'x4']
X = torch.tensor(df[feature_columns].to_numpy(), dtype=torch.float)
y = torch.tensor(df['y'].to_numpy(), dtype=torch.int)

# Shuffle with a fixed seed so the split is the same on every run.
torch.manual_seed(123)
shuffle_idx = torch.randperm(len(y), dtype=torch.long)
X = X[shuffle_idx]
y = y[shuffle_idx]

# First 80% of the permutation goes to training, the rest to testing.
percent80 = int(len(shuffle_idx) * 0.8)
train_idx, test_idx = shuffle_idx[:percent80], shuffle_idx[percent80:]

# NOTE: X and y were already permuted above, so indexing them with shuffle_idx
# again applies the permutation a second time. The two subsets are still
# disjoint and together cover every example.
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Standardise both splits with statistics computed on the training split only.
mu = X_train.mean(dim=0)
sigma = X_train.std(dim=0)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma

### Setup all functions

In [None]:
# Transform y_train into a binary matrix of size (120,3)

def one_hot_vector(x, num_classes=None):
    """Encode integer class labels as a one-hot float matrix.

    Args:
        x: 1-D sequence or tensor of non-negative integer class labels.
        num_classes: Number of columns in the result. Defaults to
            ``max(x) + 1``. Pass it explicitly when a split might not contain
            the highest class, so that every split is encoded with the same
            width.

    Returns:
        Float tensor of shape ``(len(x), num_classes)`` holding a single 1 in
        each row, at the column given by that row's label.
    """
    labels = torch.as_tensor(x, dtype=torch.long).reshape(-1)
    if num_classes is None:
        # An empty label vector has no maximum; fall back to zero columns.
        num_classes = int(labels.max()) + 1 if labels.numel() else 0
    encoded = torch.zeros(len(labels), num_classes)
    if labels.numel():
        # One vectorised assignment instead of a Python loop over every label.
        encoded[torch.arange(len(labels)), labels] = 1
    return encoded
# One-hot encode the integer labels of both splits (train first, then test).
y_train_OHV, y_test_OHV = (one_hot_vector(split) for split in (y_train, y_test))

In [None]:
def data_iter(batch_size, features, labels):
    """Yield shuffled ``(features, labels)`` mini-batches, visiting each example once.

    Args:
        batch_size: Maximum number of examples per batch.
        features: Indexable collection of examples; supports tensor indexing.
        labels: Labels aligned with ``features`` along the first dimension.

    Yields:
        Tuples ``(features[idx], labels[idx])`` for successive index batches.
        The final batch is shorter when ``len(features)`` is not a multiple of
        ``batch_size``.
    """
    order = list(range(len(features)))
    # A fresh random visiting order is drawn on every call.
    random.shuffle(order)
    for start in range(0, len(order), batch_size):
        # List slicing clamps at the end, so no explicit bound check is needed.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]


### Softmax function

def softmax(z, log):  # log = 1 for log_softmax
    """Row-wise softmax (or log-softmax) of a batch of scores.

    Args:
        z: Tensor of scores; the softmax is taken over the last dimension, so
            for a ``(batch, classes)`` input each row is normalised.
        log: Pass ``1`` to get log-probabilities; any other value returns
            probabilities.

    Returns:
        Tensor with the same shape as ``z``.
    """
    # Softmax is unchanged by a per-row shift. Subtracting the row maximum keeps
    # every exponent <= 0, so torch.exp cannot overflow to inf and produce nan.
    shifted = z - z.max(dim=-1, keepdim=True).values.detach()
    exp_shifted = torch.exp(shifted)
    row_sum = exp_shifted.sum(dim=-1, keepdim=True)
    if log == 1:
        # log(e^s / sum) == s - log(sum): avoids the log of an underflowed ratio.
        return shifted - torch.log(row_sum)
    return exp_shifted / row_sum


### Cross entropy function

def cross_entropy(softmax, y_target, reduction):
    """Cross entropy between predicted class probabilities and one-hot targets.

    Args:
        softmax: ``(batch, classes)`` tensor of predicted probabilities. The
            parameter name shadows the ``softmax`` function inside this body
            only; it is kept so existing keyword callers keep working.
        y_target: ``(batch, classes)`` tensor of target weights (one-hot rows).
        reduction: ``'mean'`` or ``'sum'`` to reduce over the batch; any other
            value returns the per-example losses unreduced.

    Returns:
        A scalar tensor for ``'mean'``/``'sum'``, otherwise a ``(batch,)`` tensor.
    """
    # Clamp away exact zeros: log(0) is -inf, and 0 * -inf on a non-target
    # class would turn the whole loss into nan.
    log_probs = torch.log(softmax.clamp_min(1e-12))
    loss = -(y_target * log_probs).sum(dim=1)
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss


### Compute accuracy 

# For models 1, 2 and 5

def comp_accuracy(label_var, pred_probas):
    """Accuracy of per-class scores against one-hot labels, rounded to 2 decimals.

    Args:
        label_var: ``(n, classes)`` one-hot label matrix.
        pred_probas: ``(n, classes)`` tensor of scores; the predicted class of
            a row is the column holding its largest score.

    Returns:
        Fraction of rows whose label equals 1 at the predicted column, rounded
        with ``np.around(..., 2)``.

    Raises:
        ZeroDivisionError: If ``pred_probas`` is empty.
    """
    # argmax always returns one index per row (the first maximum on ties), so
    # rows with tied scores no longer produce a multi-element comparison.
    predicted = pred_probas.argmax(dim=1)
    rows = torch.arange(len(pred_probas))
    hits = (label_var[rows, predicted] == 1).sum().item()
    acc = hits / len(pred_probas)
    return np.around(acc, 2)

# For models 3 and 4

def comp_accuracy2(label_var, pred_probas):
    """Accuracy of per-class scores against integer labels, rounded to 2 decimals.

    Args:
        label_var: ``(n,)`` tensor of integer class labels.
        pred_probas: ``(n, classes)`` tensor of scores; the predicted class of
            a row is the column holding its largest score.

    Returns:
        Fraction of rows whose predicted class equals the label, rounded with
        ``np.around(..., 2)``.

    Raises:
        ZeroDivisionError: If ``pred_probas`` is empty.
    """
    # argmax always returns one index per row (the first maximum on ties), so
    # rows with tied scores no longer produce a multi-element comparison.
    predicted = pred_probas.argmax(dim=1)
    hits = (predicted == label_var.reshape(-1)).sum().item()
    acc = hits / len(pred_probas)
    return np.around(acc, 2)

# for the 6th model

def comp_accuracy3(label_var, pred_probas):
    """Fraction of predictions equal to their label, rounded to 2 decimals.

    Despite the parameter name, ``pred_probas`` holds already-decided class
    values that are compared to the labels for exact equality.

    Args:
        label_var: Labels, indexable by position.
        pred_probas: One predicted value per example (each entry must compare
            to a single truth value against its label).

    Returns:
        ``np.around(matches / len(pred_probas), 2)``.
    """
    total = len(pred_probas)
    matches = sum(
        1 for row, guess in enumerate(pred_probas) if label_var[row] == guess
    )
    return np.around(matches / total, 2)

### Setup parameters common to all models

In [None]:
# Examples per mini-batch, passed to every model's training/evaluation pass.
batch_size = 10
# Number of passes over the training set used to train every model.
num_epochs = 100

### 1. Logistic regression for multiclass classification in One vs. All

In [None]:
def model_1(X_train, y_train_OVH, batch_size, train):
    """Run one pass of one-vs-all logistic regression with ``net1``.

    Every output of ``net1`` goes through a sigmoid and is scored against the
    matching one-hot column with binary cross entropy. Uses the module-level
    ``net1``, ``optimizer`` and ``data_iter``.

    Args:
        X_train: Feature matrix to iterate over (training or test features).
        y_train_OVH: One-hot labels aligned row by row with ``X_train``. The
            name is a typo of "OHV" and is kept for existing callers.
        batch_size: Number of examples per mini-batch.
        train: ``1`` to update the weights; any other value only evaluates.

    Returns:
        Mean per-example loss over the pass (``nan`` when there is no data).
    """
    total_loss, num_seen = 0.0, 0
    # Gradients are only needed when an optimizer step follows.
    with torch.set_grad_enabled(train == 1):
        # Use the labels that were passed in. Reading the global training
        # labels here would pair test features with training targets.
        for X, y in data_iter(batch_size, X_train, y_train_OVH):
            # Size the reshape from the batch itself so a short final batch works.
            y = y.reshape(len(X), 3)
            logits = net1(X)
            out = torch.sigmoid(logits)
            loss = F.binary_cross_entropy(out, y)
            if train == 1:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * len(X)
            num_seen += len(X)
    return total_loss / num_seen if num_seen else float('nan')
  

# Single linear layer: 4 input features -> 3 class scores.
net1 = nn.Linear(in_features=4, out_features=3, bias=True)
optimizer = torch.optim.SGD(net1.parameters(), lr=0.1)

# Each call to model_1 with train flag 1 is one pass over the training set.
for epoch in range(num_epochs):
    model_1(X_train, y_train_OHV, batch_size, 1)

# Report the learned parameters.
for heading, value in (
    ('Network Structure : torch.nn.Linear(4,3) :\n', net1),
    ('Weight Of Network :\n', net1.weight),
    ('Bias Of Network :\n', net1.bias),
):
    print(heading, value)

# Forward-only pass (train flag 0), then score the test predictions.
model_1(X_test, y_test_OHV, batch_size, 0)
test_probas = torch.sigmoid(net1(X_test))
print('Accuracy on the test set : ', comp_accuracy(y_test_OHV, test_probas))

Network Structure : torch.nn.Linear(4,3) :
 Linear(in_features=4, out_features=3, bias=True)
Weight Of Network :
 Parameter containing:
tensor([[-0.9482,  1.4478, -1.7525, -1.3672],
        [ 0.0468, -1.3874,  0.3930, -0.4423],
        [ 0.6095,  0.0951,  1.1819,  1.8982]], requires_grad=True)
Bias Of Network :
 Parameter containing:
tensor([-1.4955, -1.0981, -1.7995], requires_grad=True)
Accuracy on the test set :  0.77


### 2. Softmax regression with custom implementation of cross entropy loss

In [None]:
def model_2(X_train, y_train_OHV, batch_size, train):
    """Run one pass of softmax regression with ``net2`` and the custom loss.

    Uses the module-level ``net2``, ``optimizer``, ``data_iter``, ``softmax``
    and ``cross_entropy``.

    Args:
        X_train: Feature matrix to iterate over (training or test features).
        y_train_OHV: One-hot labels aligned row by row with ``X_train``.
        batch_size: Number of examples per mini-batch.
        train: ``1`` to update the weights; any other value only evaluates.

    Returns:
        Mean per-example loss over the pass (``nan`` when there is no data).
    """
    total_loss, num_seen = 0.0, 0
    # Gradients are only needed when an optimizer step follows.
    with torch.set_grad_enabled(train == 1):
        for X, y in data_iter(batch_size, X_train, y_train_OHV):
            # Size the reshape from the batch itself so a short final batch works.
            y = y.reshape(len(X), 3)
            logits = net2(X)
            # Softmax is applied to the raw logits. Passing them through a
            # sigmoid first confines them to (0, 1), which caps the largest
            # class probability at e / (e + 2), about 0.58 for three classes.
            soft = softmax(logits, 0)              # softmax function implemented
            loss = cross_entropy(soft, y, 'mean')  # cross entropy function implemented
            if train == 1:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * len(X)
            num_seen += len(X)
    return total_loss / num_seen if num_seen else float('nan')


# Single linear layer: 4 input features -> 3 class scores.
net2 = nn.Linear(in_features=4, out_features=3, bias=True)
optimizer = torch.optim.SGD(net2.parameters(), lr=0.1)

# Each call to model_2 with train flag 1 is one pass over the training set.
for epoch in range(num_epochs):
    model_2(X_train, y_train_OHV, batch_size, 1)

# Accuracy on the data the network was trained on.
train_probas = softmax(torch.sigmoid(net2(X_train)), 0)
print('Accuracy : ', comp_accuracy(y_train_OHV, train_probas))

# Report the learned parameters.
for heading, value in (
    ('Network Structure : torch.nn.Linear(4,3) :\n', net2),
    ('Weight Of Network :\n', net2.weight),
    ('Bias Of Network :\n', net2.bias),
):
    print(heading, value)

# Forward-only pass (train flag 0), then score the test predictions.
model_2(X_test, y_test_OHV, batch_size, 0)
test_probas = softmax(torch.sigmoid(net2(X_test)), 0)
print('Accuracy on the test set : ', comp_accuracy(y_test_OHV, test_probas))

Accuracy :  0.88
Network Structure : torch.nn.Linear(4,3) :
 Linear(in_features=4, out_features=3, bias=True)
Weight Of Network :
 Parameter containing:
tensor([[-1.0674,  1.3413, -1.2888, -1.6485],
        [ 0.4389, -2.1717,  0.3201, -0.2496],
        [ 1.1042,  0.0772,  1.0877,  1.8823]], requires_grad=True)
Bias Of Network :
 Parameter containing:
tensor([-1.0175, -0.1566, -0.8550], requires_grad=True)
Accuracy on the test set :  0.73


### 3. Using F.nll_loss

In [None]:
def model_3(X_train, y_train, batch_size, train):
    """Run one pass of softmax regression with ``net3`` and ``F.nll_loss``.

    Uses the module-level ``net3``, ``optimizer``, ``data_iter`` and
    ``softmax`` (called with ``log=1`` to obtain log-probabilities).

    Args:
        X_train: Feature matrix to iterate over (training or test features).
        y_train: Integer class labels aligned row by row with ``X_train``.
        batch_size: Number of examples per mini-batch.
        train: ``1`` to update the weights; any other value only evaluates.

    Returns:
        Mean per-example loss over the pass (``nan`` when there is no data).
    """
    total_loss, num_seen = 0.0, 0
    # Gradients are only needed when an optimizer step follows.
    with torch.set_grad_enabled(train == 1):
        for X, y in data_iter(batch_size, X_train, y_train):
            # F.nll_loss wants a flat vector of int64 class indices.
            y = y.reshape(-1).long()
            logits = net3(X)
            # Log-softmax is taken of the raw logits. Passing them through a
            # sigmoid first confines them to (0, 1), which caps the largest
            # class probability at e / (e + 2), about 0.58 for three classes.
            loss = F.nll_loss(softmax(logits, 1), y)
            if train == 1:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * len(X)
            num_seen += len(X)
    return total_loss / num_seen if num_seen else float('nan')


# Single linear layer: 4 input features -> 3 class scores (bias on by default).
net3 = nn.Linear(in_features=4, out_features=3)

# Class-index targets are converted to 64-bit integers before training.
y_train = y_train.type(torch.LongTensor)
y_test = y_test.type(torch.LongTensor)

optimizer = torch.optim.SGD(net3.parameters(), lr=0.1)

# Each call to model_3 with train flag 1 is one pass over the training set.
for epoch in range(num_epochs):
    model_3(X_train, y_train, batch_size, 1)

# Report the learned parameters.
for heading, value in (
    ('Network Structure : torch.nn.Linear(4,3) :\n', net3),
    ('Weight Of Network :\n', net3.weight),
    ('Bias Of Network :\n', net3.bias),
):
    print(heading, value)

# Forward-only pass (train flag 0), then score the test predictions.
model_3(X_test, y_test, batch_size, 0)
test_scores = torch.sigmoid(net3(X_test))
print('Accuracy on the test set : ', comp_accuracy2(y_test, test_scores))

Network Structure : torch.nn.Linear(4,3) :
 Linear(in_features=4, out_features=3, bias=True)
Weight Of Network :
 Parameter containing:
tensor([[-0.7814,  1.2840, -1.7869, -1.4156],
        [-0.1063, -2.0673,  0.4669,  0.0824],
        [ 0.7688,  0.2009,  1.6420,  1.6438]], requires_grad=True)
Bias Of Network :
 Parameter containing:
tensor([-1.0334, -0.1690, -0.8977], requires_grad=True)
Accuracy on the test set :  0.7


### 4 : Using cross_entropy


In [None]:
def model_4(X_train, y_train, batch_size, train):
    """Run one pass of softmax regression with ``net4`` and ``F.cross_entropy``.

    ``F.cross_entropy`` is given the raw outputs of ``net4``. Uses the
    module-level ``net4``, ``optimizer`` and ``data_iter``.

    Args:
        X_train: Feature matrix to iterate over (training or test features).
        y_train: Integer class labels aligned row by row with ``X_train``.
        batch_size: Number of examples per mini-batch.
        train: ``1`` to update the weights; any other value only evaluates.

    Returns:
        Mean per-example loss over the pass (``nan`` when there is no data).
    """
    total_loss, num_seen = 0.0, 0
    # Gradients are only needed when an optimizer step follows.
    with torch.set_grad_enabled(train == 1):
        for X, y in data_iter(batch_size, X_train, y_train):
            # Flatten without assuming the batch is full, so a short final
            # batch does not raise; the targets must be int64 class indices.
            y = y.reshape(-1).long()
            logits = net4(X)
            loss = F.cross_entropy(logits, y)
            if train == 1:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * len(X)
            num_seen += len(X)
    return total_loss / num_seen if num_seen else float('nan')

# Single linear layer: 4 input features -> 3 class scores.
net4 = nn.Linear(in_features=4, out_features=3, bias=True)
optimizer = torch.optim.SGD(net4.parameters(), lr=0.1)

# Each call to model_4 with train flag 1 is one pass over the training set.
for epoch in range(num_epochs):
    model_4(X_train, y_train, batch_size, 1)

# Report the learned parameters.
for heading, value in (
    ('Network Structure : torch.nn.Linear(4,3) :\n', net4),
    ('Weight Of Network :\n', net4.weight),
    ('Bias Of Network :\n', net4.bias),
):
    print(heading, value)

# Forward-only pass (train flag 0), then score the raw test outputs.
model_4(X_test, y_test, batch_size, 0)
test_logits = net4(X_test)
print('Accuracy on the test set : ', comp_accuracy2(y_test, test_logits))

Network Structure : torch.nn.Linear(4,3) :
 Linear(in_features=4, out_features=3, bias=True)
Weight Of Network :
 Parameter containing:
tensor([[-1.0440,  1.1888, -2.4759, -2.4354],
        [ 0.7451, -0.6325, -0.3724, -1.0989],
        [ 0.4048, -0.9229,  2.5203,  2.6941]], requires_grad=True)
Bias Of Network :
 Parameter containing:
tensor([-0.5752,  2.0469, -1.7772], requires_grad=True)
Accuracy on the test set :  0.97


### 5 : Using Mean Squared Error Loss

In [None]:
def model_5(X_train, y_train_OHV, batch_size, train):
    """Run one pass of softmax regression with ``net5`` and a mean-squared-error loss.

    The softmax probabilities are compared to the one-hot targets with
    ``torch.nn.MSELoss``. Uses the module-level ``net5``, ``optimizer``,
    ``data_iter`` and ``softmax``.

    Args:
        X_train: Feature matrix to iterate over (training or test features).
        y_train_OHV: One-hot labels aligned row by row with ``X_train``.
        batch_size: Number of examples per mini-batch.
        train: ``1`` to update the weights; any other value only evaluates.

    Returns:
        Mean per-example loss over the pass (``nan`` when there is no data).
    """
    MSE = torch.nn.MSELoss(reduction='mean')
    total_loss, num_seen = 0.0, 0
    # Gradients are only needed when an optimizer step follows.
    with torch.set_grad_enabled(train == 1):
        for X, y in data_iter(batch_size, X_train, y_train_OHV):
            # Size the reshape from the batch itself so a short final batch works.
            y = y.reshape(len(X), 3)
            logits = net5(X)
            # Softmax is applied to the raw logits. Passing them through a
            # sigmoid first confines them to (0, 1), which caps the largest
            # class probability at e / (e + 2), about 0.58 for three classes.
            soft = softmax(logits, 0)
            loss = MSE(soft, y)
            if train == 1:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * len(X)
            num_seen += len(X)
    return total_loss / num_seen if num_seen else float('nan')


# Single linear layer: 4 input features -> 3 class scores.
net5 = nn.Linear(in_features=4, out_features=3, bias=True)
optimizer = torch.optim.SGD(net5.parameters(), lr=0.1)

# Each call to model_5 with train flag 1 is one pass over the training set.
for epoch in range(num_epochs):
    model_5(X_train, y_train_OHV, batch_size, 1)

# Report the learned parameters.
for heading, value in (
    ('Network Structure : torch.nn.Linear(4,3) :\n', net5),
    ('Weight Of Network :\n', net5.weight),
    ('Bias Of Network :\n', net5.bias),
):
    print(heading, value)

# Forward-only pass (train flag 0), then score the test predictions.
model_5(X_test, y_test_OHV, batch_size, 0)
test_probas = softmax(torch.sigmoid(net5(X_test)), 0)
print('Accuracy on the test set : ', comp_accuracy(y_test_OHV, test_probas))

Network Structure : torch.nn.Linear(4,3) :
 Linear(in_features=4, out_features=3, bias=True)
Weight Of Network :
 Parameter containing:
tensor([[-0.3832,  0.8717, -0.9829, -1.1768],
        [ 0.0275, -0.7677,  0.7468, -0.3164],
        [ 0.4349,  0.2944,  1.0750,  1.2446]], requires_grad=True)
Bias Of Network :
 Parameter containing:
tensor([-0.3746, -0.0014, -0.3962], requires_grad=True)
Accuracy on the test set :  0.8


### 6 : Linear regression with Mean Squared Error Loss

In [None]:
def model_6(X_train, y_train, batch_size, train):
    """Run one pass of linear regression on the class index with ``net6``.

    The single output of ``net6`` is regressed onto the label value with
    ``torch.nn.MSELoss``. Uses the module-level ``net6``, ``optimizer`` and
    ``data_iter``.

    Args:
        X_train: Feature matrix to iterate over (training or test features).
        y_train: Integer class labels aligned row by row with ``X_train``.
        batch_size: Number of examples per mini-batch.
        train: ``1`` to update the weights; any other value only evaluates.

    Returns:
        Mean per-example loss over the pass (``nan`` when there is no data).
    """
    MSE = torch.nn.MSELoss(reduction='mean')
    total_loss, num_seen = 0.0, 0
    # Gradients are only needed when an optimizer step follows.
    with torch.set_grad_enabled(train == 1):
        for X, y in data_iter(batch_size, X_train, y_train):
            # Column vector of float targets, sized from the batch itself so
            # a short final batch works; the shape matches the net6 output.
            y = y.reshape(-1, 1).float()
            logits = net6(X)
            loss = MSE(logits, y)
            if train == 1:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * len(X)
            num_seen += len(X)
    return total_loss / num_seen if num_seen else float('nan')


# Single linear layer: 4 input features -> 1 regression output.
net6 = nn.Linear(4, 1, bias=True)
optimizer = torch.optim.SGD(net6.parameters(), lr=0.1)
net6.weight.data.normal_(0, 0.01)   # weight randomly sampled
net6.bias.data.fill_(0)             # Bias initialized to zero

# Each call to model_6 with train flag 1 is one pass over the training set.
for epoch in range(num_epochs):
    model_6(X_train, y_train, batch_size, 1)

# The label now names the layer that is actually printed: Linear(4, 1).
print('Network Structure : torch.nn.Linear(4,1) :\n', net6)
print('Weight Of Network :\n', net6.weight)
print('Bias Of Network :\n', net6.bias)

# Forward-only pass (train flag 0) over the test set.
model_6(X_test, y_test, batch_size, 0)

# Round the regression output to the nearest class index before scoring.
print('Accuracy on the test set : ', comp_accuracy3(y_test, torch.round(net6(X_test))))

Network Structure : torch.nn.Linear(4,3) :
 Linear(in_features=4, out_features=1, bias=True)
Weight Of Network :
 Parameter containing:
tensor([[-0.1326, -0.0435,  0.4965,  0.4162]], requires_grad=True)
Bias Of Network :
 Parameter containing:
tensor([1.0024], requires_grad=True)
Accuracy on the test set :  0.97
