<a href="https://colab.research.google.com/github/Jiyeong-Oh/Classification-Model-Implementation-with-Pytorch/blob/main/Model_Implementation_Report_with_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Library Import & Settings**

In [None]:
import sklearn
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from sklearn.datasets import fetch_openml
import numpy as np
import pandas as pd
from numpy import random as rd
import torch.nn.functional as F
from collections import Counter
# Fall back to the CPU when no CUDA device is present so the notebook also runs on CPU-only hosts.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Data Loading
# as_frame=True pins the return type to pandas objects: the code below relies on
# `.loc`, `.values` and `.index` being available on X and y.
mnist = fetch_openml('mnist_784', cache = False, as_frame = True)
X = mnist.data.astype('float32')
y = mnist.target.astype('int64')

# **1. Data Split Function**

In [None]:
def data_split(attr, tar, train_ratio=0.7, val_ratio=0.15, seed=None): # attr: features, tar: target label
  """Stratified train/validation/test split that returns normalized tensors.

  The rows of every label are shuffled and cut at the same fractions, so each
  split keeps (up to rounding) the label proportions found in `tar`.

  Args:
    attr: pandas DataFrame of features. Rows are selected with `.loc`, so it
      must share its index labels with `tar` (labels are assumed unique).
    tar: pandas Series of target labels.
    train_ratio: fraction of each label's rows placed in the train split.
    val_ratio: fraction placed in the validation split; whatever remains
      becomes the test split.
    seed: optional int for a reproducible shuffle. With None the global NumPy
      random state is used.

  Returns:
    (X_train, y_train, X_val, y_val, X_test, y_test) as torch tensors; the
    feature tensors are divided by 255.

  Raises:
    ValueError: if a ratio is negative or the two ratios sum to more than 1.
  """
  if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
    raise ValueError("train_ratio and val_ratio must be non-negative and sum to at most 1")

  # np.random (global state) and RandomState both expose `permutation`.
  shuffler = np.random if seed is None else np.random.RandomState(seed)

  train_idx = []
  val_idx = []
  test_idx = []
  for label in sorted(tar.unique()):
    # Index labels of the rows carrying this target label, in shuffled order.
    label_idx = shuffler.permutation(tar.index[tar.values == label].to_numpy()).tolist()
    # Cut points are rounded to the nearest row rather than the nearest ten,
    # so small classes still contribute to the validation and test splits.
    train_end = int(round(len(label_idx) * train_ratio))
    val_end = int(round(len(label_idx) * (train_ratio + val_ratio)))
    train_idx += label_idx[:train_end]
    val_idx += label_idx[train_end:val_end]
    test_idx += label_idx[val_end:]

  # Converting into Tensor Type & Normalizing
  def to_tensors(idx):
    return torch.tensor(attr.loc[idx].to_numpy())/255, torch.tensor(tar.loc[idx].to_numpy())

  X_train, y_train = to_tensors(train_idx)
  X_val, y_val = to_tensors(val_idx)
  X_test, y_test = to_tensors(test_idx)

  return X_train, y_train, X_val, y_val, X_test, y_test

# **2. Binary Classification via soft-margin SVM**

In [None]:
###############################
# Remote Controller
###############################
# Settings for the soft-margin SVM section. They are passed explicitly in the
# call cell and are also captured as default arguments of softMarginSvm at the
# moment that function is defined.
epochs_num = 10
batch_size = 64
optimizer_choose = 'Adam' # the other supported option is 'SGD'.
learning_rate = 0.01
gamma = 0.001 # multiplies the weight-norm term added to the hinge loss (intended to control how much slack is allowed). 0 to infinite.
final_test = True # when True, the test-set accuracy is printed after training
###############################


# Model Class Defining
class SoftMargin_SVM(nn.Module):
  """Linear scorer for the soft-margin SVM: one affine layer, no activation."""

  def __init__(self, input_dim, output_dim):
    super().__init__()
    # The attribute name `module` is kept so parameters stay `module.weight` / `module.bias`.
    self.module = nn.Linear(in_features=input_dim, out_features=output_dim)

  def forward(self, x):
    """Return the raw (unbounded) decision scores for `x`."""
    return self.module(x)

def softMarginSvm(X, y, epochs_num=epochs_num, batch_size=batch_size, optimizer_choose=optimizer_choose, learning_rate=learning_rate, gamma=gamma, final_test=final_test):
  """Train a linear soft-margin SVM that separates the digits 2 and 3.

  Prints the training-set shape, then per epoch the summed mini-batch loss and
  the validation accuracy, and optionally the final test accuracy.

  Args:
    X: pandas DataFrame of features, indexed like `y` (rows are picked with `.loc`).
    y: pandas Series of integer digit labels.
    epochs_num: number of passes over the training split.
    batch_size: mini-batch size.
    optimizer_choose: 'SGD' or 'Adam'.
    learning_rate: learning rate handed to the optimizer.
    gamma: multiplier of the weight-norm penalty in the loss.
    final_test: when True, also print the accuracy on the test split.

  Returns:
    None.

  Raises:
    ValueError: if `optimizer_choose` is neither 'SGD' nor 'Adam'.
  """
  # Data Splitting
  # Keep the digits 2 and 3 and relabel them in one vectorized step (2 -> -1, 3 -> +1)
  # instead of assigning element by element into a filtered slice.
  y_selected = y[(y.values==2) | (y.values==3)]
  y_two_three = pd.Series(np.where(y_selected.values==2, -1, 1), index=y_selected.index).astype('int64')
  X_two_three = X.loc[y_two_three.index]
  X_train, y_train, X_val, y_val, X_test, y_test = data_split(X_two_three, y_two_three)
  print(X_train.shape)
  data_size = len(X_train)

  # Model Training
  model = SoftMargin_SVM(X_train.shape[1], 1)
  if optimizer_choose == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr = learning_rate)
  elif optimizer_choose == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr = learning_rate)
  else:
    raise ValueError("optimizer_choose must be 'SGD' or 'Adam', got {!r}".format(optimizer_choose))

  def hinge_loss(y_actual, y_pred):
    return torch.clamp(1-y_pred*y_actual, min=0)

  def accuracy(features, labels):
    # A score >= 0 is read as class +1, anything else as class -1.
    model.eval()
    with torch.no_grad():
      scores = model(features).reshape(-1)
    predicted = (scores >= 0).long()*2 - 1
    return (predicted == labels).sum().item()/len(labels)

  for epoch in range(epochs_num):
    model.train() # accuracy() switches to eval mode, so re-enable training every epoch
    cost = 0
    batch = torch.randperm(data_size)
    for i in range(0, data_size, batch_size):
      picked = batch[i:i+batch_size]
      inputs = X_train[picked]
      answer = y_train[picked].reshape(-1,1)
      optimizer.zero_grad()
      prediction = model(inputs)
      # NOTE(review): the penalty uses the unsquared norm of the weights; the textbook
      # soft-margin objective uses the squared norm. Left unchanged to keep training behavior.
      loss = gamma*torch.norm(model.module.weight)/2 + torch.mean(hinge_loss(answer, prediction))
      loss.backward()
      optimizer.step()
      cost += loss.item()

    # Validation Check (once per epoch, after the last mini-batch)
    val_accuracy = accuracy(X_val, y_val)
    # Train Loss & Validation Accuracy
    print('Epoch Train Loss {}: {: .2f}'.format(epoch,cost),  ", Validation Accuracy: {: .2f}".format(val_accuracy))


  # Final Test Accuracy Check
  if final_test==True:
    print("Final Test Accuracy: {: .2f}".format(accuracy(X_test, y_test)))

In [None]:
# Train and evaluate the 2-vs-3 soft-margin SVM with the settings from this section's
# Remote Controller cell. Later sections rebind epochs_num, batch_size, etc., so
# re-run that cell first when executing cells out of order.
softMarginSvm(X, y, epochs_num, batch_size, optimizer_choose, learning_rate, gamma, final_test)

torch.Size([9890, 784])
Epoch Train Loss 0:  17.16 , Validation Accuracy:  0.96
Epoch Train Loss 1:  11.84 , Validation Accuracy:  0.97
Epoch Train Loss 2:  11.01 , Validation Accuracy:  0.97
Epoch Train Loss 3:  10.97 , Validation Accuracy:  0.97
Epoch Train Loss 4:  10.38 , Validation Accuracy:  0.97
Epoch Train Loss 5:  10.58 , Validation Accuracy:  0.97
Epoch Train Loss 6:  9.88 , Validation Accuracy:  0.96
Epoch Train Loss 7:  10.96 , Validation Accuracy:  0.97
Epoch Train Loss 8:  10.50 , Validation Accuracy:  0.97
Epoch Train Loss 9:  9.78 , Validation Accuracy:  0.97
Final Test Accuracy:  0.98


# **3. Binary Classification via MLP**

In [None]:
###############################
# Remote Controller
###############################
# Settings for the binary MLP section. These rebind the same global names used
# by the SVM section, and are captured as default arguments of binaryMLP when
# that function is defined.
epochs_num = 5
batch_size = 32
optimizer_choose = 'Adam' # the other supported option is 'SGD'.
learning_rate = 0.01
final_test = True # when True, the test-set accuracy is printed after training
###############################


# Model Class Defining
class MLP_binary(nn.Module):
  """Two-hidden-layer perceptron (256 units each, ReLU) with a sigmoid output."""

  def __init__(self, input_dim, output_dim):
    super().__init__()
    hidden_units = 256
    self.module1 = nn.Linear(input_dim, hidden_units) # input layer -> hidden layer 1
    self.module2 = nn.Linear(hidden_units, hidden_units) # hidden layer 1 -> hidden layer 2
    self.module3 = nn.Linear(hidden_units, output_dim) # hidden layer 2 -> output layer

  def forward(self, x):
    """Return sigmoid outputs in (0, 1) for `x`."""
    hidden1 = torch.relu(self.module1(x))
    hidden2 = torch.relu(self.module2(hidden1))
    # The sigmoid squashes the final layer's output into the 0~1 range for binary classification.
    return torch.sigmoid(self.module3(hidden2))

def binaryMLP(X, y, epochs_num=epochs_num, batch_size=batch_size, optimizer_choose=optimizer_choose, learning_rate=learning_rate, final_test=final_test):
  """Train an MLP that separates the digits 2 and 3 with binary cross-entropy.

  Prints per epoch the summed mini-batch loss and the validation accuracy, and
  optionally the final test accuracy.

  Args:
    X: pandas DataFrame of features, indexed like `y` (rows are picked with `.loc`).
    y: pandas Series of integer digit labels.
    epochs_num: number of passes over the training split.
    batch_size: mini-batch size.
    optimizer_choose: 'SGD' or 'Adam'.
    learning_rate: learning rate handed to the optimizer.
    final_test: when True, also print the accuracy on the test split.

  Returns:
    None.

  Raises:
    ValueError: if `optimizer_choose` is neither 'SGD' nor 'Adam'.
  """
  # Data Splitting
  # Keep the digits 2 and 3 and relabel them in one vectorized step (2 -> 0, 3 -> 1).
  y_selected = y[(y.values==2) | (y.values==3)]
  y_two_three = pd.Series(np.where(y_selected.values==2, 0, 1), index=y_selected.index).astype('int64')
  X_two_three = X.loc[y_two_three.index]
  X_train, y_train, X_val, y_val, X_test, y_test = data_split(X_two_three, y_two_three)
  data_size = len(X_train)

  # Model Training
  model = MLP_binary(X_train.shape[1], 1)
  if optimizer_choose == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr = learning_rate)
  elif optimizer_choose == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr = learning_rate)
  else:
    raise ValueError("optimizer_choose must be 'SGD' or 'Adam', got {!r}".format(optimizer_choose))

  criterion = nn.BCELoss() # loss function: cross entropy for binary classification

  def accuracy(features, labels):
    # An output >= 0.5 is read as class 1, anything else as class 0.
    model.eval()
    with torch.no_grad():
      probabilities = model(features).reshape(-1)
    predicted = (probabilities >= 0.5).long()
    return (predicted == labels).sum().item()/len(labels)

  for epoch in range(epochs_num):
    model.train() # accuracy() switches to eval mode, so re-enable training every epoch
    cost = 0
    batch = torch.randperm(data_size) # batch mixing
    for i in range(0, data_size, batch_size):
      picked = batch[i:i+batch_size]
      inputs = X_train[picked]
      # BCELoss needs float targets shaped like the (N, 1) model output.
      answer = y_train[picked].reshape(-1,1).to(torch.float32)
      optimizer.zero_grad()
      prediction = model(inputs)
      loss = criterion(prediction, answer)
      loss.backward()
      optimizer.step()
      cost += loss.item()

    # Validation Check
    val_accuracy = accuracy(X_val, y_val)
    # Train Loss & Validation Accuracy
    print('Epoch Train Loss {}: {: .2f}'.format(epoch,cost),  ", Validation Accuracy: {: .2f}".format(val_accuracy))

  # Final Test Accuracy Check
  if final_test==True:
    print("Final Test Accuracy: {: .2f}".format(accuracy(X_test, y_test)))


In [None]:
# Train and evaluate the 2-vs-3 MLP with the settings from this section's Remote
# Controller cell. Other sections rebind the same global names, so re-run that
# cell first when executing cells out of order.
binaryMLP(X, y, epochs_num, batch_size, optimizer_choose, learning_rate, final_test)

Epoch Train Loss 0:  31.39 , Validation Accuracy:  0.99
Epoch Train Loss 1:  13.95 , Validation Accuracy:  0.99
Epoch Train Loss 2:  10.21 , Validation Accuracy:  0.99
Epoch Train Loss 3:  11.75 , Validation Accuracy:  0.99
Epoch Train Loss 4:  5.58 , Validation Accuracy:  0.99
Final Test Accuracy:  0.99


# **4. Multiclass Classification via MLP**

In [None]:
###############################
# Remote Controller
###############################
# Settings for the multiclass MLP section. These rebind the same global names
# used by the earlier sections, and are captured as default arguments of
# multiMLP when that function is defined.
epochs_num = 5
batch_size = 64
optimizer_choose = 'Adam' # the other supported option is 'SGD'.
learning_rate = 0.0001
final_test = True # when True, the test-set accuracy is printed after training
###############################

# Model Class Defining
class MLP_multi(nn.Module):
  """Two-hidden-layer perceptron (256 units each, ReLU) that outputs raw class scores."""

  def __init__(self, input_dim, output_dim):
    super().__init__()
    hidden_units = 256
    self.module1 = nn.Linear(input_dim, hidden_units) # input layer -> hidden layer 1
    self.module2 = nn.Linear(hidden_units, hidden_units) # hidden layer 1 -> hidden layer 2
    # hidden layer 2 -> output layer; output_dim should be the number of digits (10)
    self.module3 = nn.Linear(hidden_units, output_dim)

  def forward(self, x):
    """Return unnormalized class scores for `x`; no softmax is applied here."""
    hidden1 = torch.relu(self.module1(x))
    hidden2 = torch.relu(self.module2(hidden1))
    # softmax is left to the cross entropy loss
    return self.module3(hidden2)

def multiMLP(X, y, epochs_num=epochs_num, batch_size=batch_size, optimizer_choose=optimizer_choose, learning_rate=learning_rate, final_test=final_test):
  """Train a 10-class MLP on all digits with cross-entropy loss.

  Prints per epoch the summed mini-batch loss and the validation accuracy, and
  optionally the final test accuracy.

  Args:
    X: features, passed straight to `data_split`.
    y: integer digit labels, passed straight to `data_split`.
    epochs_num: number of passes over the training split.
    batch_size: mini-batch size.
    optimizer_choose: 'SGD' or 'Adam'.
    learning_rate: learning rate handed to the optimizer.
    final_test: when True, also print the accuracy on the test split.

  Returns:
    None.

  Raises:
    ValueError: if `optimizer_choose` is neither 'SGD' nor 'Adam'.
  """
  # Data Splitting
  X_train, y_train, X_val, y_val, X_test, y_test = data_split(X, y)
  data_size = len(X_train)

  # Model Training
  model = MLP_multi(X_train.shape[1], 10)
  if optimizer_choose == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr = learning_rate)
  elif optimizer_choose == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr = learning_rate)
  else:
    raise ValueError("optimizer_choose must be 'SGD' or 'Adam', got {!r}".format(optimizer_choose))
  criterion = nn.CrossEntropyLoss() # loss function

  def accuracy(features, labels):
    # The predicted digit is the index of the largest score in each row.
    model.eval()
    with torch.no_grad():
      predicted = torch.argmax(model(features), dim=1)
    return (predicted == labels).sum().item()/len(labels)

  for epoch in range(epochs_num):
    model.train() # accuracy() switches to eval mode, so re-enable training every epoch
    cost = 0
    batch = torch.randperm(data_size) # batch mixing
    for i in range(0, data_size, batch_size):
      picked = batch[i:i+batch_size]
      inputs = X_train[picked]
      answer = y_train[picked]
      optimizer.zero_grad()
      prediction = model(inputs)
      loss = criterion(prediction, answer)
      loss.backward()
      optimizer.step()
      cost += loss.item()

    # Validation Check
    val_accuracy = accuracy(X_val, y_val)
    # Train Loss & Validation Accuracy
    print('Epoch Train Loss {}: {: .2f}'.format(epoch,cost),  ", Validation Accuracy: {: .2f}".format(val_accuracy))

  # Final Test Accuracy Check
  if final_test==True:
    print("Final Test Accuracy: {: .2f}".format(accuracy(X_test, y_test)))


NameError: ignored

In [None]:
# Train and evaluate the 10-class MLP with the settings from this section's Remote
# Controller cell. Other sections rebind the same global names, so re-run that
# cell first when executing cells out of order.
multiMLP(X, y, epochs_num, batch_size, optimizer_choose, learning_rate, final_test)

# **5. Multiclass Classification via k-NN**

In [None]:
###############################
# Remote Controller
###############################
# Settings for the k-NN section; they are also captured as default arguments of
# knn when that function is defined.
k = 10 # number of nearest training samples that vote on each prediction
distance_choose = 'L2Norm' # the other supported option is 'L1Norm'
final_test = True # when True, the test-set accuracy is printed as well
###############################

def _knn_accuracy(X_train, y_train, X_query, y_query, k, distance_choose):
  """Return the fraction of `X_query` rows whose k-NN majority vote equals `y_query`."""
  norm_order = {'L2Norm': 2, 'L1Norm': 1} # Euclidean / Manhattan distance
  if distance_choose not in norm_order:
    raise ValueError("distance_choose must be 'L2Norm' or 'L1Norm', got {!r}".format(distance_choose))
  p = norm_order[distance_choose]

  acc_count = 0
  for i in range(len(X_query)):
    # Distance from this query row to every training row.
    distance = torch.norm(X_train - X_query[i], dim=1, p=p)
    knn_idx = torch.topk(distance, k, largest=False)[1]
    # Labels are listed nearest-first; Counter keeps that order, so a tied vote
    # goes to the label that appears first among the neighbours.
    k_neighbors = y_train[knn_idx].tolist()
    pred = Counter(k_neighbors).most_common(1)[0][0]
    if pred == int(y_query[i]):
      acc_count += 1
  return acc_count/len(y_query)

def knn(k=k, distance_choose=distance_choose, final_test = final_test):
  """Classify digits with k-nearest neighbours and print the accuracies.

  Reads the module-level `X` and `y`, splits them with `data_split`, and uses
  the training split as the neighbour pool.

  Args:
    k: number of neighbours that vote on each prediction.
    distance_choose: 'L2Norm' (Euclidean) or 'L1Norm' (Manhattan).
    final_test: when True, also print the accuracy on the test split.

  Returns:
    None.

  Raises:
    ValueError: if `distance_choose` is neither 'L2Norm' nor 'L1Norm'.
  """
  # Data Splitting
  X_train, y_train, X_val, y_val, X_test, y_test = data_split(X, y)

  # Validation Check
  print("Validation Accuracy: {: .2f}".format(_knn_accuracy(X_train, y_train, X_val, y_val, k, distance_choose)))

  # Final Test Accuracy Check
  # Test rows are compared against the training set using X_test itself; the
  # previous version measured distances from X_val here.
  if final_test==True:
    print("Final Test Accuracy: {: .2f}".format(_knn_accuracy(X_train, y_train, X_test, y_test, k, distance_choose)))

In [None]:
# Run k-NN with the settings from this section's Remote Controller cell; knn reads
# the module-level X and y loaded at the top of the notebook.
knn(k, distance_choose, final_test)

Validation Accuracy:  0.97
Final Test Accuracy:  0.96
