<a href="https://colab.research.google.com/github/JK-the-Ko/Thermo-Fluid-Dynamics-Experiment/blob/main/2023-2/%EC%97%B4%EC%9C%A0%EC%B2%B4%EA%B3%B5%ED%95%99%EC%8B%A4%ED%97%981_Week10_PyTorch_Multi_Class_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multi-Class Classification Using PyTorch Framework

## Check NVIDIA GPU Setting

In [None]:
!nvidia-smi

## Load Dataset

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
df = pd.read_csv("/content/drug200.csv")

### View DataFrame

In [None]:
df.head(10)

## Get Number of Samples and Columns

In [None]:
df.shape

### Get Input and Target Data

In [None]:
inputData, targetData = df.drop(columns=["Drug"], axis=1), df["Drug"]

### Get Input Feature Names

In [None]:
inputData.columns

### Get Data Type of Input Features

In [None]:
inputData.info()

## Show Target Data

In [None]:
targetData

## Preprocessing

## One-Hot Encoding of Categorical Input Features

In [None]:
inputDataOHE = inputData.loc[:, ["Sex","BP","Cholesterol"]]

In [None]:
inputDataOHE

In [None]:
# Expand every categorical column into one indicator column per category.
# dtype=float keeps the indicators numeric: pandas >= 2.0 returns bool columns by
# default, and bool + float columns collapse into an object array when the merged
# frame is converted with np.array, which torch.as_tensor cannot consume.
inputDataOHE = pd.get_dummies(inputDataOHE, dtype=float)

In [None]:
inputDataOHE

### Min-Max Normalization

In [None]:
def MinMaxNorm(dataFrame) :
  """Linearly rescale the values of ``dataFrame`` into the range [0, 1].

  Each column is scaled independently as ``(x - min) / (max - min)``.
  A column whose values are all equal has a zero range; it is mapped to 0
  instead of NaN so that it cannot poison the downstream feature matrix.

  Args:
    dataFrame: numeric ``pd.DataFrame`` (scaled per column) or ``pd.Series``.

  Returns:
    Object of the same type and shape as ``dataFrame`` holding the scaled values.
  """
  minValue = dataFrame.min()
  valueRange = dataFrame.max() - minValue

  # Replace a zero range by 1: the numerator is already 0 for such a column,
  # so the result becomes 0 rather than the NaN produced by 0/0.
  if isinstance(valueRange, pd.Series) :
    valueRange = valueRange.replace(0, 1)
  elif valueRange == 0 :
    valueRange = 1

  return (dataFrame-minValue)/valueRange

In [None]:
# Min-max scale the columns that remain once the three categorical columns are dropped.
inputDataMMN = MinMaxNorm(inputData.drop(["Sex","BP","Cholesterol"], axis=1))

In [None]:
inputDataMMN

## Merge Preprocessed Input Data

In [None]:
# Join the one-hot block and the normalized block column-wise (axis=1).
# NOTE: this rebinds inputData, so the preprocessing cells above that select the
# "Sex"/"BP"/"Cholesterol" columns cannot be re-run until inputData is rebuilt from df.
inputData = pd.concat([inputDataOHE, inputDataMMN], axis=1)

In [None]:
inputData

## One-Hot Encoding of Target Labels

In [None]:
# One indicator column per distinct target value; the resulting column order is the
# class index that the argmax calls further down recover.
targetData = pd.get_dummies(targetData)

In [None]:
targetData

### Split Dataset

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Convert both frames to plain float32 arrays.
# The explicit dtype matters: a frame mixing bool indicator columns with float
# columns would otherwise become an object array, which PyTorch cannot turn into a tensor.
inputData = np.asarray(inputData, dtype=np.float32)
targetData = np.asarray(targetData, dtype=np.float32)

In [None]:
# Hold out 10% of all samples as the test set; random_state makes the shuffle repeatable.
xTrain, xTest, yTrain, yTest = train_test_split(inputData, targetData, test_size=0.1, random_state=42)

In [None]:
# Carve 10% of the remaining samples out as the validation set (about 9% of the full
# data), which leaves roughly 81% for training.
# NOTE: xTrain/yTrain are rebound here, so running this cell a second time shrinks the
# training set again; re-run the previous split cell first.
xTrain, xValid, yTrain, yValid = train_test_split(xTrain, yTrain, test_size=0.1, random_state=42)

In [None]:
print(xTrain.shape, yTrain.shape)
print(xValid.shape, yValid.shape)
print(xTest.shape, yTest.shape)

## Create PyTorch Dataset Class

In [None]:
import torch
from torch.utils.data import Dataset

In [None]:
class myDataLoader(Dataset) :
  """Map-style dataset that pairs each input row with its target row.

  Despite its name this is a ``torch.utils.data.Dataset``; wrap it in a
  ``torch.utils.data.DataLoader`` to obtain batches.

  Args:
    inputData: array-like of input features, one sample per row.
    targetData: array-like of targets, aligned with ``inputData`` along axis 0.
  """
  def __init__(self, inputData:np.array, targetData:np.array) :
    # Inheritance
    super().__init__()

    # Initialize Variable
    # Coerce once to float32 so that bool or object arrays (e.g. produced from
    # one-hot encoded DataFrames) do not make torch.as_tensor fail per sample.
    self.inputData = np.asarray(inputData, dtype=np.float32)
    self.targetData = np.asarray(targetData, dtype=np.float32)

  def __getitem__(self, index) :
    """Return ``{"input": tensor, "target": tensor}`` (both float32) for sample ``index``."""
    sample = torch.as_tensor(self.inputData[index])
    label = torch.as_tensor(self.targetData[index])

    return {"input":sample.float(), "target":label.float()}

  def __len__(self) :
    """Return the number of samples."""
    return len(self.inputData)

## Create PyTorch Classification Model

In [None]:
from torch import nn
import torch.nn.functional as F

In [None]:
class myModel(nn.Module) :
  """Multi-layer perceptron with two ReLU hidden layers and a linear output layer.

  Args:
    inputDim: number of input features.
    targetDim: number of output values (one per class).
    channels: width of both hidden layers.
  """
  def __init__(self, inputDim:int, targetDim:int, channels:int) :
    # Inheritance
    super().__init__()

    # Create MLP Layer Instance (input -> hidden -> hidden -> output)
    self.layer0 = nn.Linear(in_features=inputDim, out_features=channels)
    self.layer1 = nn.Linear(in_features=channels, out_features=channels)
    self.layer2 = nn.Linear(in_features=channels, out_features=targetDim)

  def forward(self, input) :
    """Return the raw (un-normalized) output of the last linear layer."""
    hidden = torch.relu(self.layer0(input))
    hidden = torch.relu(self.layer1(hidden))

    # No activation on the output layer.
    return self.layer2(hidden)

## Create Training Option (Hyperparameter) Dictionary

In [None]:
# Options shared by the training and inference helpers.
opt = dict(
  seed=42,                            # passed to fixSeed before training
  numClass=5,                         # output dimension of the model
  batchSize=8,                        # batch size of the train/valid DataLoaders
  lr=1e-3,                            # learning rate of the Adam optimizer
  epochs=50,                          # number of passes over the training split
  isCUDA=torch.cuda.is_available(),   # move model and batches to the GPU when one exists
)

## Train DL Model

In [None]:
from torch.utils.data import DataLoader
from torch import optim

from torchsummary import summary

from tqdm import tqdm

### Fix Seed

In [None]:
import random

In [None]:
def fixSeed(seed) :
  """Seed the Python, NumPy and PyTorch random generators and pin the cuDNN flags.

  Args:
    seed: integer handed unchanged to every seeding function.
  """
  # Each library keeps its own generator state, so every one is seeded in turn.
  seeders = (random.seed,
             np.random.seed,
             torch.manual_seed,
             torch.cuda.manual_seed,
             torch.cuda.manual_seed_all)
  for seeder in seeders :
    seeder(seed)

  # Request deterministic cuDNN kernels and switch off its auto-tuner.
  cudnnBackend = torch.backends.cudnn
  cudnnBackend.deterministic = True
  cudnnBackend.benchmark = False

## Create Average Meter Class

In [None]:
class AverageMeter(object):
    """Tracks the latest value and the running weighted mean of a metric.

    Attributes:
        val: value passed to the most recent ``update`` call.
        sum: weighted sum of all values seen since the last ``reset``.
        count: total weight seen since the last ``reset``.
        avg: ``sum / count``.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every statistic back to zero."""
        for field in ("val", "avg", "sum", "count"):
            setattr(self, field, 0)

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val*n
        self.avg = self.sum / self.count

## Create Accuracy Computation Function

In [None]:
def computeAcc(pred, target) :
  """Return the fraction of rows whose arg-max column agrees between ``pred`` and ``target``.

  Both arguments are indexed along dim 1 for the arg-max and the count of matching
  rows is divided by ``pred.size(0)``; the result is a zero-dimensional tensor.
  """
  predictedClass = pred.argmax(dim=1)
  trueClass = target.argmax(dim=1)
  numCorrect = (predictedClass==trueClass).sum()

  return numCorrect/pred.size(0)

## Training Code as a Function (Abstraction)

In [None]:
def train(opt, dataset, criterion) :
  """Train ``myModel`` on the training split and evaluate it on the validation split every epoch.

  Args:
    opt: option dictionary; the keys "seed", "batchSize", "numClass", "lr",
      "epochs" and "isCUDA" are read.
    dataset: dictionary holding the arrays "xTrain", "yTrain", "xValid" and "yValid".
    criterion: loss callable invoked as ``criterion(pred, target)``.

  Returns:
    ``((trainLossList, validLossList), (trainAccList, validAccList))`` where every
    list holds one sample-weighted average per epoch.

  Side effects:
    Writes "latestModel.pth" after every epoch and "bestModel.pth" whenever the
    validation accuracy improves (always after the first epoch).
  """
  fixSeed(opt["seed"])

  trainDataLoader = DataLoader(myDataLoader(dataset["xTrain"], dataset["yTrain"]), batch_size=opt["batchSize"], shuffle=True, drop_last=True)
  validDataLoader = DataLoader(myDataLoader(dataset["xValid"], dataset["yValid"]), batch_size=opt["batchSize"], shuffle=False, drop_last=False)

  # Re-seed right before the model is built so the weight initialization is repeatable.
  fixSeed(opt["seed"])
  # The feature count comes from the dataset argument, not from a notebook global.
  model = myModel(dataset["xTrain"].shape[1], opt["numClass"], 64)
  if opt["isCUDA"] :
    model = model.cuda()

  summary(model, (1, dataset["xTrain"].shape[1]))

  optimizer = optim.Adam(model.parameters(), lr=opt["lr"])

  trainLoss, validLoss = AverageMeter(), AverageMeter()
  trainAcc, validAcc = AverageMeter(), AverageMeter()
  trainLossList, validLossList = [], []
  trainAccList, validAccList = [], []
  # Start below any reachable accuracy so that "bestModel.pth" exists after the
  # first epoch even if the validation accuracy never rises above 0.
  bestAcc = float("-inf")

  for epoch in range(1, opt["epochs"]+1) :
    # ---- Training phase ----
    # Validation switches the model to eval mode; switch back at every epoch.
    model.train()
    trainBar = tqdm(trainDataLoader)
    trainLoss.reset()
    trainAcc.reset()

    for data in trainBar :
      input, target = data["input"], data["target"]
      if opt["isCUDA"] :
        input, target = input.cuda(), target.cuda()

      optimizer.zero_grad()
      pred = model(input)
      loss = criterion(pred, target)
      loss.backward()
      optimizer.step()

      # Weight by the number of samples actually contained in this batch.
      numSample = input.size(0)
      trainLoss.update(loss.item(), numSample)
      trainAcc.update(computeAcc(pred, target).item(), numSample)
      trainBar.set_description(desc=f"[{epoch}/{opt['epochs']}] [Train] < Accuracy:{trainAcc.avg:.6f} | Loss:{trainLoss.avg:.6f} >")

    trainLossList.append(trainLoss.avg)
    trainAccList.append(trainAcc.avg)

    # ---- Validation phase ----
    model.eval()
    validBar = tqdm(validDataLoader)
    validLoss.reset()
    validAcc.reset()

    with torch.no_grad() :
      for data in validBar :
        input, target = data["input"], data["target"]
        if opt["isCUDA"] :
          input, target = input.cuda(), target.cuda()

        pred = model(input)
        loss = criterion(pred, target)

        # The last validation batch may be smaller than opt["batchSize"]
        # (drop_last=False), so the true batch size is used as the weight.
        numSample = input.size(0)
        validLoss.update(loss.item(), numSample)
        validAcc.update(computeAcc(pred, target).item(), numSample)
        validBar.set_description(desc=f"[{epoch}/{opt['epochs']}] [Valid] < Accuracy:{validAcc.avg:.6f} | Loss:{validLoss.avg:.6f} >")

    validLossList.append(validLoss.avg)
    validAccList.append(validAcc.avg)

    # Keep the checkpoint with the highest validation accuracy seen so far.
    if validAcc.avg > bestAcc :
      bestAcc = validAcc.avg
      torch.save(model.state_dict(), "bestModel.pth")

    torch.save(model.state_dict(), "latestModel.pth")

  return (trainLossList, validLossList), (trainAccList, validAccList)

## Train Model

In [None]:
# Run the training loop; each returned tuple holds the per-epoch (train, valid) history.
# CrossEntropyLoss receives the model's raw outputs together with the one-hot targets.
lossList, accList = train(opt,
                          {"xTrain":xTrain, "yTrain":yTrain, "xValid":xValid, "yValid":yValid},
                          nn.CrossEntropyLoss())

## Plot Training vs. Validation Loss Graph

In [None]:
# Per-epoch training and validation loss curves returned by train().
epochAxis = np.arange(0, opt["epochs"], 1)

plt.figure(figsize=(20,10))

plt.plot(epochAxis, lossList[0], label="Training Loss")
plt.plot(epochAxis, lossList[1], label="Validation Loss")

plt.xlabel("Epoch")
# The model is trained with nn.CrossEntropyLoss, not binary cross-entropy.
plt.ylabel("Cross-Entropy Loss")
plt.legend(loc="best")

plt.show()

## Plot Training vs. Validation Accuracy Graph

In [None]:
# Per-epoch training and validation accuracy curves returned by train().
epochAxis = np.arange(0, opt["epochs"], 1)

plt.figure(figsize=(20,10))

# accList is ordered (training, validation); pair each history with its legend label.
for accHistory, curveLabel in zip(accList, ("Training Accuracy", "Validation Accuracy")) :
  plt.plot(epochAxis, accHistory, label=curveLabel)

plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")

plt.show()

## Inference Code as a Function (Abstraction)

In [None]:
def inference(opt, inputData, modelPath) :
  """Predict a class index for every row of ``inputData`` with a saved ``myModel``.

  Args:
    opt: option dictionary; the keys "numClass" and "isCUDA" are read.
    inputData: 2-D array-like of input features, one sample per row.
    modelPath: path of a state-dict checkpoint written by ``torch.save``.

  Returns:
    List with one predicted class index (int) per input row.
  """
  # map_location lets a checkpoint saved from the GPU load in a CPU-only session;
  # the model is moved to the GPU below when opt["isCUDA"] is set.
  weights = torch.load(modelPath, map_location="cpu")

  # Explicit float32 conversion also accepts bool or object arrays.
  inputDataTensor = torch.as_tensor(np.asarray(inputData, dtype=np.float32))

  # The input width comes from the data being predicted, not from a notebook global;
  # a mismatch with the checkpoint is reported by load_state_dict.
  model = myModel(inputDataTensor.shape[1], opt["numClass"], 64)
  model.load_state_dict(weights)
  if opt["isCUDA"] :
    model = model.cuda()
    inputDataTensor = inputDataTensor.cuda()

  predList = []

  model.eval()

  with torch.no_grad() :
    with tqdm(total=inputDataTensor.shape[0]) as pBar :
      # A separate loop name keeps the inputData parameter from being shadowed.
      for sample in inputDataTensor :
        pred = model(sample)
        # sample is one row, so the class scores lie along dim 0.
        predList.append(torch.argmax(pred, dim=0).item())

        pBar.update()

  return predList

## Inference Result

In [None]:
# Load the checkpoint from the same relative path that train() saves it to, so the
# cell does not depend on the working directory being /content.
predList = inference(opt, xTest, "bestModel.pth")

In [None]:
predList

## One-Hot Encoding to Label-Encoding

In [None]:
yTest

In [None]:
# Collapse the one-hot rows into class indices.
# The ndim guard keeps the cell re-runnable: once yTest is 1-D, a second argmax
# over axis=1 would raise an axis error.
if yTest.ndim == 2 :
  yTest = np.argmax(yTest, axis=1)

In [None]:
yTest

## Quantitative Assessment

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Fraction of test samples whose predicted class index equals the true class index.
accScore = accuracy_score(yTest, predList)
print(accScore)

In [None]:
# With (y_true, y_pred) argument order, rows are true classes and columns are predicted classes.
cm = confusion_matrix(yTest, predList)
print(cm)

In [None]:
# Per-class precision, recall, F1 and support; classes are reported by their integer index.
clsRp = classification_report(yTest, predList)
print(clsRp)