<a href="https://colab.research.google.com/github/JK-the-Ko/Thermo-Fluid-Dynamics-Experiment/blob/main/2023-2/%EC%97%B4%EC%9C%A0%EC%B2%B4%EA%B3%B5%ED%95%99%EC%8B%A4%ED%97%981_Week10_PyTorch_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regression Using PyTorch Framework

## Check NVIDIA GPU Setting

In [None]:
# Show the NVIDIA driver / GPU status to confirm that a GPU runtime is attached.
!nvidia-smi

## Load Dataset

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the training CSV into a DataFrame.
# NOTE(review): absolute Colab path — the file must exist at /content/train.csv.
df = pd.read_csv("/content/train.csv")

### View DataFrame

In [None]:
# Preview the first 10 rows.
df.head(10)

## View Statistics

In [None]:
# Per-column summary statistics of the DataFrame.
df.describe()

### Get Number of Data

In [None]:
# (number of rows, number of columns)
df.shape

### Get Input and Target Data

In [None]:
# Separate the regression target ("critical_temp") from the remaining feature columns.
targetData = df["critical_temp"]
inputData = df.drop(columns=["critical_temp"])

### Get Input Feature Names

In [None]:
# Names of the input feature columns.
inputData.columns

### Get Data Type of Input Features

In [None]:
# Column dtypes and non-null counts of the input features.
inputData.info()

## Show Target Data

In [None]:
# Display the target column (critical_temp).
targetData

## Preprocessing

### Min-Max Normalization

In [None]:
def MinMaxNorm(dataFrame) :
  """Min-max scale the data: ``(x - min) / (max - min)``.

  For a DataFrame the minimum / maximum are taken per column; for a Series they
  are taken over the whole Series.

  A constant column (or constant Series) has ``max - min == 0``; dividing by it
  would fill the result with NaN, so a zero range is replaced by 1 and such data
  maps to 0 instead.

  Args:
    dataFrame: pandas DataFrame or Series of numeric values.

  Returns:
    Object of the same type and shape as ``dataFrame`` with the scaled values.
  """
  minValue = dataFrame.min()
  valueRange = dataFrame.max() - minValue

  if isinstance(valueRange, pd.Series) :
    # DataFrame input: one range per column, patch only the constant columns
    valueRange = valueRange.replace(0, 1)
  elif valueRange == 0 :
    # Series input: the range is a single scalar
    valueRange = 1

  return (dataFrame - minValue) / valueRange

In [None]:
# Min-max scale the input features (per column) and the target.
# NOTE(review): the min/max are computed over the full dataset before the
# train/valid/test split further down, so test-set statistics leak into the scaling.
# They are also not stored, so predictions cannot be mapped back to the original
# target scale later.
inputData = MinMaxNorm(inputData)
targetData = MinMaxNorm(targetData)

In [None]:
# Inspect the normalized input features.
inputData

In [None]:
# Inspect the normalized target values.
targetData

### Split Dataset

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Convert the pandas objects to NumPy arrays before splitting.
inputData = np.array(inputData)
targetData = np.array(targetData)

In [None]:
# Hold out 10% of the samples as the test set (fixed random_state for a repeatable split).
xTrain, xTest, yTrain, yTest = train_test_split(
    inputData,
    targetData,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Carve a validation set (10%) out of the remaining training samples.
xTrain, xValid, yTrain, yValid = train_test_split(
    xTrain,
    yTrain,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Print the (inputs, targets) shapes of the train, validation and test splits, in that order.
for splitX, splitY in ((xTrain, yTrain), (xValid, yValid), (xTest, yTest)) :
  print(splitX.shape, splitY.shape)

## Create PyTorch Dataset Class

In [None]:
import torch
from torch.utils.data import Dataset

In [None]:
class myDataLoader(Dataset) :
  """Map-style dataset pairing each input feature row with its regression target.

  Despite its name this class is a ``torch.utils.data.Dataset``; wrap it in a
  ``torch.utils.data.DataLoader`` to get batching and shuffling.

  Args:
    inputData: 2-D array, indexed as ``inputData[index, :]`` (one row per sample).
    targetData: array indexed as ``targetData[index]`` (one entry per sample).

  Raises:
    ValueError: if ``inputData`` and ``targetData`` differ in length.
  """
  def __init__(self, inputData:np.ndarray, targetData:np.ndarray) :
    # Inheritance
    super(myDataLoader, self).__init__()

    # Fail early: a mismatch would otherwise surface as an IndexError mid-epoch
    # (or silently ignore surplus targets)
    if len(inputData) != len(targetData) :
      raise ValueError(
        f"inputData and targetData must have the same length, got {len(inputData)} and {len(targetData)}"
      )

    # Initialize Variable
    self.inputData = inputData
    self.targetData = targetData

  def __getitem__(self, index) :
    """Return sample ``index`` as a dict of float32 tensors with keys "input" and "target"."""
    # Local names chosen so the built-in ``input`` is not shadowed
    features = torch.as_tensor(self.inputData[index, :])
    # unsqueeze(0) adds a leading axis, so a scalar target becomes shape (1,)
    label = torch.as_tensor(self.targetData[index]).unsqueeze(0)

    return {"input":features.float(), "target":label.float()}

  def __len__(self) :
    """Number of samples (rows of ``inputData``)."""
    return len(self.inputData)

## Create PyTorch Regression Model

In [None]:
from torch import nn
import torch.nn.functional as F

In [None]:
class myModel(nn.Module) :
  """Fully connected regressor: six Linear layers with ReLU after all but the last.

  Layer widths run inputDim -> channels -> 2*channels -> 4*channels ->
  2*channels -> channels -> targetDim.
  """
  def __init__(self, inputDim:int, targetDim:int, channels:int) :
    super().__init__()

    # Widths from input to output; each consecutive pair defines one Linear layer
    widths = [inputDim, channels, channels*2, channels*4, channels*2, channels, targetDim]

    # Register the layers as layer0..layer5, in creation order, so parameter names
    # (and therefore saved state dicts) keep the same keys
    for idx, (inFeatures, outFeatures) in enumerate(zip(widths[:-1], widths[1:])) :
      setattr(self, f"layer{idx}", nn.Linear(inFeatures, outFeatures))

  def forward(self, input) :
    """Apply the five hidden layers with ReLU, then the linear output layer."""
    hidden = input
    for layer in (self.layer0, self.layer1, self.layer2, self.layer3, self.layer4) :
      hidden = F.relu(layer(hidden))

    # No activation on the output layer
    return self.layer5(hidden)

## Create Training Option (Hyperparameter) Dictionary

In [None]:
# Training options (hyperparameters) shared by the training and inference cells
opt = dict(
    seed=42,                            # seed passed to fixSeed
    batchSize=128,                      # samples per mini-batch
    lr=1e-3,                            # optimizer learning rate
    epochs=50,                          # number of passes over the training set
    isCUDA=torch.cuda.is_available(),   # run on the GPU when one is available
)

## Train DL Model

In [None]:
from torch.utils.data import DataLoader
from torch import optim

from tqdm import tqdm

### Fix Seed

In [None]:
import random

In [None]:
def fixSeed(seed) :
  """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators with ``seed``
  and set the cuDNN flags to deterministic / non-benchmark mode."""
  # Same seed for every generator, applied in a fixed order
  seeders = (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
  )
  for applySeed in seeders :
    applySeed(seed)

  cudnn = torch.backends.cudnn
  cudnn.deterministic = True
  cudnn.benchmark = False

In [None]:
# Seed all random generators with the configured seed.
fixSeed(opt["seed"])

## Create DataLoader Instance

In [None]:
# Training batches: shuffled, and the incomplete last batch is dropped
trainDataLoader = DataLoader(
    myDataLoader(xTrain, yTrain),
    batch_size=opt["batchSize"],
    shuffle=True,
    drop_last=True,
)

# Validation batches: fixed order, every sample kept (the last batch may be smaller)
validDataLoader = DataLoader(
    myDataLoader(xValid, yValid),
    batch_size=opt["batchSize"],
    shuffle=False,
    drop_last=False,
)

## Create Deep Learning Model Instance

In [None]:
# Re-seed right before creating the model so the weight initialization is repeatable.
fixSeed(opt["seed"])

In [None]:
# Build the MLP: one input per feature column, a single regression output, base width 64.
model = myModel(xTrain.shape[1], 1, 64)

## Assign Device

In [None]:
# Move the model to the GPU when CUDA is available; otherwise it stays on the CPU.
if opt["isCUDA"] :
  model = model.cuda()

In [None]:
# Device of the first model parameter, to confirm where the model lives.
next(model.parameters()).device

## Visualize Model

In [None]:
from torchsummary import summary

In [None]:
# Print a torchsummary overview of the model for an input of size (1, number of features).
summary(model, (1, xTrain.shape[1]))

## Create Optimizer Instance

In [None]:
# Adam optimizer over all model parameters, using the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=opt["lr"])

## Create Loss Function Instance

In [None]:
# Mean squared error loss for the regression target.
criterion = nn.MSELoss()

## Create Average Meter Instance

In [None]:
class AverageMeter:
    """Tracks the most recent value and the weighted running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the last value, running sum, sample count and average."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        self.sum = self.sum + val*n
        self.count = self.count + n
        self.avg = self.sum / self.count

In [None]:
# Running-average trackers for the training and validation loss.
trainLoss, validLoss = AverageMeter(), AverageMeter()

## Create List Instance for Adding Training Result

In [None]:
# Per-epoch loss histories, plus the lowest validation loss seen so far (starts at +inf)
trainLossList = []
validLossList = []
bestLoss = torch.inf

## Train Model

In [None]:
# Train for opt["epochs"] epochs. After each epoch the model is evaluated on the
# validation set; the weights with the lowest validation loss go to "bestModel.pth"
# and the most recent weights to "latestModel.pth".
for epoch in range(1, opt["epochs"]+1) :
  # ---- Training phase ----
  # Restore training mode: the validation phase below switches the model to eval mode
  model.train()
  trainBar = tqdm(trainDataLoader)
  trainLoss.reset()

  for data in trainBar :
    # Batch names chosen so the built-in ``input`` is not overwritten in the notebook namespace
    inputBatch, targetBatch = data["input"], data["target"]
    if opt["isCUDA"] :
      inputBatch, targetBatch = inputBatch.cuda(), targetBatch.cuda()

    optimizer.zero_grad()
    pred = model(inputBatch)
    loss = criterion(pred, targetBatch)
    loss.backward()
    optimizer.step()

    # Weight the batch-mean loss by the actual batch size
    trainLoss.update(loss.item(), inputBatch.size(0))
    trainBar.set_description(desc=f"[{epoch}/{opt['epochs']}] [Train] < Loss:{trainLoss.avg:.15f} >")

  trainLossList.append(trainLoss.avg)

  # ---- Validation phase ----
  model.eval()
  validBar = tqdm(validDataLoader)
  validLoss.reset()

  with torch.no_grad() :
    for data in validBar :
      inputBatch, targetBatch = data["input"], data["target"]
      if opt["isCUDA"] :
        inputBatch, targetBatch = inputBatch.cuda(), targetBatch.cuda()

      pred = model(inputBatch)
      loss = criterion(pred, targetBatch)

      # The last validation batch can be smaller than opt["batchSize"] (drop_last=False),
      # so weight by the real batch size to get a true per-sample average
      validLoss.update(loss.item(), inputBatch.size(0))
      validBar.set_description(desc=f"[{epoch}/{opt['epochs']}] [Valid] < Loss:{validLoss.avg:.15f} >")

  validLossList.append(validLoss.avg)

  # Checkpoint the best model so far
  if validLoss.avg < bestLoss :
    bestLoss = validLoss.avg
    torch.save(model.state_dict(), "bestModel.pth")

  # Always checkpoint the latest model
  torch.save(model.state_dict(), "latestModel.pth")

In [None]:
# Report the lowest validation loss reached during training.
print(f"Best MSE Loss : {bestLoss:.12f}")

## Plot Training vs. Validation Loss Graph

In [None]:
# Plot the per-epoch training and validation loss curves on one figure
plt.figure(figsize=(20,10))

epochAxis = np.arange(0, opt["epochs"], 1)
for lossHistory, curveLabel in ((trainLossList, "Training Loss"), (validLossList, "Validation Loss")) :
  plt.plot(epochAxis, lossHistory, label=curveLabel)

plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.legend(loc="best")

plt.show()

## Load Saved Model

In [None]:
# Load the best checkpoint from the same relative path the training loop saves it to
# ("bestModel.pth"), so this also works when the working directory is not /content.
# map_location="cpu" lets a checkpoint written on a GPU runtime load on a CPU-only one.
weights = torch.load("bestModel.pth", map_location="cpu")

In [None]:
# Rebuild the same architecture, restore the saved weights, then move the model
# to the GPU when CUDA is available.
model = myModel(xTrain.shape[1], 1, 64)
model.load_state_dict(weights)
if opt["isCUDA"] :
  model = model.cuda()

## Run Inference on Test Input Data

In [None]:
# Predict the test samples one at a time with the restored model
xTestTensor = torch.as_tensor(xTest).float()

predList = []

model.eval()

with torch.no_grad() :
  with tqdm(total=xTestTensor.shape[0]) as pBar :
    # Dedicated loop name: reusing ``inputData`` would overwrite the dataset array of that name
    for testSample in xTestTensor :
      # Only move the sample to the GPU when CUDA is available (the model follows the same
      # flag); an unconditional .cuda() crashes on CPU-only runtimes
      if opt["isCUDA"] :
        testSample = testSample.cuda()

      pred = model(testSample)
      predList.append(pred.detach().cpu().item())

      pBar.update()

## Plot Bar Chart for Comparison

In [None]:
def plotBarChart(yTest, yTestHat, numSamples=50) :
  """Draw ground-truth and predicted values side by side as a grouped bar chart.

  Args:
    yTest: sequence of ground-truth values.
    yTestHat: sequence of predicted values, aligned with ``yTest``.
    numSamples: maximum number of leading samples to plot. Defaults to 50 (the value
      that used to be hard-coded) and is clamped to the number of samples available.
  """
  # Clamp so inputs shorter than numSamples no longer cause a bar-length mismatch
  count = max(0, min(numSamples, len(yTest), len(yTestHat)))
  idx = np.arange(count)
  width = 0.2

  fig, ax = plt.subplots(figsize = (20,10))

  # Offset the prediction bars by one bar width so each pair sits side by side
  ax.bar(idx, yTest[:count], width = width)
  ax.bar(idx+width, yTestHat[:count], width = width)
  ax.set_xticks(idx)
  ax.legend(["Ground Truth", "Prediction"])
  ax.set_xlabel("# samples")
  ax.set_ylabel("Value")

  fig.tight_layout()
  plt.show()

In [None]:
# Compare ground truth and predictions for the leading test samples.
plotBarChart(yTest, predList)

## Training Code as a Function (Abstraction)

In [None]:
def train(opt, dataset, criterion) :
  """Train a fresh ``myModel`` and return the per-epoch loss histories.

  Args:
    opt: option dict with keys "seed", "batchSize", "lr", "epochs" and "isCUDA".
    dataset: dict with keys "xTrain", "yTrain", "xValid" and "yValid".
    criterion: loss callable applied as ``criterion(pred, target)``.

  Returns:
    (trainLossList, validLossList): the average training and validation loss of
    each epoch, in epoch order.

  Side effects:
    Writes "bestModel.pth" (weights with the lowest validation loss so far) and
    "latestModel.pth" (weights after every epoch) to the current working directory,
    and prints a torchsummary overview of the model.
  """
  fixSeed(opt["seed"])

  trainDataLoader = DataLoader(myDataLoader(dataset["xTrain"], dataset["yTrain"]), batch_size=opt["batchSize"], shuffle=True, drop_last=True)
  validDataLoader = DataLoader(myDataLoader(dataset["xValid"], dataset["yValid"]), batch_size=opt["batchSize"], shuffle=False, drop_last=False)

  # Re-seed so the weight initialization does not depend on the DataLoader construction
  fixSeed(opt["seed"])
  # Size the input layer from the dataset passed in, not from the notebook-global xTrain
  numFeatures = dataset["xTrain"].shape[1]
  model = myModel(numFeatures, 1, 64)
  if opt["isCUDA"] :
    model = model.cuda()

  summary(model, (1, numFeatures))

  optimizer = optim.Adam(model.parameters(), lr=opt["lr"])

  trainLoss, validLoss = AverageMeter(), AverageMeter()
  trainLossList, validLossList = [], []
  bestLoss = torch.inf

  for epoch in range(1, opt["epochs"]+1) :
    # ---- Training phase ----
    # Restore training mode: the validation phase below switches the model to eval mode
    model.train()
    trainBar = tqdm(trainDataLoader)
    trainLoss.reset()

    for data in trainBar :
      inputBatch, targetBatch = data["input"], data["target"]
      if opt["isCUDA"] :
        inputBatch, targetBatch = inputBatch.cuda(), targetBatch.cuda()

      optimizer.zero_grad()
      pred = model(inputBatch)
      loss = criterion(pred, targetBatch)
      loss.backward()
      optimizer.step()

      # Weight the batch loss by the actual batch size
      # (assumes criterion returns a per-batch mean, as nn.MSELoss / nn.L1Loss do by default)
      trainLoss.update(loss.item(), inputBatch.size(0))
      trainBar.set_description(desc=f"[{epoch}/{opt['epochs']}] [Train] < Loss:{trainLoss.avg:.6f} >")

    trainLossList.append(trainLoss.avg)

    # ---- Validation phase ----
    model.eval()
    validBar = tqdm(validDataLoader)
    validLoss.reset()

    with torch.no_grad() :
      for data in validBar :
        inputBatch, targetBatch = data["input"], data["target"]
        if opt["isCUDA"] :
          inputBatch, targetBatch = inputBatch.cuda(), targetBatch.cuda()

        pred = model(inputBatch)
        loss = criterion(pred, targetBatch)

        # The last validation batch can be smaller than opt["batchSize"] (drop_last=False)
        validLoss.update(loss.item(), inputBatch.size(0))
        validBar.set_description(desc=f"[{epoch}/{opt['epochs']}] [Valid] < Loss:{validLoss.avg:.6f} >")

    validLossList.append(validLoss.avg)

    # Checkpoint the best model so far
    if validLoss.avg < bestLoss :
      bestLoss = validLoss.avg
      torch.save(model.state_dict(), "bestModel.pth")

    # Always checkpoint the latest model
    torch.save(model.state_dict(), "latestModel.pth")

  return trainLossList, validLossList

## MAE vs. MSE

In [None]:
# Sample error values from -1.5 (inclusive) in steps of 0.01 and evaluate both loss shapes
x = np.arange(-1.5, 1.5, 1e-2)
mae, mse = np.abs(x), np.power(x, 2)

In [None]:
# Plot the absolute-error and squared-error curves over the same inputs
plt.figure(figsize=(20,10))

for lossValues, curveLabel in ((mae, "MAE Loss"), (mse, "MSE Loss")) :
  plt.plot(x, lossValues, label=curveLabel)

plt.xlabel("Input")
plt.ylabel("Loss")
plt.legend(loc="best")

plt.show()

## Train Model

In [None]:
# Train a new model with the same options and splits, this time with MAE (L1) loss.
# NOTE: train() writes bestModel.pth / latestModel.pth again, overwriting the
# checkpoints produced by the MSE training loop above.
trainLossList, validLossList = train(opt,
                                     {"xTrain":xTrain, "yTrain":yTrain, "xValid":xValid, "yValid":yValid},
                                     nn.L1Loss())

## Plot Training vs. Validation Loss Graph

In [None]:
# Plot the per-epoch training and validation loss curves of the MAE run
plt.figure(figsize=(20,10))

epochAxis = np.arange(0, opt["epochs"], 1)
for lossHistory, curveLabel in ((trainLossList, "Training Loss"), (validLossList, "Validation Loss")) :
  plt.plot(epochAxis, lossHistory, label=curveLabel)

plt.xlabel("Epoch")
plt.ylabel("MAE Loss")
plt.legend(loc="best")

plt.show()

## Inference Code as a Function (Abstraction)

In [None]:
def inference(opt, inputData, modelPath) :
  """Load a saved ``myModel`` checkpoint and predict every row of ``inputData``.

  Args:
    opt: option dict; ``opt["isCUDA"]`` selects whether to run on the GPU.
    inputData: 2-D array-like with one sample per row; the number of columns
      determines the model's input size and must match the checkpoint.
    modelPath: path of a state dict saved via ``torch.save(model.state_dict(), ...)``.

  Returns:
    list of Python floats, one prediction per row of ``inputData``.
  """
  inputDataTensor = torch.as_tensor(inputData).float()

  # map_location="cpu" lets a checkpoint written on a GPU runtime load on a CPU-only one
  weights = torch.load(modelPath, map_location="cpu")

  # Size the input layer from the data passed in, not from the notebook-global xTrain
  model = myModel(inputDataTensor.shape[1], 1, 64)
  model.load_state_dict(weights)
  if opt["isCUDA"] :
    model = model.cuda()

  predList = []

  model.eval()

  with torch.no_grad() :
    with tqdm(total=inputDataTensor.shape[0]) as pBar :
      # Dedicated loop name so the ``inputData`` parameter is not shadowed
      for sample in inputDataTensor :
        if opt["isCUDA"] :
          sample = sample.cuda()

        pred = model(sample)
        predList.append(pred.detach().cpu().item())

        pBar.update()

  return predList

## Inference Result

In [None]:
# Predict the test set with the best checkpoint. The path matches the relative
# "bestModel.pth" that train() writes, so this also works when the working
# directory is not /content.
predList = inference(opt, xTest, "bestModel.pth")

## Plot Bar Chart for Comparison

In [None]:
# Compare ground truth and the latest predictions for the leading test samples.
plotBarChart(yTest, predList)