<a href="https://colab.research.google.com/github/Shim-John/Meltpool_Image-Prediction/blob/main/K_Fold_MPD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install openpyxl==3.0.9



In [None]:
%matplotlib inline
import os
import matplotlib.pyplot as plt
import cv2
from google.colab import drive, files
import sklearn
from IPython.display import Image 
from math import sqrt
import numpy as np
from PIL import Image
import torch
import glob
import pandas as pd
import torch.nn as nn 
import torch.nn.functional as F 
from torch.utils.data import DataLoader, ConcatDataset
from torch.autograd import Variable
import torch.utils.data as Data
from torch.utils.data import Dataset
import numpy as np, imageio
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
import time
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import KFold
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Mount Google Drive so the data under /content/drive/ is reachable (Colab).
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
class mpwDataset(Dataset):
    """Image dataset whose samples are listed in one sheet of an Excel workbook.

    For every row of the sheet, column 0 is used as the image file name
    (joined onto ``root_dir``) and column 2 is used as the target value.

    Args:
        root_dir: Directory that contains the image files.
        annotation_file: Path of the Excel workbook holding the annotations.
        sheet: Worksheet to load; passed to ``pd.read_excel(sheet_name=...)``.
        transform_img: Optional callable applied to the loaded RGB image
            before it is returned.
    """

    def __init__(self, root_dir, annotation_file, sheet, transform_img=None):
        self.root_dir = root_dir
        self.sheet = sheet
        self.annotations = pd.read_excel(annotation_file, sheet_name=sheet)
        self.transform_img = transform_img
        # Every target value (column 2) as a plain Python list.
        self.label = self.annotations.iloc[:, 2].tolist()

    def __len__(self):
        """Return the number of annotated samples (rows of the sheet)."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, target)`` for the row at position ``index``.

        The image is converted to RGB and, if a transform was supplied,
        passed through it. The target is wrapped in a ``torch.tensor``.
        """
        img_id = self.annotations.iloc[index, 0]
        # Open inside a context manager so the underlying file handle is
        # released deterministically, whatever the image format and even if
        # convert() raises. convert() returns an independent in-memory image,
        # so it stays usable after the source file is closed.
        with Image.open(os.path.join(self.root_dir, img_id)) as raw_img:
            img = raw_img.convert("RGB")
        y_label = torch.tensor(self.annotations.iloc[index, 2])
        if self.transform_img is not None:
            img = self.transform_img(img)
        return (img, y_label)


In [None]:
# Locations of the images and of the Excel workbook with the annotations.
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept because other cells may reference it.
dir = '/content/drive/MyDrive/johnshim_mpd_data/image_train_v5/'
path = '/content/drive/MyDrive/johnshim_mpd_data/mpd.xlsx'


# Training pipeline: resize + centre crop to 224x224, random flips as
# augmentation, then tensor conversion and per-channel normalisation.
# The Normalize constants are presumably the mean/std of the training images
# -- TODO confirm.
transform_train = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.0339, 0.0765, 0.5810],
                             [0.1614, 0.2144, 0.1777])
    ])

# Validation pipeline: same geometry, no random augmentation, and its own
# normalisation constants.
transform_val = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.0338, 0.0764, 0.5780],
                             [0.1612, 0.2143, 0.1773])
    ])

# NOTE(review): both splits read their images from the same directory (`dir`);
# only the worksheet differs -- confirm this is intended.
train_dataset = mpwDataset(dir, path, 'TRAIN', transform_img = transform_train)
val_dataset = mpwDataset(dir, path, 'TEST', transform_img = transform_val)
# Single dataset over both sheets; the K-fold split indexes into this.
dataset = ConcatDataset([train_dataset, val_dataset])

# Read the RANGE sheet once instead of re-parsing the workbook per value.
range_sheet = pd.read_excel(path, sheet_name='RANGE')
mpd_max = range_sheet.iloc[0, 4]
mpd_min = range_sheet.iloc[0, 5]

In [None]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Create a backbone whose classifier is replaced by a small regression head.

    Args:
        model_name: Architecture key. Only ``"resnet"`` (ResNet-50) is
            supported.
        num_classes: Width of the first layer of the new head. The head always
            ends in a single output unit.
        feature_extract: Accepted for call-site compatibility but not used by
            this function; no parameters are frozen here.
        use_pretrained: Forwarded to ``models.resnet50(pretrained=...)``.

    Returns:
        Tuple ``(model_ft, input_size)`` where ``model_ft`` is the modified
        network and ``input_size`` is 224.

    Raises:
        ValueError: If ``model_name`` is not a supported architecture.
    """
    if model_name != "resnet":
        # Fail loudly here: silently returning (None, 0) only surfaced later
        # as an AttributeError on `None.to(device)`.
        raise ValueError(
            "Unsupported model_name {!r}; expected 'resnet'".format(model_name)
        )

    # ResNet-50 backbone.
    model_ft = models.resnet50(pretrained=use_pretrained)
    num_ftrs = model_ft.fc.in_features

    # Replacement head: num_ftrs -> num_classes -> 20 -> 1, with ReLU between
    # the linear layers.
    model_ft.fc = nn.Sequential(
        nn.Linear(num_ftrs, num_classes),
        nn.ReLU(),
        nn.Linear(num_classes, 20),
        nn.ReLU(),
        nn.Linear(20, 1),
    )
    input_size = 224
    return model_ft, input_size
    
# Build the ResNet-based regressor once (this also fetches the pretrained
# weights on first use) and place it on the selected device.
model_ft, input_size = initialize_model(
    'resnet',
    40,
    feature_extract=True,
    use_pretrained=True,
)
model_ft = model_ft.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

### Hyperparameters

In [None]:
# Training settings used by the cross-validation loop.
num_epochs = 120          # passes over the training split per fold
lr = 1e-4                 # initial learning rate handed to the optimizer
criterion = nn.L1Loss()   # mean absolute error (MAE)

In [None]:
k_folds = 5

# Folds are contiguous index ranges (no shuffling). Pass shuffle=True to
# KFold to randomise fold membership instead.
kfold = KFold(n_splits=k_folds)
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Each sampler draws, in random order and without replacement, only from
    # the indices that belong to its side of the current split.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    # Data loaders for the training and held-out part of this fold.
    trainloader = DataLoader(
        dataset, batch_size=20, sampler=train_subsampler, pin_memory=True)
    testloader = DataLoader(
        dataset, batch_size=20, sampler=test_subsampler, pin_memory=True)

    # Fresh model, optimizer and scheduler per fold so nothing learned on one
    # fold carries over to the next.
    model_ft, input_size = initialize_model('resnet', 40, feature_extract = True, use_pretrained=True)
    model_ft = model_ft.to(device)
    optimizer = optim.SGD(model_ft.parameters(), lr=lr, momentum=0.9)
    scheduler = StepLR(optimizer, step_size=100, gamma=0.8)

    # Per-epoch loss history for this fold (mean loss per sample).
    total_loss_train = []
    total_loss_val = []

    for e in range(num_epochs):
        print('Epoch {}/{}'.format(e+1, num_epochs))
        print('LR:', scheduler.get_last_lr())
        print('-' * 10)

        # ---- training pass ----
        model_ft.train()
        train_loss = 0.0
        for inputs, labels in trainloader:
            # Use the configured device rather than a hard-coded .cuda() so
            # the loop also runs on CPU-only machines.
            inputs = inputs.to(device)
            # Targets become a float column vector to match the model output.
            labels = labels.to(device).unsqueeze(1).float()

            optimizer.zero_grad()
            outputs = model_ft(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by the batch size so the running total is
            # a sum over samples (the last batch may be smaller).
            train_loss += loss.item() * inputs.size(0)
        # Divide the per-sample sum by the number of samples, not the number
        # of batches, to get the mean loss per sample.
        epoch_train_loss = train_loss / len(train_ids)
        total_loss_train.append(epoch_train_loss)

        # ---- validation pass ----
        val_loss = 0.0
        model_ft.eval()
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs = inputs.to(device)
                labels = labels.to(device).unsqueeze(1).float()

                outputs = model_ft(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
        epoch_val_loss = val_loss / len(test_ids)
        total_loss_val.append(epoch_val_loss)

        # Training loss is normalised by the training-set size (it used to be
        # divided by the number of validation batches).
        print(f'\t Training Loss: {epoch_train_loss} \t\t Validation Loss: {epoch_val_loss}')

        scheduler.step()
    print('Finished Training')

    # ---- final evaluation of this fold on its held-out samples ----
    model_ft.eval()
    fold_targets = []
    fold_predictions = []
    with torch.no_grad():  # inference only: no autograd graph needed
        for (inputs, labels) in testloader:
            output = model_ft(inputs.to(device))
            fold_targets.append(labels.cpu().numpy().ravel())
            fold_predictions.append(output.cpu().numpy().ravel())
    # One concatenation at the end instead of np.append on every batch.
    x = np.concatenate(fold_targets)
    y = np.concatenate(fold_predictions)
    rsquared = sklearn.metrics.r2_score(x,y)
    mse_err = sklearn.metrics.mean_squared_error(x,y)

    print('lr = ' + str(lr))
    print('rsquared = ' + str(rsquared))
    print('MSE Error = ' + str(mse_err))

FOLD 0
--------------------------------
Epoch 1/120
LR: [0.0001]
----------
	 Training Loss: 24.799797628236853 		 Validation Loss: 2.5830928592578224
Epoch 2/120
LR: [0.0001]
----------
	 Training Loss: 6.913257343613583 		 Validation Loss: 1.208244719259117
Epoch 3/120
LR: [0.0001]
----------
	 Training Loss: 4.51859032654244 		 Validation Loss: 1.2049789694340334
Epoch 4/120
LR: [0.0001]
----------
	 Training Loss: 4.058436945080757 		 Validation Loss: 0.814689179153546
Epoch 5/120
LR: [0.0001]
----------
	 Training Loss: 3.7425628777431403 		 Validation Loss: 0.8871647067691969
Epoch 6/120
LR: [0.0001]
----------
	 Training Loss: 3.617624678041624 		 Validation Loss: 0.7629747073287549
Epoch 7/120
LR: [0.0001]
----------
	 Training Loss: 3.5426707222409872 		 Validation Loss: 0.6872681107210077
Epoch 8/120
LR: [0.0001]
----------
	 Training Loss: 3.346423494103162 		 Validation Loss: 0.6247089932794156
Epoch 9/120
LR: [0.0001]
----------
	 Training Loss: 2.8881652165690195 		 Valid