# Multi-Class Classification

## Import Packages

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import numpy as np
import os
import glob
import pandas as pd
from collections import Counter
from copy import deepcopy
import matplotlib.pyplot as plt
from PIL import Image
import time
import random

## Setup seed

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# A single seed value shared by every random number generator seeded below.
seed = 42

# PYTHONHASHSEED is read at interpreter start-up, so this assignment only
# influences Python processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(seed)

random.seed(seed)        # Python's built-in generator
np.random.seed(seed)     # NumPy's global generator
torch.manual_seed(seed)  # PyTorch generator

# cuDNN switches: cuDNN is turned off entirely; the deterministic/benchmark
# flags are still set so results stay reproducible if it is re-enabled later.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed the CUDA generators (current device, then all devices) only when a
# GPU is actually in use.
if device == 'cuda':
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

## Connect Google Drive (When using Colab)

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by
# the cells below. Per the section heading this cell is meant for Colab only;
# skip it when running the notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

## Load train, validation, and test sets

In [None]:
# Enter the file paths of train.csv, validation.csv, and test.csv.
# Each split must be read from its own file: loading train.csv for all three
# would evaluate the model on the very samples it was trained on.
train_dataset = pd.read_csv('./CSV_files/Deep_learning/train.csv')
validation_dataset = pd.read_csv('./CSV_files/Deep_learning/validation.csv')
test_dataset = pd.read_csv('./CSV_files/Deep_learning/test.csv')

In [None]:
# Convert each split's DataFrame into a plain list of rows (one list per CSV
# row) so the dataset class can index the columns by position.
train_list, val_list, test_list = (
    frame.values.tolist()
    for frame in (train_dataset, validation_dataset, test_dataset)
)

# Sanity check: all three should be reported as <class 'list'>.
for split_rows in (train_list, val_list, test_list):
    print(type(split_rows))

# Notebook display: the first row of each split.
(test_list[0], val_list[0], train_list[0])

## Prepare dataset(Transforms and Custom Dataset)

In [None]:
from torchvision import transforms

# Size handed to Resize.
# NOTE(review): with a single int, torchvision's Resize matches the image's
# shorter edge and keeps the aspect ratio, so batching assumes all source
# images share one aspect ratio - confirm against the dataset.
input_size = 480

# Training pipeline: resize, convert to a float tensor, then normalize each
# channel with the widely used ImageNet mean / standard deviation.
transforms_for_train = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation / test pipeline: same steps as training (no augmentation is
# applied in either pipeline).
transforms_for_val_test = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Dataset class
class CustomDataset(Dataset):
    """Image dataset backed by a list of rows (e.g. from a CSV file).

    Each row is indexed by position: element 5 is the image path and
    element 2 is the label. String labels 'Low' / 'Medium' / 'High' are
    encoded as 0 / 1 / 2; any other label value is returned unchanged.

    Args:
        file_list: Sequence of rows, each indexable at positions 2 and 5.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    # Class-name to class-index encoding used for training targets.
    LABEL_TO_INDEX = {'Low': 0, 'Medium': 1, 'High': 2}

    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.file_list)

    @classmethod
    def _encode_label(cls, label):
        """Map a known string label to its class index; pass others through."""
        if isinstance(label, str):
            return cls.LABEL_TO_INDEX.get(label, label)
        return label

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for the row at ``idx``.

        The image is the transformed image when a transform was supplied,
        otherwise the RGB PIL image itself.
        """
        row = self.file_list[idx]
        img_path = row[5]
        # Encode the label regardless of whether a transform is configured;
        # previously this only happened inside the transform branch.
        label = self._encode_label(row[2])
        img = Image.open(img_path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        # Without a transform the untransformed image is returned instead of
        # hitting an unbound local variable.
        return img, label, img_path

# Wrap each split's row list in a dataset with the matching transform pipeline.
dataset_train = CustomDataset(train_list, transform=transforms_for_train)
dataset_valid = CustomDataset(val_list, transform=transforms_for_val_test)
dataset_test = CustomDataset(test_list, transform=transforms_for_val_test)

from torch.utils.data import DataLoader #DataLoader class

minibatch_size = 16

# Training batches are shuffled and the final partial batch is dropped.
train_batches = DataLoader(dataset=dataset_train, batch_size=minibatch_size, shuffle=True, num_workers=4, drop_last=True)
# Evaluation loaders keep the final partial batch: dropping it would silently
# exclude samples from the reported loss/accuracy while they are still counted
# in len(loader.dataset).
val_batches = DataLoader(dataset=dataset_valid, batch_size=minibatch_size, shuffle=False, num_workers=4, drop_last=False)
test_batches = DataLoader(dataset=dataset_test, batch_size=minibatch_size, shuffle=False, num_workers=4, drop_last=False)

## Create Model

In [None]:
# For Colab: install the `transformers` package into the runtime.
# The leading `!` is IPython/Colab shell syntax, so this line only works
# inside a notebook.
# NOTE(review): no cell visible in this notebook imports `transformers` -
# confirm the install is still needed.
!pip install transformers

In [None]:
# EfficientNetV2-L model

# Import only the constructor that is used instead of a wildcard import,
# which floods the notebook namespace with every torchvision model name.
from torchvision.models import efficientnet_v2_l

# Start from torchvision's default pretrained weights.
model = efficientnet_v2_l(weights="DEFAULT")

# Replace the stock classification head with a single linear layer that
# outputs 3 logits (Low / Medium / High). The input width is read from the
# existing head's final Linear layer rather than hard-coded.
in_features = model.classifier[-1].in_features
model.classifier = nn.Sequential(
    nn.Linear(in_features, 3, bias=True)
)
model = model.to(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
# Cosine annealing from the initial learning rate down to eta_min over
# T_max scheduler steps (10 - 1 = 9).
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10 - 1, eta_min=0, last_epoch=-1)

In [None]:
# Functions of train and validation

from tqdm import tqdm

def _run_epoch(model, criterion, loader, run_device, optimizer=None):
    """Run one pass over ``loader``; train if ``optimizer`` is given, else evaluate.

    Returns:
        Tuple ``(mean_batch_loss, correct_count, sample_count)``. The mean
        loss is NaN when the loader yields no batches.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum, correct, seen, batches = 0.0, 0, 0, 0
    # Gradients are only tracked while training.
    with torch.set_grad_enabled(training):
        for batch_x, batch_y, _img_path in tqdm(loader):
            batch_x = batch_x.to(run_device)
            batch_y = batch_y.to(run_device)

            logits = model(batch_x)
            loss = criterion(logits, batch_y)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            # argmax over logits equals argmax over softmax(logits).
            correct += (logits.argmax(dim=1) == batch_y).sum().item()
            seen += batch_y.size(0)
            batches += 1

    mean_loss = loss_sum / batches if batches else float('nan')
    return mean_loss, correct, seen


def train_model(model, criterion, optimizer, early_stop, epochs, train_loader, valid_loader, scheduler=None):
    """Train ``model`` and restore the weights with the lowest validation loss.

    Args:
        model: Network to train; batches are moved to the device its
            parameters live on.
        criterion: Loss called as ``criterion(logits, targets)``.
        optimizer: Optimizer updating ``model``'s parameters.
        early_stop: When > 0, stop once ``lowest_epoch + early_stop < epoch``
            on an epoch without improvement. 0 disables early stopping.
        epochs: Maximum number of epochs.
        train_loader: Iterable of ``(inputs, targets, img_path)`` batches.
        valid_loader: Iterable of ``(inputs, targets, img_path)`` batches.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
            When omitted, a module-level ``scheduler`` is used if one exists
            (kept so existing calls behave as before).

    Returns:
        ``(model, lowest_loss, train_losses, valid_losses, train_accuracies,
        valid_accuracies)`` with the best-validation weights loaded into
        ``model``.
    """
    if scheduler is None:
        scheduler = globals().get('scheduler')

    # Follow the model's own device instead of depending on a global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses, train_accuracies = [], []
    valid_losses, valid_accuracies = [], []
    lowest_loss, lowest_epoch = np.inf, 0
    # Snapshot up front so there is always a state to restore, even when no
    # epoch runs or the validation loss never improves (e.g. NaN).
    best_model = deepcopy(model.state_dict())

    for epoch in range(epochs):
        start = time.time()

        train_loss, train_correct, train_seen = _run_epoch(
            model, criterion, train_loader, run_device, optimizer)
        # Divide by the samples actually seen: with drop_last=True the loader
        # yields fewer samples than len(loader.dataset).
        train_accuracy = train_correct / train_seen if train_seen else 0.0
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        valid_loss, valid_correct, valid_seen = _run_epoch(
            model, criterion, valid_loader, run_device)
        valid_accuracy = valid_correct / valid_seen if valid_seen else 0.0
        valid_losses.append(valid_loss)
        valid_accuracies.append(valid_accuracy)

        elapsed_time = time.time() - start
        print(f'[Epoch {epoch+1}/{epochs}]: {elapsed_time:.3f} sec(elapsed time), train loss: {train_losses[-1]:.4f}, train acc: {train_accuracy * 100:.3f}% / valid loss: {valid_losses[-1]:.4f}, valid acc: {valid_accuracy * 100:.3f}%')
        # Learning rate that was in effect during this epoch.
        print(optimizer.param_groups[0]["lr"])

        # Step the schedule once per epoch. Stepping an epoch-based cosine
        # schedule after every batch makes the learning rate oscillate many
        # times within a single epoch.
        if scheduler is not None:
            scheduler.step()

        if valid_losses[-1] < lowest_loss:
            lowest_loss = valid_losses[-1]
            lowest_epoch = epoch
            best_model = deepcopy(model.state_dict())
        elif (early_stop > 0) and lowest_epoch + early_stop < epoch:
            print ("Early Stopped", epoch, "epochs")
            break

    model.load_state_dict(best_model)
    return model, lowest_loss, train_losses, valid_losses, train_accuracies, valid_accuracies

## Training

In [None]:
# Train for up to 10 epochs with early stopping disabled (early_stop=0) and
# keep the best-validation model together with the per-epoch histories.
training_result = train_model(model, loss_func, optimizer, 0, 10, train_batches, val_batches)
(
    model,
    lowest_loss,
    train_losses,
    valid_losses,
    train_accuracies,
    valid_accuracies,
) = training_result

## Save Model

In [None]:
# Enter a file path of Dataset/models
PATH = './models/'
# torch.save does not create missing directories, so make sure the target
# directory exists before writing the weights.
os.makedirs(PATH, exist_ok=True)
torch.save(model.state_dict(), os.path.join(PATH, '622raw_3Classes_model_efficientnetv2_l_adam_cosine1e5_epoch10_batch16.pth'))

## Load Model

In [None]:
# Enter a file path of Dataset/models
PATH = './models/'
# map_location remaps the stored tensors onto the current device, so weights
# saved from a GPU run can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load(PATH + '622raw_3Classes_model_efficientnetv2_l_adam_cosine1e5_epoch10_batch16.pth', map_location=device))

## Predict(Testing)

In [None]:
# Evaluate the model on the test loader and remember every misclassified
# sample (image path, predicted class, actual class) for later inspection.
wrong_samples, wrong_preds, actual_preds = list(), list(), list()
test_loss, test_correct = 0, 0
test_seen, test_batch_count = 0, 0
preds = []

model.eval()
with torch.no_grad():
    for test_x, test_y, img_path in tqdm(test_batches):
        test_x = test_x.to(device)
        test_y = test_y.to(device)
        pred = model(test_x)

        # Plain cross-entropy, so the value is comparable with the
        # train/validation losses (no square root).
        loss = loss_func(pred, test_y)
        test_loss += loss.item()
        test_batch_count += 1

        # argmax over logits equals argmax over softmax(logits).
        y_pred = torch.argmax(pred.detach().cpu(), dim=1)
        targets = test_y.detach().cpu()
        # A batch holds several predictions; .item() only works for a
        # single-element tensor, so extend with the whole batch.
        preds.extend(y_pred.tolist())

        test_correct += y_pred.eq(targets).sum().item()
        test_seen += targets.size(0)

        wrong_idx = y_pred.ne(targets).nonzero()[:, 0].tolist()
        for index in wrong_idx:
            wrong_samples.append(img_path[index])
            wrong_preds.append(y_pred[index])
            actual_preds.append(targets[index])

# Average over the batches / samples actually evaluated; max(..., 1) avoids a
# division by zero when the loader is empty.
test_loss /= max(test_batch_count, 1)
print('Average Test Loss: {:.4f}'.format( test_loss ))
print('Accuracy: {}/{} ({:.2f}%)'.format( test_correct, test_seen, 100 * test_correct / max(test_seen, 1) ))

## Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd


y_pred = []
y_true = []


# Iterate over the test data in inference mode: eval() fixes the
# dropout/batch-norm behaviour and no_grad() avoids building autograd graphs.
model.eval()
with torch.no_grad():
    for inputs, labels, img_path in tqdm (test_batches):
        inputs = inputs.to(device)
        output = model(inputs)  # Feed network

        # Predicted class = index of the largest logit.
        output = torch.argmax(output, dim=1).cpu().numpy()
        y_pred.extend(output)  # Save prediction

        labels = labels.cpu().numpy()
        y_true.extend(labels)  # Save truth

# Constant for classes
classes = ('Low', 'Medium', 'High')

# Build the confusion matrix. Passing labels pins the shape to
# len(classes) x len(classes) even if a class is missing from the test data.
cf_matrix = confusion_matrix(y_true, y_pred, labels=list(range(len(classes))))

# Normalize each row by its total so every cell is the fraction of that true
# class assigned to each predicted class. Rows with no samples stay at zero.
row_totals = cf_matrix.sum(axis=1, keepdims=True)
cf_normalized = np.divide(
    cf_matrix,
    row_totals,
    out=np.zeros(cf_matrix.shape, dtype=float),
    where=row_totals != 0,
)
df_cm = pd.DataFrame(cf_normalized, index = [i for i in classes],
                     columns = [i for i in classes])
plt.figure(figsize = (12,7))
sn.heatmap(df_cm, annot=True)
# savefig does not create missing directories.
os.makedirs('./Dataset', exist_ok=True)
plt.savefig('./Dataset/output.png')

## Showing incorrect data

In [None]:
# Show up to 100 misclassified images in a 10 x 10 grid.
import matplotlib.pyplot as plt
import cv2 as cv
# %matplotlib inline

plt.figure(figsize=(28 , 28))

wrong = len(wrong_samples)

# The grid has only 10 * 10 = 100 cells; asking plt.subplot for a 101st cell
# raises ValueError, so any further samples are skipped.
for index in range(min(wrong, 100)):
    plt.subplot(10, 10, index + 1)
    plt.axis('off')
    img = plt.imread(wrong_samples[index])
    plt.imshow(img)
    # Title format: "Pred<predicted class>(<actual class>)".
    plt.title("Pred" + str(wrong_preds[index].item()) + "(" + str(actual_preds[index].item()) + ")", color='red')