# Group information


Group 3 (Project 1)

Name (SID): 

- Lam Yee Chun (20538053)
- Yeung Tsz Ching (20507377)
- Ng Pak Nin (20517748)
- Ng Hung Hing (20354823)

# Package installation & Data Load

#### For Google Colab (comment out all the cells below if you are not using Colab)

In [None]:
# Install PyTorch Lightning (latest release; no version is pinned here).
# NOTE(review): the classes below define `training_epoch_end` / `validation_epoch_end`
# hooks and the sample Trainer call passes `gpus=1` — presumably this needs an older
# Lightning release; confirm and pin the version if so.
!pip install pytorch-lightning

#### Import

In [None]:
import pickle
import time
import os
import random
import math

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, random_split
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import transforms, models
from PIL import Image
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
from tqdm.auto import tqdm

import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset

from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

from collections import OrderedDict
from sklearn.metrics import fbeta_score,precision_score,recall_score
from PIL import Image

In [None]:
# Fail early, with a readable message, when the Colab runtime has no GPU.
# A bare `os.environ['COLAB_GPU']` lookup raises KeyError when the variable is
# missing and treats the string '0' as truthy, so the value is parsed instead.
# NOTE(review): assumes COLAB_GPU holds an integer-like string — confirm.
if int(os.environ.get('COLAB_GPU') or 0) <= 0:
    raise RuntimeError('Make sure to select GPU from Edit > Notebook settings > Hardware accelerator')

# Mount Google Drive so the project data is reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!rm -f data
!ln -s '/content/drive/My Drive/proj1_data' proj1_data

#### Dataset with data augmentation


In [None]:

class MyDataset(Dataset):
    """Dataset of pickled multi-band image arrays stored under a folder tree.

    Every file found below ``folders`` is one sample; its class label is
    derived from the file name (see ``get_labels``).

    usage:
      train_set = MyDataset(train_folder, transform=train_transform)
      val_set = MyDataset(val_folder, transform=val_transform)
    """

    def __init__(self, folders, transform=None):
        # Despite its name, ``self.folders`` holds the individual file paths.
        self.folders, self.labels = self.get_labels(folders)
        self.transform = transform

    def __len__(self):
        return len(self.folders)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The pickled array is indexed channel-first, each channel is min-max
        rescaled, and the first four channels are returned channel-last.
        When a transform is set, the array is converted to an 8-bit PIL image
        and passed through it.

        Raises:
            EOFError: if the sample file is empty or truncated.
        """
        path = self.folders[idx]

        try:
            with open(path, 'rb') as f:
                # NOTE: pickle can execute arbitrary code; only load trusted files.
                color_img = pickle.load(f)
        except EOFError as err:
            # Previously the path was printed and execution continued into an
            # unbound `color_img`; fail with the offending file name instead.
            raise EOFError(f"Truncated or empty sample file: {path}") from err

        # Min-max rescale every channel that is not constant.
        for band in range(color_img.shape[0]):
            spread = np.ptp(color_img[band, :, :])
            if spread != 0:
                color_img[band, :, :] = (color_img[band, :, :] - np.min(color_img[band, :, :])) / spread
        color_img = color_img.transpose((1, 2, 0))

        # Keep exactly the first four channels (IndexError if there are fewer).
        # The original code rescaled these channels a second time; for finite
        # data that second pass leaves the values unchanged, so it is omitted.
        color_img = np.stack([color_img[:, :, band] for band in range(4)], axis=2)

        if self.transform is not None:
            color_img = np.uint8(255 * color_img)
            color_img = Image.fromarray(color_img)
            color_img = self.transform(color_img)

        return color_img, self.labels[idx]

    @staticmethod
    def _label_from_name(file_name):
        """Map a file name to its class index (0, 1 or 2)."""
        if 'nil_HS_H08' in file_name or 'light_HS_H08' in file_name:
            return 0
        if 'moderate_HS_H08' in file_name:
            return 1
        return 2

    def get_labels(self, folders):
        """Walk ``folders`` recursively and return ``(file_paths, labels)``."""
        files = []
        labels = []
        for path, _, file_list in os.walk(folders):
            for file_name in file_list:
                files.append(os.path.join(path, file_name))
                labels.append(self._label_from_name(file_name))
        return files, labels

    def get_cls_num_list(self, folders, copies=3):
        """Return the per-class sample counts under ``folders`` times ``copies``.

        ``copies`` defaults to 3, the multiplier that was previously hard-coded.
        """
        _, class_list = self.get_labels(folders)
        return [class_list.count(cls) * copies for cls in range(3)]
        

#### **Dataset load and basic data augmentation**


> We initialize the training set and validation set as MyDataset and resize the data in the two sets to handle the data with different sizes. We also decided to normalize the data to bring them to a common scale and reduce variance to allow better model performance.


> To ease the problem of overfitting, we have implemented a set of data augmentation strategies. We apply a data transformer which will randomly apply rotation, flipping and cropping. We chose not to include color jittering into the data transformer since it gives a worse training and validation performance by trials.


> We tried doubling and tripling the dataset with transformed augmented data. By tripling the data size with the augmented data, it gives a higher validation accuracy and better reduction of the overfitting problem.






In [None]:
#Dataset load and basic data augmentation

#For non-colab
#train_folder = 'train'
#val_folder = 'validate'
#---------------------#
#For colab
# NOTE(review): the Colab and non-Colab settings are currently identical —
# confirm whether Colab should read from a different location.
train_folder = 'train'
val_folder = 'validate'
#---------------------#

# Per-band constants for Normalize, shared by every pipeline so that all
# training samples reach the model on the same scale.
band_mean = (0.5307, 0.4912, 0.5074, 0.5072)
band_std = (0.2738, 0.2922, 0.2976, 0.2993)

resize_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(band_mean, band_std)
])
train_smallset = MyDataset(train_folder, transform=resize_transform)
val_set = MyDataset(val_folder, transform=resize_transform)

###data augmentation
# RandomApply runs the whole list together, or not at all, with its default
# probability. NOTE(review): RandomCrop((150,150)) presumably needs inputs of
# at least 150x150 pixels — confirm against the data.
aug_transform = transforms.Compose([
    transforms.RandomApply([
                transforms.RandomRotation(10), 
                transforms.RandomHorizontalFlip(), 
                transforms.RandomVerticalFlip(), 
                transforms.RandomCrop((150,150))
            ]),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    # Previously missing: augmented samples were left un-normalised while the
    # plain samples were normalised, mixing two input scales in one training set.
    transforms.Normalize(band_mean, band_std)
])

# Training set = the plain samples plus two independently augmented copies.
all_datasets = [train_smallset]
for _ in range(2):
    all_datasets.append(MyDataset(train_folder, transform=aug_transform))
train_set = torch.utils.data.ConcatDataset(all_datasets)
print("train shape:", len(train_set))
print("val shape:", len(val_set))

#### Util Function

### Classification report Utility
This is a utility function for benchmarking the classification performance of a model on a given dataset.

In [None]:
# classification report utility

def evaluate(model, dataset, device=None):
    """Print and return a classification report for ``model`` on ``dataset``.

    Args:
        model: Module mapping an image batch to per-class scores.
        dataset: Dataset yielding ``(image, label)`` pairs.
        device: Device to run on. Defaults to CUDA when available, else CPU.

    Returns:
        The ``classification_report(..., output_dict=True)`` dictionary.

    The model is moved to ``device`` and evaluated in eval mode without
    gradient tracking; its previous train/eval mode is restored afterwards.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Order does not affect the report, so there is no need to shuffle.
    data = DataLoader(dataset, batch_size=20, num_workers=4, shuffle=False)

    was_training = model.training
    model.to(device)
    model.eval()  # evaluate with eval-mode layer behaviour, not training mode
    predicted, expected = [], []
    try:
        with torch.no_grad():  # no autograd graph is needed for evaluation
            for X, Y in data:
                logits = model(X.to(device))
                _, Y_hat = torch.max(logits, 1)
                predicted.append(Y_hat.cpu())
                expected.append(Y.cpu())
    finally:
        model.train(was_training)

    # Kept as float tensors, as in the original accumulation, so that the
    # label keys appearing in the report are unchanged.
    y_pred = (torch.cat(predicted) if predicted else torch.Tensor()).float().numpy()
    y_true = (torch.cat(expected) if expected else torch.Tensor()).float().numpy()

    print(classification_report(y_true, y_pred))
    return classification_report(y_true, y_pred, output_dict=True)

### Image data preview utility

In [None]:
def get_random_sample(dataset):
    """Return ``(image, index)`` for one randomly chosen item of ``dataset``."""
    # Re-seed the global generator on every call, as before.
    random.seed()
    index = random.randrange(len(dataset))
    image, _label = dataset[index]
    return image, index

def get_one_sample(dataset, index):
    """Return the image part of ``dataset[index]``, dropping its label."""
    image, _label = dataset[index]
    return image

def plot_random(dataset):
    """Plot the four layers of one randomly chosen sample in a 2x2 grid.

    The sample's index and class label are printed first. The label is taken
    from the dataset item itself; the previous code read it from a global
    ``train_Y`` that is not defined in this notebook.
    """
    index = random.randrange(len(dataset))
    # NOTE(review): assumes the image is indexable by layer on its first axis
    # (e.g. a channel-first tensor) — confirm for datasets without ToTensor.
    sample_image, label = dataset[index]
    print('Sample index:', index, 'Class:', label)
    for layer in range(4):
        plt.subplot(2, 2, layer + 1)
        title_obj = plt.title("Layer %i" % (layer + 1))
        plt.setp(title_obj, color='w')
        plt.imshow(sample_image[layer])
    # Lay the figure out once, after all four panels exist.
    plt.tight_layout()


# Baseline 1

*Strategy*

#### **Model Framework: PyTorch Lightning**
We use PyTorch Lightning (https://www.pytorchlightning.ai) as our model framework because it allows us to modularize our code into dedicated steps and functions, enabling fast prototyping and expansion while maintaining readability.

####**Model Structure: Convolutional neural network**
Our model network consists of three parts: self defined convolutional layers, Resnet model,  fully connected layers

#### *Self-defined convolutional layers:*
As our training data are 4-band multispectral images, we defined one convolutional layer that maps the 4-channel input to 3 feature maps. With a 3-channel output, we are able to pass the result to ResNet, which requires a 3-band input.
After trying two convolutional layers, we concluded that having a single layer before the ResNet network gives the best result. The following graph shows that the training accuracy of the model with 2 conv layers only reaches around 70%, while our finalized baselines obtain more than 75%.
### *2 conv layer*
### ![2convlayer](graphs/2convlayer.jpeg)

#### *ResNet model:*
We tried training our model with two different versions of ResNet.
As shown below, although ResNet34 gives better training performance, ResNet50 gives higher validation accuracy and lower loss.

ResNet50 gives an average validation accuracy of 79%, which is 6% higher than the 73% of ResNet34 (see graph below).
As we can see, a more complex ResNet yields a better result. Following this hypothesis, we tried to test ResNet101, but the UST lab GPU runs out of memory for this model, so we settled on ResNet50.
### *resnet34 vs resnet50*
##### ![resnet34 vs resnet50](graphs/resnet.jpeg)

We use this pretrained ResNet model as a feature extractor for the images, so all layers of the ResNet are frozen except the last fc layer, which we retrain to classify our images into their corresponding three classes.

#### *Fully connected layers:*
We defined two FC layers after ResNet for classification.
Since ResNet produces an output of size 1000, the first FC layer takes an input of size 1000 and outputs size 512.
The second FC layer maps the size-512 input to a size-3 output, which is passed through a softmax function. The size-3 output is the model's predicted probability for each of the 3 classes: NIL, MOD and SEV.
We also tried using only one FC layer to classify the size-1000 input into 3 classes. However, the result was not as good as with two FC layers.

#### *Optimizer:*
We chose AdamW as the optimizer.
We tried the SGD optimizer first; the following graphs show that the training accuracy only reaches 50% while the validation accuracy fluctuates and drops to 25%. Compared to the AdamW optimizer used in our finalized Baseline 1, SGD performs very poorly.
### *SGD Optimizer Performance*
#### ![SGDopt](graphs/SGDopt.png)

In [None]:
class BL1(pl.LightningModule):
    """Baseline 1: conv adapter + pretrained ResNet-50 + two-layer classifier.

    Args:
        lr: Learning rate passed to AdamW.
        oversample: If True, the training loader draws samples through
            ``ImbalancedDatasetSampler`` instead of plain shuffling.
        verbose: If True, print the mean loss/accuracy after every epoch.
    """

    def __init__(self, lr = 0.05, oversample=False, verbose = False):
        super().__init__()
        self.oversample = oversample
        self.lr = lr
        self.verbose = verbose
        self.loss_func = nn.CrossEntropyLoss()

        # Maps the 4 input bands onto the 3 channels the ResNet takes.
        self.conv1 = nn.Conv2d(4, 3, 5)
        self.resnet = models.resnet50(pretrained = True)

        # Freeze the backbone, then re-enable gradients on its last fc layer.
        # Assigning `module.requires_grad = True` only creates a plain
        # attribute on the module and leaves its parameters frozen;
        # `requires_grad_()` sets the flag on the parameters themselves.
        for parameter in self.resnet.parameters():
            parameter.requires_grad = False
        self.resnet.fc.requires_grad_(True)

        self.fc1 = nn.Linear(1000, 512)
        self.fc2 = nn.Linear(512, 3)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities (softmax output) for a batch."""
        x = self.conv1(x)
        x = self.resnet(x)
        x = F.leaky_relu(self.fc1(x))
        # NOTE(review): nn.CrossEntropyLoss expects unnormalised scores, so
        # feeding it this softmax output normalises twice. Left unchanged
        # because the saved models were trained this way — confirm.
        x = self.softmax(self.fc2(x))
        return x

    def _shared_step(self, batch, stage):
        """Compute and log loss/accuracy for one batch; ``stage`` prefixes the log keys."""
        images, target = batch
        output = self.forward(images)
        _, preds = torch.max(output, 1)
        acc = (target == preds).float().mean()
        loss = self.loss_func(output, target)
        self.log(f'{stage}_loss', loss)
        self.log(f'{stage}_acc', acc)
        return {'loss': loss, 'acc': acc}

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, 'train')

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, 'val')

    @staticmethod
    def _epoch_means(step_outputs):
        """Return ``(mean_loss, mean_acc)`` over the step outputs, or None if empty."""
        losses = [out["loss"].item() for out in step_outputs]
        accs = [out["acc"].item() for out in step_outputs]
        if not losses:
            return None
        return sum(losses) / len(losses), sum(accs) / len(accs)

    def training_epoch_end(self, training_step_outputs):
        means = self._epoch_means(training_step_outputs)
        if self.verbose and means is not None:
            print('train_loss:', means[0], 'train_acc:', means[1])

    def validation_epoch_end(self, validation_step_outputs):
        means = self._epoch_means(validation_step_outputs)
        if self.verbose and means is not None:
            print('        validation_loss:', means[0], 'validation_acc:', means[1])

    def configure_optimizers(self):
        return optim.AdamW(self.parameters(), lr=self.lr)

    def train_dataloader(self):
        dataset = train_set
        if self.oversample:
            # NOTE(review): ImbalancedDatasetSampler is not imported anywhere
            # in this notebook; oversample=True needs it to be provided.
            return DataLoader(dataset, batch_size=50, num_workers=8, sampler=ImbalancedDatasetSampler(dataset))
        return DataLoader(dataset, batch_size=50, num_workers=8, shuffle=True)

    def val_dataloader(self):
        dataset = val_set
        return DataLoader(dataset, batch_size=40, num_workers=8, shuffle=False)

# Train Model
""" 
model = BL1(lr=0.0002)
trainer = Trainer(
    gpus=1,
    progress_bar_refresh_rate = 20,
    callbacks=[EarlyStopping(
          monitor='val_loss',
          patience=30,
        min_delta=0.00,
        verbose=False
      )],
      num_sanity_val_steps=0
)
trainer.fit(model)
model.eval()
"""

# Load the saved Baseline 1 model from disk instead of retraining it.
# NOTE(review): presumably the file holds a whole pickled BL1 module, in which
# case the BL1 class must be defined before this line runs — confirm.
model = torch.load('model_bl1.pkl')

Classification Report

Training:

In [None]:
# Print the classification report of the loaded model on the training set.
evaluate(model, train_set)

Validation:

In [None]:
# Print the classification report of the loaded model on the validation set.
evaluate(model, val_set)

# Baseline 2

LDAM Loss

The function modifies the model output by reducing the output value of the correct target class before it is passed into the cross-entropy loss. The LDAM loss therefore affects the gradient in a more involved way than simply introducing a scalar weighting factor, because the effect of the margin introduced by LDAM also depends on the output of the model.

- $L_{LDAM}((x,y);f) = -\log\frac{e^{z_y-\Delta_y}}{e^{z_y-\Delta_y}+\sum_{j\neq y}e^{z_j}}$ where $\Delta_j = \frac{C}{n_j^{1/4}}$ for $j \in \{1,2,3\}$
- Hyperparameter C

C is the hyperparameter we tuned by sweeping over different values. According to the paper and the GitHub repository provided with it, C theoretically represents the complexity of the dataset, so that C could be normalised by the maximum number of samples among the classes. However, calculating the Rademacher complexity is complicated, so we tried different values of C and found that C = 5 gives better performance.

In [None]:
class BL2(pl.LightningModule):
    """Baseline 2: the Baseline 1 network trained with an LDAM margin loss.

    Args:
        C: Largest class margin. The per-class margins are proportional to
            ``1 / n_j ** 0.25`` and rescaled so the largest equals ``C``;
            ``C = 0`` therefore disables the margin.
        lr: Learning rate passed to AdamW.
        oversample: If True, the training loader draws samples through
            ``ImbalancedDatasetSampler`` instead of plain shuffling.
        verbose: If True, print the mean loss/accuracy after every epoch.
    """

    def __init__(self,
                 C = 0,
                 lr = 0.05,
                 oversample=False,
                 verbose = False
                ):
        super().__init__()
        self.oversample = oversample
        self.lr = lr
        self.verbose = verbose
        self.C = C
        self.cls_num_list = None
        self.cls_weight_list = None
        # The loss is built while cls_weight_list is still None, i.e. without
        # class weights; the weights computed later are stored but not used here.
        self.loss_func = nn.CrossEntropyLoss(weight = self.cls_weight_list)
        # Per-epoch mean loss/accuracy, keyed by 'train_loss_plot' etc.
        self.epoch_history = {}

        # Maps the 4 input bands onto the 3 channels the ResNet takes.
        self.conv1 = nn.Conv2d(4, 3, 5)
        self.resnet = models.resnet50(pretrained = True)

        # Freeze the backbone, then re-enable gradients on its last fc layer.
        # Assigning `module.requires_grad = True` only creates a plain
        # attribute on the module and leaves its parameters frozen;
        # `requires_grad_()` sets the flag on the parameters themselves.
        for parameter in self.resnet.parameters():
            parameter.requires_grad = False
        self.resnet.fc.requires_grad_(True)

        self.fc1 = nn.Linear(1000, 512)
        self.fc2 = nn.Linear(512, 3)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities (softmax output) for a batch."""
        x = self.conv1(x)
        x = self.resnet(x)
        x = F.leaky_relu(self.fc1(x))
        x = self.softmax(self.fc2(x))
        return x

    def _shared_step(self, batch, stage):
        """Compute and log LDAM loss/accuracy for one batch; ``stage`` prefixes the log keys."""
        images, target = batch
        output = self.forward(images)
        _, preds = torch.max(output, 1)
        acc = (target == preds).float().mean()
        loss = self.LDAM(output, target)
        self.log(f'{stage}_loss', loss)
        self.log(f'{stage}_acc', acc)
        return {'loss': loss, 'acc': acc}

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, 'train')

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, 'val')

    def cls_num_weight_list(self, state):
        """Store the class counts and class weights for the given split.

        Args:
            state: ``'train'`` or ``'val'``/``'test'``.

        Raises:
            ValueError: for any other ``state`` (this used to fail later on
                an unbound local variable).
        """
        if state == 'train':
            num_list = train_smallset.get_cls_num_list(train_folder)
        elif state == 'test' or state == 'val':
            num_list = val_set.get_cls_num_list(val_folder)
        else:
            raise ValueError(f"unknown state: {state!r}")

        # Class-balanced weights (1 - beta) / (1 - beta ** n). With beta = 0
        # every class with at least one sample gets the same weight.
        beta = 0
        effective_num = 1.0 - np.power(beta, num_list)
        weight_list = (1.0 - beta) / np.array(effective_num)
        weight_list = weight_list / np.sum(weight_list) * len(num_list)
        weight_list = torch.FloatTensor(weight_list)

        self.cls_num_list = num_list
        self.cls_weight_list = weight_list

    def LDAM(self, before_m, target):
        """Subtract the class margin from the target-class score, then apply the loss.

        Args:
            before_m: Model output, one row per sample and one column per class.
            target: Integer class index of every sample.
        """
        if self.cls_num_list is None:
            # Margins are defined by the training class counts.
            self.cls_num_weight_list('train')

        m_list = 1.0 / np.sqrt(np.sqrt(self.cls_num_list))  # 1 / n_j^(1/4)
        m_list = m_list * (self.C / np.max(m_list))  # largest margin becomes C
        # Follow the dtype and device of the model output instead of
        # hard-coding a CUDA float tensor.
        margins = torch.as_tensor(m_list, dtype=before_m.dtype, device=before_m.device)

        is_target = F.one_hot(target, num_classes=before_m.size(1)).bool()
        batch_m = margins[target].view(-1, 1)  # margin of each sample's own class
        after_m = torch.where(is_target, before_m - batch_m, before_m)
        return self.loss_func(after_m, target)

    def _record(self, name, value):
        """Append ``value`` to the history list ``name``.

        Values are kept on the module. If the notebook also defines a
        module-level list called ``name``, it is appended to as well, so code
        that reads those globals keeps working; when no such global exists,
        recording no longer raises NameError.
        """
        history = self.__dict__.setdefault('epoch_history', {})
        history.setdefault(name, []).append(value)
        sink = globals().get(name)
        if isinstance(sink, list):
            sink.append(value)

    @staticmethod
    def _epoch_means(step_outputs):
        """Return ``(mean_loss, mean_acc)`` over the step outputs, or None if empty."""
        losses = [out["loss"].item() for out in step_outputs]
        accs = [out["acc"].item() for out in step_outputs]
        if not losses:
            return None
        return sum(losses) / len(losses), sum(accs) / len(accs)

    def training_epoch_end(self, training_step_outputs):
        means = self._epoch_means(training_step_outputs)
        if means is None:
            return
        if self.verbose:
            print('train_loss:', means[0], 'train_acc:', means[1])
        self._record('train_loss_plot', means[0])
        self._record('train_acc_plot', means[1])

    def validation_epoch_end(self, validation_step_outputs):
        means = self._epoch_means(validation_step_outputs)
        if means is None:
            return
        if self.verbose:
            print('        validation_loss:', means[0], 'validation_acc:', means[1])
        self._record('val_loss_plot', means[0])
        self._record('val_acc_plot', means[1])

    def configure_optimizers(self):
        return optim.AdamW(self.parameters(), lr=self.lr,betas=(0.9,0.999),eps=1e-08,weight_decay=0,amsgrad=False)

    def train_dataloader(self):
        dataset = train_set
        self.cls_num_weight_list('train')
        if self.oversample:
            # NOTE(review): ImbalancedDatasetSampler is not imported anywhere
            # in this notebook; oversample=True needs it to be provided.
            return DataLoader(dataset, batch_size=40, num_workers=8, sampler=ImbalancedDatasetSampler(dataset))
        return DataLoader(dataset, batch_size=40, num_workers=8, shuffle=True)

    def val_dataloader(self):
        dataset = val_set
        # The class counts are deliberately not recomputed from the validation
        # split here: doing so replaced the training counts that define the
        # LDAM margins whenever this loader was built after the training one.
        return DataLoader(dataset, batch_size=40, num_workers=8, shuffle=False)


# model = BL2(lr=0.0002, verbose=False, oversample=False, C = C)
# trainer = Trainer(
#     gpus=1,
#     progress_bar_refresh_rate = 20,
#     callbacks=[EarlyStopping(
#           monitor='val_loss',
#           patience=30,
#         min_delta=0.00,
#         verbose=False
#       )],
#       num_sanity_val_steps=0
# )
# trainer.fit(model)
# model.eval()

# Load the saved Baseline 2 model from disk instead of retraining it.
# NOTE(review): presumably the file holds a whole pickled BL2 module, in which
# case the BL2 class must be defined before this line runs — confirm.
model = torch.load('BL2.pkl')

Training:

In [None]:
# Print the classification report of the loaded model on the training set.
evaluate(model, train_set)

Validation:

In [None]:
# Print the classification report of the loaded model on the validation set.
evaluate(model, val_set)

# *Effect of LDAMLoss on the model*
Before Application of LDAMLoss
![dataaug](graphs/dataaugx3.jpeg)
After Application of LDAMLoss
![da](graphs/clsreport.jpeg)

After applying LDAMLoss, the training accuracy dropped by 2% but the validation accuracy increased by 2%. This is because applying LDAMLoss reduced the number of predictions for class 1, which is the largest class, and increased the number of predictions for class 2. However, since class 2 has fewer samples, the accuracy of the class 2 predictions is lower than that of class 1, resulting in a lower overall training accuracy. During validation, the precision on class 2 increased greatly, from 0.51 to 0.68. This implies that the model predicts class 2 more frequently. The F1 score of class 2 increased from 0.62 to 0.69, which contributed to the overall increase in validation accuracy.


# Final Model

Observed Problem of baseline 1& 2:

1. Imbalanced dataset
2. Convergence with low accuracy in early epoch

Applied Techniques : 

1. Class Balanced Loss
- $CB_{softmax}((x,y);f) = -\frac{1-\beta}{1-\beta^{n_y}}\log\left(\frac{e^{z_y}}{\sum_{j}e^{z_j}}\right)$

- weighting is introduced to the loss function
- larger weight for smaller class and smaller weight for larger class
- purpose: increase the loss contribution of the smaller classes so that the optimiser is pushed harder towards fitting them
2. Predicted output Scale up

- The output values are scaled up by multiplying them by **30**, so the differences between the predicted output and the target are all scaled up linearly and the optimiser has more room to reduce the loss and update the parameters. As a result, the loss drops rapidly during the first several epochs, when the optimiser takes a more aggressive approach, and the updates become less aggressive once the loss has been reduced to relatively small values.


In [None]:
class BL2(pl.LightningModule):
    """Final model: the LDAM network with its loss input scaled up.

    This definition reuses the name ``BL2`` and therefore replaces the
    Baseline 2 class defined earlier in the notebook.

    Args:
        C: Largest class margin. The per-class margins are proportional to
            ``1 / n_j ** 0.25`` and rescaled so the largest equals ``C``;
            ``C = 0`` therefore disables the margin.
        lr: Learning rate passed to AdamW.
        oversample: If True, the training loader draws samples through
            ``ImbalancedDatasetSampler`` instead of plain shuffling.
        verbose: If True, print the mean loss/accuracy after every epoch.
        scale: Factor applied to the margin-adjusted output before the
            cross-entropy loss. Defaults to 30, the value that was
            previously hard-coded.
    """

    def __init__(self,
                 C = 0,
                 lr = 0.05,
                 oversample=False,
                 verbose = False,
                 scale = 30
                ):
        super().__init__()
        self.oversample = oversample
        self.lr = lr
        self.verbose = verbose
        self.C = C
        self.scale = scale
        self.cls_num_list = None
        self.cls_weight_list = None
        # NOTE(review): the loss is built while cls_weight_list is still None,
        # and beta is 0 in cls_num_weight_list, so no class-balanced weighting
        # is actually applied — confirm whether that is intended.
        self.loss_func = nn.CrossEntropyLoss(weight = self.cls_weight_list)
        # Per-epoch mean loss/accuracy, keyed by 'train_loss_plot' etc.
        self.epoch_history = {}

        # Maps the 4 input bands onto the 3 channels the ResNet takes.
        self.conv1 = nn.Conv2d(4, 3, 5)
        self.resnet = models.resnet50(pretrained = True)

        # Freeze the backbone, then re-enable gradients on its last fc layer.
        # Assigning `module.requires_grad = True` only creates a plain
        # attribute on the module and leaves its parameters frozen;
        # `requires_grad_()` sets the flag on the parameters themselves.
        for parameter in self.resnet.parameters():
            parameter.requires_grad = False
        self.resnet.fc.requires_grad_(True)

        self.fc1 = nn.Linear(1000, 512)
        self.fc2 = nn.Linear(512, 3)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities (softmax output) for a batch."""
        x = self.conv1(x)
        x = self.resnet(x)
        x = F.leaky_relu(self.fc1(x))
        x = self.softmax(self.fc2(x))
        return x

    def _shared_step(self, batch, stage):
        """Compute and log LDAM loss/accuracy for one batch; ``stage`` prefixes the log keys."""
        images, target = batch
        output = self.forward(images)
        _, preds = torch.max(output, 1)
        acc = (target == preds).float().mean()
        loss = self.LDAM(output, target)
        self.log(f'{stage}_loss', loss)
        self.log(f'{stage}_acc', acc)
        return {'loss': loss, 'acc': acc}

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, 'train')

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, 'val')

    def cls_num_weight_list(self, state):
        """Store the class counts and class weights for the given split.

        Args:
            state: ``'train'`` or ``'val'``/``'test'``.

        Raises:
            ValueError: for any other ``state`` (this used to fail later on
                an unbound local variable).
        """
        if state == 'train':
            num_list = train_smallset.get_cls_num_list(train_folder)
        elif state == 'test' or state == 'val':
            num_list = val_set.get_cls_num_list(val_folder)
        else:
            raise ValueError(f"unknown state: {state!r}")

        # Class-balanced weights (1 - beta) / (1 - beta ** n). With beta = 0
        # every class with at least one sample gets the same weight.
        beta = 0
        effective_num = 1.0 - np.power(beta, num_list)
        weight_list = (1.0 - beta) / np.array(effective_num)
        weight_list = weight_list / np.sum(weight_list) * len(num_list)
        weight_list = torch.FloatTensor(weight_list)

        self.cls_num_list = num_list
        self.cls_weight_list = weight_list

    def LDAM(self, before_m, target):
        """Subtract the class margin from the target-class score, scale, then apply the loss.

        Args:
            before_m: Model output, one row per sample and one column per class.
            target: Integer class index of every sample.
        """
        if self.cls_num_list is None:
            # Margins are defined by the training class counts.
            self.cls_num_weight_list('train')

        m_list = 1.0 / np.sqrt(np.sqrt(self.cls_num_list))  # 1 / n_j^(1/4)
        m_list = m_list * (self.C / np.max(m_list))  # largest margin becomes C
        # Follow the dtype and device of the model output instead of
        # hard-coding a CUDA float tensor.
        margins = torch.as_tensor(m_list, dtype=before_m.dtype, device=before_m.device)

        is_target = F.one_hot(target, num_classes=before_m.size(1)).bool()
        batch_m = margins[target].view(-1, 1)  # margin of each sample's own class
        after_m = torch.where(is_target, before_m - batch_m, before_m)

        # Models unpickled from before `scale` existed fall back to 30.
        scale = getattr(self, 'scale', 30)
        return self.loss_func(scale * after_m, target)

    def _record(self, name, value):
        """Append ``value`` to the history list ``name``.

        Values are kept on the module. If the notebook also defines a
        module-level list called ``name``, it is appended to as well, so code
        that reads those globals keeps working; when no such global exists,
        recording no longer raises NameError.
        """
        history = self.__dict__.setdefault('epoch_history', {})
        history.setdefault(name, []).append(value)
        sink = globals().get(name)
        if isinstance(sink, list):
            sink.append(value)

    @staticmethod
    def _epoch_means(step_outputs):
        """Return ``(mean_loss, mean_acc)`` over the step outputs, or None if empty."""
        losses = [out["loss"].item() for out in step_outputs]
        accs = [out["acc"].item() for out in step_outputs]
        if not losses:
            return None
        return sum(losses) / len(losses), sum(accs) / len(accs)

    def training_epoch_end(self, training_step_outputs):
        means = self._epoch_means(training_step_outputs)
        if means is None:
            return
        if self.verbose:
            print('train_loss:', means[0], 'train_acc:', means[1])
        self._record('train_loss_plot', means[0])
        self._record('train_acc_plot', means[1])

    def validation_epoch_end(self, validation_step_outputs):
        means = self._epoch_means(validation_step_outputs)
        if means is None:
            return
        if self.verbose:
            print('        validation_loss:', means[0], 'validation_acc:', means[1])
        self._record('val_loss_plot', means[0])
        self._record('val_acc_plot', means[1])

    def configure_optimizers(self):
        return optim.AdamW(self.parameters(), lr=self.lr,betas=(0.9,0.999),eps=1e-08,weight_decay=0,amsgrad=False)

    def train_dataloader(self):
        dataset = train_set
        self.cls_num_weight_list('train')
        if self.oversample:
            # NOTE(review): ImbalancedDatasetSampler is not imported anywhere
            # in this notebook; oversample=True needs it to be provided.
            return DataLoader(dataset, batch_size=40, num_workers=8, sampler=ImbalancedDatasetSampler(dataset))
        return DataLoader(dataset, batch_size=40, num_workers=8, shuffle=True)

    def val_dataloader(self):
        dataset = val_set
        # The class counts are deliberately not recomputed from the validation
        # split here: doing so replaced the training counts that define the
        # LDAM margins whenever this loader was built after the training one.
        return DataLoader(dataset, batch_size=40, num_workers=8, shuffle=False)


  # model = BL2(lr=0.0002, verbose=False, oversample=False, C = C)
  # trainer = Trainer(
  #     gpus=1,
  #     progress_bar_refresh_rate = 20,
  #     callbacks=[EarlyStopping(
  #           monitor='val_loss',
  #           patience=30,
  #         min_delta=0.00,
  #         verbose=False
  #       )],
  #       num_sanity_val_steps=0
  # )
  # trainer.fit(model)
  # model.eval()

# Load the saved final model from disk instead of retraining it.
# NOTE(review): presumably the file holds a whole pickled BL2 module, in which
# case the BL2 class must be defined before this line runs — confirm.
model = torch.load('model_final.pkl')

Training:

In [None]:
# Print the classification report of the loaded model on the training set.
evaluate(model, train_set)

Validation:

In [None]:
# Print the classification report of the loaded model on the validation set.
evaluate(model, val_set)