In [1]:
import pandas as pd
import numpy as np
import os 
import shutil
from tqdm import tqdm

import copy
import torch
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torch.utils.data import Dataset , DataLoader
import torchvision.models as models


import albumentations as A
import albumentations.pytorch

from efficientnet_pytorch import EfficientNet
    
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import time
import cv2
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline  
%config InlineBackend.figure_format='retina'

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# hyperparameter 
img_path = 'input/data/train/images'
batch_size = 128
num_workers = 2
learning_rate = 0.001
epochs = 100
img_size = 256
num_class = 18
early_stop = 10
num_accum = 4

In [3]:
df = pd.read_csv('preprocessing_data.csv')
df.drop(columns=['Unnamed: 0'], inplace = True)
df.head()

Unnamed: 0,path,id,mask,gender,age,label
0,input/data/train/images/000001_female_Asian_45...,1,0,1,1,4
1,input/data/train/images/000001_female_Asian_45...,1,2,1,1,16
2,input/data/train/images/000001_female_Asian_45...,1,0,1,1,4
3,input/data/train/images/000001_female_Asian_45...,1,0,1,1,4
4,input/data/train/images/000001_female_Asian_45...,1,1,1,1,10


In [4]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, path , df ,transform = None):
        
        self.path = path
        self.df = df
        self.transform = transform
        
    def __getitem__(self,idx):
        image = cv2.imread(self.df['path'].iloc[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image) 
            image = augmented['image']
        label = self.df['label'].iloc[idx]
        return image, label

    def __len__(self):
        return len(self.df)

In [5]:
# albumentation 사용 
train_transforms = A.Compose([
    A.Resize(256,256),
    A.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]),
    A.pytorch.transforms.ToTensor()
])

val_transforms = A.Compose([
    A.Resize(256,256),
    A.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]),
    A.pytorch.transforms.ToTensor()
])

In [6]:
train_x , val_x , train_y , val_y = train_test_split(df , df['label'] , test_size=0.3 , shuffle = False)

train_dataset = CustomDataset(img_path , train_x , train_transforms)
val_dataset = CustomDataset(img_path , val_x , val_transforms)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size , shuffle = True , num_workers = num_workers)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size = batch_size , shuffle = True , num_workers = num_workers)

In [7]:
class BaseModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = models.resnet50(pretrained = 'True')
        self.ReLU = nn.ReLU(True)
        self.fc1 = nn.Linear(1000,18)
        
    def forward(self,x):
        x = self.model(x)
        x = self.ReLU(x)
        x = self.fc1(x)
        return x

-------------------------------------------
### Mission2 
- F1 SCORE 구현해보기 

In [8]:
# # Micro F1 구현 


# def train(epochs , train_loader, val_loader , model , criterion , optimizer , lr_scheduler):
#     eps = 1e-8
#     best_model_wts = copy.deepcopy(model.state_dict())
#     best_epoch = 0
#     best_val_loss = 999999999
    
#     for epoch in range(epochs):
#         ##################################### train ################################
#         model.train()
        
#         loss_train_sum = 0
#         acc_train_sum = 0
#         f1_train_sum = 0
#         scikit_f1_sum = 0

#         num_class = 18
        
#         for i , (img , target) in enumerate(tqdm(train_loader)):
#             img = img.to(device)
#             target = target.to(device)
            
#             y_pred = model.forward(img)
#             _, preds = torch.max(y_pred, 1)
#             loss = criterion(y_pred, target)
            
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             loss_train_sum += loss.item()
#             acc_train_sum += (preds == target).sum().item()/ batch_size
            
#             ###################################F1 SCORE###################################
# #             FN = 0
# #             FP = 0
# #             TP = 0

# #             for n in range(num_class):# class 번호 
# #                 for gt , pred in zip(target.data.cpu(), preds.cpu()):
# #                     if gt == n and n != pred : 
# #                         FN += 1 
# #                     if gt != n and n == pred:
# #                         FP += 1
# #                     if gt == n and n == pred : 
# #                         TP += 1
# #             precision = TP / (TP + FP)
# #             recall = TP / (TP + FN)
# #             f1_train = 2 * (precision * recall) / (precision + recall + eps)
# #             f1_train_sum += f1_train
                        
                
#             scikit_f1_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='micro')
#             ###################################F1 SCORE###################################
        
#         loss_train_avg = loss_train_sum / len(train_loader)
#         acc_train_avg = acc_train_sum / len(train_loader)
#         # f1_train_avg = f1_train_sum / len(train_loader)
#         scikit_f1_train_avg = scikit_f1_sum / len(train_loader)

#         print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_train_avg:.3f}] acc : [{acc_train_avg : .3f}]  scikit_f1_avg : [{scikit_f1_train_avg : .3f}]")
#         # print(f" my f1 : [{f1_train_avg : .3f}]  scikit_f1_avg : [{scikit_f1_train_avg : .3f}]")
        
 
#         ##################################### eval ################################
#         model.eval()
        
#         loss_val_sum = 0
#         acc_val_sum = 0
#         f1_val_sum = 0
#         scikit_f1_val_sum = 0

        
#         for i , (img , target) in enumerate(tqdm(val_loader)):
#             img = img.to(device)
#             target = target.to(device)
            
#             with torch.no_grad():
#                 y_pred = model.forward(img)
#                 _, preds = torch.max(y_pred, 1)
#                 loss = criterion(y_pred, target)
            
#             loss_val_sum += loss
#             acc_val_sum += (preds == target).sum().item()/ batch_size
            
#         ###################################F1 SCORE###################################
# #             FN = 0
# #             FP = 0
# #             TP = 0

# #             for n in range(num_class):# class 번호 
# #                 for gt , pred in zip(target.data.cpu(), preds.cpu()):
                    
# #                     if gt == n and n != pred : 
# #                         FN += 1 
# #                     if gt != n and n == pred:
# #                         FP += 1
# #                     if gt == n == pred : 
# #                         TP += 1
                        
# #             precision = TP / (TP + FP)
# #             recall = TP / (TP + FN)
# #             f1_val = 2 * (precision * recall) / (precision + recall + eps)
# #             f1_val_sum += f1_val
            
#             scikit_f1_val_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='micro')
#         ###################################F1 SCORE###################################
#         loss_val_avg = loss_val_sum / len(val_loader)
#         acc_val_avg = acc_val_sum / len(val_loader)
#        #  f1_val_avg = f1_val_sum / len(val_loader)
#         scikit_f1_val_avg = scikit_f1_val_sum / len(val_loader)
        
        
#         print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_val_avg:.3f}] acc : [{acc_val_avg : .3f}]  scikit f1 : [{scikit_f1_val_avg : .3f}] ")
#         # print(f" my f1 : [{f1_val_avg : .3f}] scikit f1 : [{scikit_f1_val_avg : .3f}]")
#         lr_scheduler.step()
        
#         # 0401 Mission 
#         if best_val_loss > loss_val_avg:
#             best_val_loss = loss_val_avg
#             best_epoch = epoch
#             best_model_wts = copy.deepcopy(model.state_dict())    
        
    
#     # load best model weights
#     model.load_state_dict(best_model_wts)
#     torch.save(model.state_dict(), f'baseline_{best_epoch}.pt')

In [9]:
# Macro 구현 
def train(epochs , train_loader, val_loader , model , criterion , optimizer , lr_scheduler):
    
    eps = 1e-8
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch = 0
    best_val_loss = 999999999
    
    for epoch in range(epochs):
        ##################################### train ################################
        model.train()
        
        loss_train_sum = 0
        acc_train_sum = 0
        f1_train_sum = 0
        scikit_f1_train_sum = 0
  
        
        for i , (img , target) in enumerate(tqdm(train_loader)):
            img = img.to(device)
            target = target.to(device)
            
            y_pred = model.forward(img)
            _, preds = torch.max(y_pred, 1)
            loss = criterion(y_pred, target)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train_sum += loss
            acc_train_sum += (preds == target).sum().item()/ batch_size
            
            ###################################F1 SCORE###################################
            # 클래스 별로  f1을 계산한 다음 클래스 개수만큼 나눠줘야한다. 
            f1_dic = {}
            for n in range(num_class):# class 번호 
                FN = 0
                FP = 0
                TP = 0
                
                for gt , pred in zip(target.data.cpu(), preds.cpu()):
                    if gt == n and n != pred : 
                        FN += 1 
                    if gt != n and n == pred:
                        FP += 1
                    if gt == n and n == pred : 
                        TP += 1
                
                precision = TP / (TP + FP + eps)
                recall = TP / (TP + FN + eps)         
                f1_dic[n] = 2 * (precision * recall) / (precision + recall + eps)
            
            #class별 f1 계산 끝! 
            print(f1_dic)
            f1_train = sum(list(f1_dic.values())) / num_class
            f1_train_sum += f1_train
            scikit_f1_train_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='macro')
            
            ###################################F1 SCORE###################################
        
        loss_train_avg = loss_train_sum / len(train_loader)
        acc_train_avg = acc_train_sum / len(train_loader)
        f1_train_avg = f1_train_sum / len(train_loader)
        scikit_f1_train_avg = scikit_f1_train_sum / len(train_loader)
        
        print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_train_avg:.3f}] acc : [{acc_train_avg : .3f}] f1 : [{f1_train_avg : .3f}] scikit_f1 : [{scikit_f1_train_avg : .3f}]")
        
 
        ##################################### eval ################################
        model.eval()
        
        loss_val_sum = 0
        acc_val_sum = 0
        val_val_sum = 0
        scikit_f1_val_sum = 0
        
        FN = 0
        FP = 0
        TP = 0
        
        for i , (img , target) in enumerate(tqdm(val_loader)):
            img = img.to(device)
            target = target.to(device)
            
            with torch.no_grad():
                y_pred = model.forward(img)
                _, preds = torch.max(y_pred, 1)
                loss = criterion(y_pred, target)
                
            
            loss_val_sum += loss
            acc_val_sum += (preds == target).sum().item()/ batch_size
            
         ###################################F1 SCORE###################################
            # 클래스 별로  f1을 계산한 다음 클래스 개수만큼 나눠줘야한다. 
            f1_dic = {}
            for n in range(num_class):# class 번호 
                FN = 0
                FP = 0
                TP = 0
                
                for gt , pred in zip(target.data.cpu(), preds.cpu()):
                    if gt == n and n != pred : 
                        FN += 1 
                    if gt != n and n == pred:
                        FP += 1
                    if gt == n and n == pred : 
                        TP += 1
                
                precision = TP / (TP + FP + eps)
                recall = TP / (TP + FN + eps)         
                f1_dic[n] = 2 * (precision * recall) / (precision + recall + eps)
            
            #class별 f1 계산 끝! 
            f1_val = sum(list(f1_dic.values())) / num_class
            f1_val_sum += f1_val
            
            scikit_f1_val_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='macro')
            
            ###################################F1 SCORE###################################
        
        
        loss_val_avg = loss_val_sum / len(val_loader)
        acc_val_avg = acc_val_sum / len(val_loader)
        f1_val_avg = f1_val_sum / len(val_loader)
        scikit_f1_val_avg = scikit_f1_val_sum / len(train_loader)
        
        print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_val_avg:.3f}] acc : [{acc_val_avg : .3f}]  f1 : [{f1_val_avg : .3f}]  scikit_f1 : [{scikit_f1_val_avg : .3f}]")
        
        lr_scheduler.step()
        
        # 0401 Mission 
        if best_val_loss > loss_val_avg:
            best_val_loss = loss_val_avg
            best_epoch = epoch
            best_model_wts = copy.deepcopy(model.state_dict())    
        
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), f'baseline_{best_epoch}.pt')

In [10]:
# # # weighted F1
# from collections import Counter

# def train(epochs , train_loader, val_loader , model , criterion , optimizer , lr_scheduler):
    
#     best_model_wts = copy.deepcopy(model.state_dict())
#     best_epoch = 0
#     best_val_loss = 999999999
#     eps  = 1e-8
    
#     for epoch in range(epochs):
#         ##################################### train ################################
#         model.train()
        
#         loss_train_sum = 0
#         acc_train_sum = 0
#         f1_train_sum = 0
#         scikit_f1_train_sum = 0
  
        
#         for i , (img , target) in enumerate(tqdm(train_loader)):
#             img = img.to(device)
#             target = target.to(device)
            
#             y_pred = model.forward(img)
#             _, preds = torch.max(y_pred, 1)
#             loss = criterion(y_pred, target)
            
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             loss_train_sum += loss
#             acc_train_sum += (preds == target).sum().item()/ batch_size
            
#             ###################################F1 SCORE###################################
#             # 클래스 별로  f1을 계산한 다음 클래스 개수만큼 나눠줘야한다. 
#             f1_dic = {}
#             TC_dic = Counter(target.data.cpu().numpy())
#            # print(TC_dic)
#             for n in range(num_class):# class 번호 
#                 FN = 0
#                 FP = 0
#                 TP = 0
                
#                 for gt , pred in zip(target.data.cpu(), preds.cpu()):
#                     if gt == n and n != pred : 
#                         FN += 1 
#                     if gt != n and n == pred:
#                         FP += 1
#                     if gt == n and n== pred : 
#                         TP += 1
                
#                 precision = TP / (TP + FP + eps)
#                 recall = TP / (TP + FN + eps)         
#                 f1_dic[n] = 2 * (precision * recall) / (precision + recall + eps)
            
#             #class별 f1 계산 끝! 
#             f1_train = sum([a * b for a , b in zip(list(f1_dic.values()) ,list(TC_dic.values()))]) / sum(list(TC_dic.values()))
#             f1_train_sum += f1_train
#             scikit_f1_train_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='weighted')
#             ###################################F1 SCORE###################################
        
#         loss_train_avg = loss_train_sum / len(train_loader)
#         acc_train_avg = acc_train_sum / len(train_loader)
#         f1_train_avg = f1_train_sum / len(train_loader)
#         scikit_f1_train_avg = scikit_f1_train_sum / len(train_loader)
#         print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_train_avg:.3f}] acc : [{acc_train_avg : .3f}] f1 : [{f1_train_avg : .3f}]  scikit_f1 : [{scikit_f1_train_avg : .3f}]")
        
 
#         ##################################### eval ################################
#         model.eval()
        
#         loss_val_sum = 0
#         acc_val_sum = 0
#         f1_val_sum = 0
#         scikit_f1_train_sum = 0
#         FN = 0
#         FP = 0
#         TP = 0
        
#         for i , (img , target) in enumerate(tqdm(val_loader)):
#             img = img.to(device)
#             target = target.to(device)
            
#             with torch.no_grad():
#                 y_pred = model.forward(img)
#                 _, preds = torch.max(y_pred, 1)
#                 loss = criterion(y_pred, target)
            
#             loss_val_sum += loss
#             acc_val_sum += (preds == target).sum().item()/ batch_size
            
#          ###################################F1 SCORE###################################
#             # 클래스 별로  f1을 계산한 다음 클래스 개수만큼 나눠줘야한다. 
#             f1_dic = {}
#             TC_dic = Counter(target.data.cpu().numpy())
#             print(TC_dic)
#             for n in range(num_class):# class 번호 
#                 FN = 0
#                 FP = 0
#                 TP = 0
                
#                 for gt , pred in zip(target.data.cpu(), preds.cpu()):
#                     if gt == n and n != pred : 
#                         FN += 1 
#                     if gt != n and n == pred:
#                         FP += 1
#                     if gt == n == pred : 
#                         TP += 1
                
#                 precision = TP / (TP + FP)
#                 recall = TP / (TP + FN)         
#                 f1_dic[n] = 2 * (precision * recall) / (precision + recall)
            
#             #class별 f1 계산 끝! 
#             f1_val = sum(list(f1_dic.values())) / sum(list(TC_dic.values()))
#             f1_val_sum += f1_val
            
            
#             scikit_f1_train_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='weighted')
#             ###################################F1 SCORE###################################
        
        
#         loss_val_avg = loss_val_sum / len(val_loader)
#         acc_val_avg = acc_val_sum / len(val_loader)
#         f1_val_avg = f1_val_sum / len(val_loader)
#         scikit_f1_train_avg = scikit_f1_train_sum / len(val_loader)
#         print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_val_avg:.3f}] acc : [{acc_val_avg : .3f}]  f1 : [{f1_val_avg : .3f}]  scikit_f1 : [{scikit_f1_val_avg : .3f}]")
        
#         lr_scheduler.step()
        
#         # 0401 Mission 
#         if best_val_loss > loss_val_avg:
#             best_val_loss = loss_val_avg
#             best_epoch = epoch
#             best_model_wts = copy.deepcopy(model.state_dict())    
        
    
#     # load best model weights
#     model.load_state_dict(best_model_wts)
#     torch.save(model.state_dict(), f'baseline_{best_epoch}.pt')

------------------------------------------------------------
### Mission 1
- checkpoint를 짜보자

In [11]:
#         if best_val_loss > loss_val_loss:
#             best_val_loss = loss_val_loss
#             best_epoch = epoch
#             best_model_wts = copy.deepcopy(model.state_dict())    
        
    
#     load best model weights
#     model.load_state_dict(best_model_wts)
#     torch.save(model.state_dict(), f'baseline_{best_epoch}.pt')

------------------------------------------------------------

--------------------------------------------------------
### Mission 3
- Gradient Accumulation 을 구현해보자 ! 
--------------------------------------------------------

In [12]:
class F1_Loss(nn.Module):
    '''Calculate F1 score. Can work with gpu tensors
    
    The original implmentation is written by Michal Haltuf on Kaggle.
    
    Returns
    -------
    torch.Tensor
        `ndim` == 1. epsilon <= val <= 1
    
    Reference
    ---------
    - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric
    - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
    - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6
    - http://www.ryanzhang.info/python/writing-your-own-loss-function-module-for-pytorch/
    '''
    def __init__(self, epsilon=1e-7):
        super().__init__()
        self.epsilon = epsilon
        
    def forward(self, y_pred, y_true,):
    
        assert y_pred.ndim == 2
        assert y_true.ndim == 1

        y_true = F.one_hot(y_true, 18).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)
        
        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        tn = ((1 - y_true) * (1 - y_pred)).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2* (precision*recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1-self.epsilon)
        return 1 - f1.mean()

f1_loss = F1_Loss().cuda()

class F1_Loss(nn.Module):
    
    def __init__(self, epsilon=1e-7):
        super().__init__()
        self.epsilon = epsilon
        
    def forward(self, y_pred, y_true,):
    
        assert y_pred.ndim == 2
        assert y_true.ndim == 1

        y_true = F.one_hot(y_true, 18).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)
        
        f1 = f1_score(target.data, preds, average='weighted')
        f1 = f1.clamp(min=self.epsilon, max=1-self.epsilon)
        return 1 - f1.mean()

f1_loss = F1_Loss().cuda()

In [13]:
# Gradient Accumulation 

def train(epochs , train_loader, val_loader , model , criterion , optimizer , lr_scheduler = None , num_early_stop = 5 , num_accum = 4):
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch = 0
    best_val_loss = 999999999
    eps  = 1e-8
    early_stop = 0
    

    for epoch in range(epochs):
        if early_stop >= num_early_stop : break
        
        ##################################### train ################################
        model.train()
        
        loss_train_sum = 0
        acc_train_sum = 0
        scikit_f1_train_sum = 0
  
        # epoch 시작하기 전 reset 
        optimizer.zero_grad()
    
        for i , (img , target) in enumerate(tqdm(train_loader)):
            img = img.to(device)
            target = target.to(device)
            
            y_pred = model.forward(img)
            _, preds = torch.max(y_pred, 1)
            # loss = criterion(y_pred, target) / num_accum            # 어차피 loss가 다 더해져서 나뉘나 , 나뉜 다음에 더해지나 똑같음 
            loss = f1_loss(y_pred, target) / num_accum
            loss.backward()
            
            if (i+1) % num_accum == 0:
                optimizer.step()
                optimizer.zero_grad()
            
            
            loss_train_sum += loss
            acc_train_sum += (preds == target).sum().item()/ batch_size
            scikit_f1_train_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='weighted')
            ###################################F1 SCORE###################################
        
        loss_train_avg = loss_train_sum / len(train_loader)
        acc_train_avg = acc_train_sum / len(train_loader)
        scikit_f1_train_avg = scikit_f1_train_sum / len(train_loader)
        
        print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_train_avg:.3f}] acc : [{acc_train_avg : .3f}] scikit_f1 : [{scikit_f1_train_avg : .3f}]")
        print(f" learning rate : {lr_scheduler.get_last_lr()}")
        
 
        ##################################### eval ################################
        model.eval()
        
        loss_val_sum = 0
        acc_val_sum = 0
        f1_val_sum = 0
        scikit_f1_val_sum = 0

        
        for i , (img , target) in enumerate(tqdm(val_loader)):
            img = img.to(device)
            target = target.to(device)
            
            with torch.no_grad():
                y_pred = model.forward(img)
                _, preds = torch.max(y_pred, 1)
                # loss = criterion(y_pred, target)
                loss = f1_loss(y_pred, target) / num_accum
            
            loss_val_sum += loss
            acc_val_sum += (preds == target).sum().item()/ batch_size
            scikit_f1_val_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='weighted')
            ###################################F1 SCORE###################################
        
        
        loss_val_avg = loss_val_sum / len(val_loader)
        acc_val_avg = acc_val_sum / len(val_loader)
        f1_val_avg = f1_val_sum / len(val_loader)
        scikit_f1_val_avg = scikit_f1_val_sum / len(val_loader)
        print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_val_avg:.3f}] acc : [{acc_val_avg : .3f}] scikit_f1 : [{scikit_f1_val_avg : .3f}]")
        
        if lr_scheduler:
            lr_scheduler.step()
        
        # 0401 Mission 
        if best_val_loss > loss_val_avg:
            best_val_loss = loss_val_avg
            best_epoch = epoch + 1
            best_model_wts = copy.deepcopy(model.state_dict())
            
            early_stop = 0
        
        else :
            early_stop += 1
        
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), f'baseline_accumulaton_{best_epoch}.pt')

In [14]:
# # # weighted F1
# from collections import Counter

# def train(epochs , train_loader, val_loader , model , criterion , optimizer , lr_scheduler = None , num_early_stop = 5):
    
#     best_model_wts = copy.deepcopy(model.state_dict())
#     best_epoch = 0
#     best_val_loss = 999999999
#     eps  = 1e-8
#     early_stop = 0
    

#     for epoch in range(epochs):
#         if early_stop >= num_early_stop : break
        
#         ##################################### train ################################
#         model.train()
        
#         loss_train_sum = 0
#         acc_train_sum = 0
#         scikit_f1_train_sum = 0
  
        
#         for i , (img , target) in enumerate(tqdm(train_loader)):
#             img = img.to(device)
#             target = target.to(device)
            
#             y_pred = model.forward(img)
#             _, preds = torch.max(y_pred, 1)
#             loss = criterion(y_pred, target)
            
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             loss_train_sum += loss
#             acc_train_sum += (preds == target).sum().item()/ batch_size
#             scikit_f1_train_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='weighted')
#             ###################################F1 SCORE###################################
        
#         loss_train_avg = loss_train_sum / len(train_loader)
#         acc_train_avg = acc_train_sum / len(train_loader)
#         scikit_f1_train_avg = scikit_f1_train_sum / len(train_loader)
        
#         print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_train_avg:.3f}] acc : [{acc_train_avg : .3f}] scikit_f1 : [{scikit_f1_train_avg : .3f}]")
        
 
#         ##################################### eval ################################
#         model.eval()
        
#         loss_val_sum = 0
#         acc_val_sum = 0
#         f1_val_sum = 0
#         scikit_f1_val_sum = 0
        
#         for i , (img , target) in enumerate(tqdm(val_loader)):
#             img = img.to(device)
#             target = target.to(device)
            
#             with torch.no_grad():
#                 y_pred = model.forward(img)
#                 _, preds = torch.max(y_pred, 1)
#                 loss = criterion(y_pred, target)
            
#             loss_val_sum += loss
#             acc_val_sum += (preds == target).sum().item()/ batch_size
#             scikit_f1_val_sum += f1_score(target.data.detach().cpu(), preds.detach().cpu(), average='weighted')
#             ###################################F1 SCORE###################################
        
        
#         loss_val_avg = loss_val_sum / len(val_loader)
#         acc_val_avg = acc_val_sum / len(val_loader)
#         f1_val_avg = f1_val_sum / len(val_loader)
#         scikit_f1_val_avg = scikit_f1_val_sum / len(val_loader)
#         print(f" epoch:[{epoch+1}/{epochs}] cost:[{loss_val_avg:.3f}] acc : [{acc_val_avg : .3f}] scikit_f1 : [{scikit_f1_val_avg : .3f}]")
        
#         if lr_scheduler:
#             lr_scheduler.step()
        
#         # 0401 Mission 
#         if best_val_loss > loss_val_avg:
#             best_val_loss = loss_val_avg
#             best_epoch = epoch
#             best_model_wts = copy.deepcopy(model.state_dict())
            
#             early_stop = 0
        
#         else :
#             early_stop += 1
        
    
#     # load best model weights
#     model.load_state_dict(best_model_wts)
#     torch.save(model.state_dict(), f'baseline_{best_epoch}.pt')

In [15]:
Model = BaseModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(Model.parameters(), lr=1e-3, weight_decay=0.01)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 5, T_mult = 2, eta_min=1e-6)

In [16]:
train(epochs , train_loader , val_loader , Model, criterion , optimizer , lr_scheduler , num_early_stop = early_stop , num_accum = 4 )

100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[1/100] cost:[0.151] acc : [ 0.674] scikit_f1 : [ 0.646]
 learning rate : [0.001]


100%|██████████| 45/45 [00:16<00:00,  2.78it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[1/100] cost:[0.173] acc : [ 0.460] scikit_f1 : [ 0.442]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[2/100] cost:[0.113] acc : [ 0.790] scikit_f1 : [ 0.774]
 learning rate : [0.0009046039886902864]


100%|██████████| 45/45 [00:15<00:00,  2.81it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[2/100] cost:[0.172] acc : [ 0.581] scikit_f1 : [ 0.575]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[3/100] cost:[0.100] acc : [ 0.821] scikit_f1 : [ 0.806]
 learning rate : [0.0006548539886902864]


100%|██████████| 45/45 [00:17<00:00,  2.54it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[3/100] cost:[0.151] acc : [ 0.624] scikit_f1 : [ 0.642]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[4/100] cost:[0.086] acc : [ 0.855] scikit_f1 : [ 0.843]
 learning rate : [0.0003461460113097139]


100%|██████████| 45/45 [00:16<00:00,  2.78it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[4/100] cost:[0.134] acc : [ 0.771] scikit_f1 : [ 0.765]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[5/100] cost:[0.070] acc : [ 0.885] scikit_f1 : [ 0.876]
 learning rate : [9.639601130971382e-05]


100%|██████████| 45/45 [00:16<00:00,  2.75it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[5/100] cost:[0.134] acc : [ 0.762] scikit_f1 : [ 0.758]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[6/100] cost:[0.093] acc : [ 0.823] scikit_f1 : [ 0.812]
 learning rate : [0.001]


100%|██████████| 45/45 [00:16<00:00,  2.78it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[6/100] cost:[0.157] acc : [ 0.713] scikit_f1 : [ 0.697]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[7/100] cost:[0.099] acc : [ 0.802] scikit_f1 : [ 0.791]
 learning rate : [0.0009755527298894294]


100%|██████████| 45/45 [00:17<00:00,  2.60it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[7/100] cost:[0.153] acc : [ 0.712] scikit_f1 : [ 0.693]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[8/100] cost:[0.097] acc : [ 0.803] scikit_f1 : [ 0.794]
 learning rate : [0.0009046039886902864]


100%|██████████| 45/45 [00:16<00:00,  2.73it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[8/100] cost:[0.157] acc : [ 0.689] scikit_f1 : [ 0.664]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[9/100] cost:[0.089] acc : [ 0.841] scikit_f1 : [ 0.829]
 learning rate : [0.0007940987335200904]


100%|██████████| 45/45 [00:16<00:00,  2.76it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[9/100] cost:[0.146] acc : [ 0.714] scikit_f1 : [ 0.713]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[10/100] cost:[0.080] acc : [ 0.861] scikit_f1 : [ 0.850]
 learning rate : [0.0006548539886902864]


100%|██████████| 45/45 [00:18<00:00,  2.50it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[10/100] cost:[0.153] acc : [ 0.720] scikit_f1 : [ 0.703]


100%|██████████| 104/104 [01:39<00:00,  1.04it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[11/100] cost:[0.075] acc : [ 0.869] scikit_f1 : [ 0.861]
 learning rate : [0.0005005000000000001]


100%|██████████| 45/45 [00:16<00:00,  2.76it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[11/100] cost:[0.145] acc : [ 0.707] scikit_f1 : [ 0.706]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[12/100] cost:[0.070] acc : [ 0.881] scikit_f1 : [ 0.872]
 learning rate : [0.0003461460113097139]


100%|██████████| 45/45 [00:16<00:00,  2.76it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[12/100] cost:[0.153] acc : [ 0.660] scikit_f1 : [ 0.659]


100%|██████████| 104/104 [01:38<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[13/100] cost:[0.060] acc : [ 0.904] scikit_f1 : [ 0.898]
 learning rate : [0.00020690126647990973]


100%|██████████| 45/45 [00:16<00:00,  2.79it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[13/100] cost:[0.129] acc : [ 0.750] scikit_f1 : [ 0.776]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[14/100] cost:[0.048] acc : [ 0.943] scikit_f1 : [ 0.949]
 learning rate : [9.639601130971382e-05]


100%|██████████| 45/45 [00:16<00:00,  2.76it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[14/100] cost:[0.126] acc : [ 0.776] scikit_f1 : [ 0.787]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[15/100] cost:[0.044] acc : [ 0.958] scikit_f1 : [ 0.964]
 learning rate : [2.5447270110570814e-05]


100%|██████████| 45/45 [00:16<00:00,  2.68it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[15/100] cost:[0.125] acc : [ 0.784] scikit_f1 : [ 0.795]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[16/100] cost:[0.057] acc : [ 0.917] scikit_f1 : [ 0.924]
 learning rate : [0.001]


100%|██████████| 45/45 [00:16<00:00,  2.80it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[16/100] cost:[0.150] acc : [ 0.672] scikit_f1 : [ 0.678]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[17/100] cost:[0.072] acc : [ 0.865] scikit_f1 : [ 0.872]
 learning rate : [0.0009938503261272714]


100%|██████████| 45/45 [00:16<00:00,  2.71it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[17/100] cost:[0.165] acc : [ 0.593] scikit_f1 : [ 0.592]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[18/100] cost:[0.078] acc : [ 0.838] scikit_f1 : [ 0.845]
 learning rate : [0.0009755527298894294]


100%|██████████| 45/45 [00:16<00:00,  2.77it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[18/100] cost:[0.153] acc : [ 0.627] scikit_f1 : [ 0.632]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[19/100] cost:[0.079] acc : [ 0.833] scikit_f1 : [ 0.840]
 learning rate : [0.0009455577588320898]


100%|██████████| 45/45 [00:15<00:00,  2.85it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[19/100] cost:[0.152] acc : [ 0.670] scikit_f1 : [ 0.667]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[20/100] cost:[0.075] acc : [ 0.855] scikit_f1 : [ 0.861]
 learning rate : [0.0009046039886902864]


100%|██████████| 45/45 [00:16<00:00,  2.76it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[20/100] cost:[0.134] acc : [ 0.651] scikit_f1 : [ 0.694]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[21/100] cost:[0.071] acc : [ 0.873] scikit_f1 : [ 0.877]
 learning rate : [0.0008536998372026805]


100%|██████████| 45/45 [00:16<00:00,  2.81it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[21/100] cost:[0.130] acc : [ 0.790] scikit_f1 : [ 0.796]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[22/100] cost:[0.074] acc : [ 0.863] scikit_f1 : [ 0.869]
 learning rate : [0.0007940987335200904]


100%|██████████| 45/45 [00:16<00:00,  2.80it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[22/100] cost:[0.135] acc : [ 0.765] scikit_f1 : [ 0.773]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[23/100] cost:[0.065] acc : [ 0.887] scikit_f1 : [ 0.894]
 learning rate : [0.0007272682546199037]


100%|██████████| 45/45 [00:15<00:00,  2.82it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[23/100] cost:[0.138] acc : [ 0.721] scikit_f1 : [ 0.732]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[24/100] cost:[0.062] acc : [ 0.901] scikit_f1 : [ 0.907]
 learning rate : [0.0006548539886902864]


100%|██████████| 45/45 [00:16<00:00,  2.81it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

 epoch:[24/100] cost:[0.128] acc : [ 0.758] scikit_f1 : [ 0.768]


100%|██████████| 104/104 [01:39<00:00,  1.05it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

 epoch:[25/100] cost:[0.063] acc : [ 0.896] scikit_f1 : [ 0.903]
 learning rate : [0.0005786390152875954]


100%|██████████| 45/45 [00:16<00:00,  2.67it/s]


 epoch:[25/100] cost:[0.141] acc : [ 0.749] scikit_f1 : [ 0.753]


In [17]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [18]:
test_dir = 'input/data/eval'

In [20]:
img_size = 256
# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = transforms.Compose([
                                    transforms.Resize((img_size, img_size)),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406],
                                                         [0.229, 0.224, 0.225])])
dataset = TestDataset(image_paths, transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
Model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in loader:
    with torch.no_grad():
        images = images.to(device)
        pred = Model(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'f1loss_CAWS_AdamW_submission.csv'), index=False)
print('test inference is done!')

test inference is done!
