# 문제 정의 및 접근방안

0. 이 문제는 (3, 227, 227) Size의 Image가 어떤 Class에 해당하는지 분류하는 Task이다.

1. Train Dataset의 분포를 보았을 때, 6번 Class(person)의 비율이 매우 높은 것을 확인했다.
다른 Class의 비율은 이에 비해 낮으나, Test Set에서도 이와 같은 분포가 유지된다고 가정할 수는 없다.
따라서 Class Imbalance Problem을 해결하기 위한 방법이 필요하다.

2. 이미지는 Class에 해당하는 그림으로 판단되며, 사진과는 다소 다른 색감을 지니는 것을 확인했다.
또한, 비교적 이미지 전체에 각 특징이 분포하고 있어, 이미지의 특정 위치에 가중치를 주는 방법은 사용할 수 없으리라 생각되었다.
Data 증강시에는 위와 같은 정보를 활용해야 하겠다.

3. Data의 수가 적은 만큼, 최대한 Parameter의 수가 적으면서도 표현력이 높은 Model이 필요할 것이라 생각된다.
EfficientNet을 활용할 수 있으리라 생각되며, Data의 특징을 잘 분류할 수 있는 수준의 Version을 사용해야 하겠다.

In [1]:
# Mount Google Drive at /content/drive (requires the google.colab package)
from google.colab import drive 
drive.mount('/content/drive')

In [2]:
# 제공된 File 복사
! cp /content/drive/MyDrive/dev_match/test.zip .
! cp /content/drive/MyDrive/dev_match/train.zip .
! cp /content/drive/MyDrive/dev_match/test_answer_sample_.csv .
! unzip test.zip
! unzip train.zip
! mkdir output

In [3]:
# Install the required libraries (one pip invocation per package)
! pip install opencv-python
! pip install ipywidgets
! pip install adamp
! pip install wandb
! pip install transformers
! pip install timm
! pip install albumentations

In [4]:
# Import libraries
import os
import glob
import random
import math
from pathlib import Path

from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
import albumentations as A
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from adamp import AdamP
import timm
import wandb
import matplotlib.pyplot as plt
from transformers import get_cosine_with_hard_restarts_schedule_with_warmup

# Set ROOT_PATH
ROOT_PATH = os.getcwd()
print(f'ROOT_PATH : {ROOT_PATH}')

# Set device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'device : {device}')
print(torch.cuda.get_device_properties(device))

# Set wandb
wandb.login()
%env WANDB_PROJECT = dev-matching
%env WANDB_LOG_MODEL = true
%env WANDB_SILENT = true

ROOT_PATH : /content
device : cuda:0
_CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16160MB, multi_processor_count=80)


[34m[1mwandb[0m: Currently logged in as: [33mhkl[0m (use `wandb login --relogin` to force relogin)


env: WANDB_PROJECT=dev-matching
env: WANDB_LOG_MODEL=true
env: WANDB_SILENT=true


In [5]:
# Experiment configuration, stored on wandb.config so it is tracked with the run
CFG = wandb.config
# Run name; also used as the prefix of the saved checkpoint and submission files
CFG.name = 'effb3_dr05_f1ce_1cycle_resize'
CFG.tag = ['effb3','dr05','f1+ce', 'scheduler1', 'resize']
# Number of cross-validation folds and the list of fold indices to train
CFG.NUM_FOLD = 5
CFG.FOLD = list(range(CFG.NUM_FOLD))

# Model and optimisation hyper-parameters
CFG.model_name = 'tf_efficientnet_b3_ns'
CFG.lr = 1e-4
CFG.batch_size = 32
CFG.epochs = 20
# Passed to MyModel as dropRate
CFG.classifier_dropout = 0.5
CFG.weight_decay = 0.00
# Seed used for RNG seeding and for the stratified fold split
CFG.seed = 42

In [6]:
# Fix every random seed for reproducibility
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and force deterministic cuDNN.

    Args:
        seed: Integer seed applied to every generator; also exported as the
            PYTHONHASHSEED environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for each library-level generator (CPU and all CUDA devices).
    for apply_seed in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        apply_seed(seed)

    # Deterministic kernels on, autotuner off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Apply the configured seed before any data splitting or model creation
seed_everything(CFG.seed)

In [7]:
# Derive an integer class label from each image file path
def labeling(img_paths) :
    """Map image paths to class indices.

    The label is taken from the name of the directory that directly contains
    the file. If that directory is not a known class name, the first class
    name occurring anywhere in the path is used instead.

    Args:
        img_paths: Iterable of image paths (str or path-like).

    Returns:
        List of class indices, one per input path and in the same order.

    Raises:
        ValueError: If a path contains no known class name. Skipping it
            silently would shift every following label off its path.
    """
    classes = ['dog', 'elephant', 'giraffe', 'guitar', 'horse', 'house', 'person']
    img_labels = []
    for img_path in img_paths :
        img_path = str(img_path)
        parent_dir = os.path.basename(os.path.dirname(img_path))
        if parent_dir in classes :
            # Exact match on the class folder avoids false hits such as a
            # parent directory named "doghouse".
            label = classes.index(parent_dir)
        else :
            label = next((idx for idx, c in enumerate(classes) if c in img_path), None)
        if label is None :
            raise ValueError(f'No known class name found in path: {img_path}')
        img_labels.append(label)

    return img_labels

In [8]:
# Image File의 Path와 Label을 변수에 저장하는 Code
path = Path(os.path.join(ROOT_PATH, 'train'))
img_paths = sorted([str(x) for x in list(path.glob('*/*'))])
img_labels = labeling(img_paths)

In [9]:
# Define Dataset
class MyDataset(Dataset) :
    """Dataset that loads images from disk with OpenCV.

    Args:
        img_paths: Sequence of image file paths.
        img_labels: Optional sequence of labels aligned with ``img_paths``.
            When omitted, items contain only the image.
        trsf: Optional callable applied to the image after augmentation.
        augs: Optional callable invoked as ``augs(image=img)`` whose
            ``'image'`` entry replaces the image.
    """
    def __init__(self, img_paths, img_labels=None, trsf=None, augs=None) :
        self.img_paths = img_paths
        self.img_labels = img_labels
        self.trsf = trsf
        self.augs = augs

    def __len__(self) :
        return len(self.img_paths)

    def __getitem__(self, idx) :
        """Return ``{'image': ..., 'label': ...}``, or ``{'image': ...}`` without labels.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # NOTE(review): cv2.imread presumably yields BGR channel order; no
        # conversion to RGB happens here - confirm this matches the
        # normalisation statistics and pretrained weights.
        img = cv2.imread(self.img_paths[idx])
        if img is None :
            # Fail with the offending path instead of an unrelated error later on.
            raise FileNotFoundError(f'Failed to read image: {self.img_paths[idx]}')

        if self.augs is not None :
            img = self.augs(image=img)['image']

        if self.trsf is not None :
            img = self.trsf(img)

        # Explicit None check: truthiness is ambiguous for arrays/tensors of labels.
        if self.img_labels is not None :
            label = self.img_labels[idx]
            return {'image' : img, 'label' : label}
        return {'image' : img}

In [10]:
# Define Model
class MyModel(nn.Module) :
    """Thin wrapper around a timm classification model.

    Args:
        model_name: Model identifier passed to ``timm.create_model``.
        n_classes: Number of output classes.
        pretrained: Whether timm should load pretrained weights.
        dropRate: Forwarded to timm as ``drop_rate``.
        apply_softmax: When True (default, the original behaviour) ``forward``
            returns softmax probabilities. Pass False to get the raw model
            output, e.g. for losses that apply their own (log-)softmax such
            as ``nn.CrossEntropyLoss``.
    """
    def __init__(self, model_name, n_classes=7, pretrained=True, dropRate=0.7, apply_softmax=True):
        super().__init__()
        self.apply_softmax = apply_softmax
        self.model = timm.create_model(model_name, pretrained=pretrained, num_classes=n_classes, drop_rate=dropRate)

    def forward(self, x) :
        """Run the wrapped model; softmax over the last dim unless disabled."""
        x = self.model(x)
        if self.apply_softmax :
            x = F.softmax(x, dim=-1)
        return x

In [11]:
# Convert an image array into a float tensor
class ToTensor(object) :
    """Callable transform: scale by 1/255 and move the last axis to the front."""
    def __call__(self, img) :
        """Return ``img`` as a ``torch.FloatTensor`` with axes reordered by (2, 0, 1)."""
        scaled = np.array(img) / 255
        channels_first = np.transpose(scaled, (2, 0, 1))
        return torch.FloatTensor(channels_first)

In [23]:
# Transform and augmentation definitions
# NOTE(review): the Normalize mean/std are presumably dataset statistics -
# confirm their channel order against the images produced by the dataset.

# Deterministic pipeline for validation / inference: resize, tensor, normalise.
valid_transform = T.Compose([
    T.ToPILImage(),
    T.Resize(size=(331, 331)),
    ToTensor(),
    T.Normalize(mean=(0.556, 0.507, 0.457), std=(0.229, 0.218, 0.221)),
])

# Colour augmentations applied by the dataset before the torchvision transforms.
train_augs = A.Compose(
    [
        A.HueSaturationValue(),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.3, 0.3),
            contrast_limit=(-0.1, 0.1),
            p=0.5,
        ),
    ],
    p=1.0,
)

# Training pipeline: geometric augmentation, tensor conversion, random erasing, normalise.
train_transform = T.Compose([
    T.ToPILImage(),
    T.Resize(size=(331, 331)),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomRotation(degrees=10),
    ToTensor(),
    T.RandomErasing(),
    T.Normalize(mean=(0.556, 0.507, 0.457), std=(0.229, 0.218, 0.221)),
])

In [13]:
# Build the validation splits so that each fold keeps the class distribution
skf = StratifiedKFold(n_splits=CFG.NUM_FOLD, shuffle=True, random_state=CFG.seed)
# One dict of train/validation index arrays per fold, in split order
folds = [
    {'train_idx': train_idx, 'valid_idx': valid_idx}
    for train_idx, valid_idx in skf.split(img_paths, img_labels)
]

In [14]:
# Loss function that can be considered for the imbalanced class distribution
class FocalLoss(nn.Module):
    """Focal loss on raw scores: ``-(1 - pt) ** gamma * log(pt)``, optionally class-weighted.

    Args:
        gamma: Exponent of the ``(1 - pt)`` modulating factor.
        alpha: Optional class weights. A float/int ``a`` becomes ``[a, 1 - a]``;
            a list is converted to a tensor; anything else is stored as given.
        size_average: Return the mean when True, the sum otherwise.
    """
    def __init__(self, gamma=0, alpha=None, size_average=True):
        super().__init__()
        self.gamma = gamma
        if isinstance(alpha, (float, int)):
            alpha = torch.Tensor([alpha, 1 - alpha])
        elif isinstance(alpha, list):
            alpha = torch.Tensor(alpha)
        self.alpha = alpha
        self.size_average = size_average

    def forward(self, input, target):
        """Compute the loss for ``input`` scores and integer ``target`` indices."""
        if input.dim() > 2:
            # Flatten every trailing dimension: N,C,... => N*...,C
            n_batch, n_channels = input.size(0), input.size(1)
            input = input.view(n_batch, n_channels, -1).transpose(1, 2)
            input = input.contiguous().view(-1, n_channels)
        target = target.view(-1, 1)

        # Log-probability of the target class for each row
        log_prob = F.log_softmax(input, dim=-1).gather(1, target).view(-1)
        # Taken from .data, so the modulating factor carries no gradient
        prob = log_prob.data.exp()

        if self.alpha is not None:
            if self.alpha.type() != input.data.type():
                self.alpha = self.alpha.type_as(input.data)
            class_weight = self.alpha.gather(0, target.data.view(-1))
            log_prob = log_prob * class_weight

        loss = -1 * (1 - prob) ** self.gamma * log_prob
        return loss.mean() if self.size_average else loss.sum()

In [15]:
# Loss function that can be considered for the imbalanced class distribution
class F1Loss(nn.Module):
    """Differentiable macro-F1 loss: ``1 - mean(per-class soft F1)``.

    Args:
        classes: Number of classes used for one-hot encoding the targets.
        epsilon: Small constant guarding the divisions and bounding the
            clamped F1 values.
    """
    def __init__(self, classes=7, epsilon=1e-7):
        super().__init__()
        self.classes = classes
        self.epsilon = epsilon

    def forward(self, y_pred, y_true):
        """Return the loss for 2-D scores ``y_pred`` and 1-D class indices ``y_true``.

        Softmax is applied to ``y_pred`` along dim 1 before the soft counts
        are accumulated over the batch.

        Raises:
            ValueError: If ``y_pred`` is not 2-D or ``y_true`` is not 1-D.
        """
        # Explicit checks instead of assert, which is stripped under `python -O`.
        if y_pred.ndim != 2:
            raise ValueError(f'y_pred must be 2-D, got {y_pred.ndim} dimension(s)')
        if y_true.ndim != 1:
            raise ValueError(f'y_true must be 1-D, got {y_true.ndim} dimension(s)')

        y_true = F.one_hot(y_true, self.classes).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)

        # Soft confusion counts per class; true negatives are not needed for F1.
        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        return 1 - f1.mean()

In [16]:
# Loss function that accounts for both model training and the class distribution
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    With ``smoothing == 0`` the target is one-hot; with ``0 < smoothing < 1``
    the true class gets ``1 - smoothing`` and the remaining mass is split
    evenly over the other ``classes - 1`` classes.

    Args:
        classes: Number of classes.
        smoothing: Smoothing factor, checked in ``forward`` to be in [0, 1).
        dim: Dimension for the log-softmax and the final per-sample sum.
        weight: Optional per-class tensor multiplied into the log-probabilities.
    """
    def __init__(self, classes, smoothing=0.0, dim=-1, weight = None):
        super().__init__()
        self.cls = classes
        self.dim = dim
        self.weight = weight
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing

    def forward(self, pred, target):
        """Return the batch mean of the smoothed cross-entropy."""
        assert 0 <= self.smoothing < 1
        log_probs = pred.log_softmax(dim=self.dim)

        if self.weight is not None:
            log_probs = log_probs * self.weight.unsqueeze(0)

        # The target distribution is a constant; build it without tracking gradients.
        with torch.no_grad():
            off_value = self.smoothing / (self.cls - 1)
            true_dist = torch.full_like(log_probs, off_value)
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)

        per_sample = (-true_dist * log_probs).sum(dim=self.dim)
        return per_sample.mean()

In [17]:
# Helper object for tracking losses and metrics
class AverageMeter(object):
    """Keep the latest value plus a weighted running sum, count and average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Reset all statistics to zero."""
        self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.count += n
        self.sum += val * n
        self.val = val
        self.avg = self.sum / self.count

In [18]:
# Single training step
def training(batch, model, loss_fn1, loss_fn2, optimizer, scheduler) :
    """Run one optimisation step on ``batch``.

    Args:
        batch: Dict with an ``'image'`` tensor and a ``'label'`` tensor.
        model: Module to train; switched to train mode here.
        loss_fn1, loss_fn2: Callables ``(pred, labels) -> loss``; their sum
            is back-propagated.
        optimizer: Optimizer that is zeroed and stepped.
        scheduler: Learning-rate scheduler stepped once per call, or None to
            skip scheduling.

    Returns:
        Tuple ``(loss, acc)``: the detached summed loss tensor and the batch
        accuracy from ``accuracy_score``.
    """
    imgs = batch['image'].float().to(device)
    labels = batch['label'].to(device)

    model.train()
    optimizer.zero_grad()
    pred = model(imgs)
    loss = loss_fn1(pred, labels) + loss_fn2(pred, labels)
    loss.backward()
    optimizer.step()
    # The caller treats the scheduler as optional, so tolerate None here too.
    if scheduler is not None :
        scheduler.step()

    pred = torch.argmax(pred, dim=1)
    acc = accuracy_score(labels.cpu(), pred.cpu())

    # Detach so the returned value does not keep the autograd graph alive.
    return loss.detach(), acc

In [19]:
# Single validation step
def validating(batch, model, loss_fn1, loss_fn2) :
    """Evaluate ``model`` on one batch without tracking gradients.

    Args:
        batch: Dict with an ``'image'`` tensor and a ``'label'`` tensor.
        model: Module to evaluate; switched to eval mode here.
        loss_fn1, loss_fn2: Callables ``(pred, labels) -> loss``; their sum
            is reported.

    Returns:
        Tuple ``(loss, acc)``: the summed loss tensor and the batch accuracy
        from ``accuracy_score``.
    """
    images = batch['image'].float().to(device)
    targets = batch['label'].to(device)

    model.eval()
    with torch.no_grad() :
        output = model(images)
        loss = loss_fn1(output, targets) + loss_fn2(output, targets)

    predicted = output.argmax(dim=1)
    acc = accuracy_score(targets.cpu(), predicted.cpu())

    return loss, acc

In [20]:
# Class weights: classes with more samples receive a lower weight
# Per-class sample counts ordered by class index
label_weights = pd.Series(img_labels).value_counts().sort_index()
# weight = 1 - (class count / total count), moved to the training device
label_weights = torch.FloatTensor((1 - label_weights / label_weights.sum()).tolist()).to(device)

In [21]:
# Start a wandb run to monitor training
wandb.init(project='dev-matching', entity='hkl', config=CFG, name=CFG.name)

# Train with the sum of two losses.
# NOTE(review): MyModel.forward already applies softmax, and both F1Loss and
# CrossEntropyLoss normalise their input again, so the losses see
# probabilities rather than raw scores - confirm this is intended.
loss_fn1 = F1Loss()
loss_fn2 = torch.nn.CrossEntropyLoss(weight=label_weights)

# Out-of-fold training: one freshly created model per fold
for fold in CFG.FOLD :
    print(f'Now Training Fold is {fold}')

    # Datasets and loaders for this fold
    train_idx = folds[fold]['train_idx']
    valid_idx = folds[fold]['valid_idx']
    train_dataset = MyDataset(img_paths, img_labels, train_transform, train_augs)
    # Validation must use the deterministic transform: random flips, rotations
    # and erasing would make the metric (and model selection) noisy.
    valid_dataset = MyDataset(img_paths, img_labels, valid_transform)
    sub_train_dataset = Subset(train_dataset, train_idx)
    sub_valid_dataset = Subset(valid_dataset, valid_idx)
    train_loader = DataLoader(dataset=sub_train_dataset, batch_size=CFG.batch_size, shuffle=True, num_workers=3)
    # No shuffling needed for evaluation
    valid_loader = DataLoader(dataset=sub_valid_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=3)

    # Model, optimizer and scheduler for this fold
    model = MyModel(CFG.model_name, dropRate=CFG.classifier_dropout)
    model.to(device)
    optimizer = AdamP(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    # The scheduler is stepped once per batch, so count optimisation steps
    tot_step = math.ceil(len(sub_train_dataset) / CFG.batch_size) * CFG.epochs
    scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(optimizer=optimizer, num_warmup_steps=50, num_training_steps=tot_step, num_cycles=2)
    best_acc = 0

    for epoch in range(CFG.epochs) :
        # Training phase
        train_loss = AverageMeter()
        train_acc = AverageMeter()
        with tqdm(train_loader,
             total=len(train_loader),
             unit='batch') as train_bar :
            for batch in train_bar :
                train_bar.set_description(f'Epoch {epoch+1} / {CFG.epochs}')
                loss, acc = training(batch, model, loss_fn1, loss_fn2, optimizer, scheduler)
                # `batch` is a dict, so len(batch) is its number of keys;
                # weight the running averages by the number of samples instead.
                n_samples = batch['image'].size(0)
                train_loss.update(loss.item(), n_samples)
                train_acc.update(acc, n_samples)
                train_bar.set_postfix(train_acc=train_acc.avg, train_loss=train_loss.avg, Train=epoch+1)
                wandb.log({
                        'train_acc' : train_acc.avg,
                        'train_loss' : train_loss.avg,
                        # Fall back to the configured lr when no scheduler is used
                        'train_lr' : scheduler.get_last_lr()[0] if scheduler is not None else CFG.lr
                    })

        # Validation phase
        valid_loss = AverageMeter()
        valid_acc = AverageMeter()
        with tqdm(valid_loader,
             total=len(valid_loader),
             unit='batch') as valid_bar :
            for batch in valid_bar :
                valid_bar.set_description(f'Epoch {epoch+1} / {CFG.epochs}')
                loss, acc = validating(batch, model, loss_fn1, loss_fn2)
                n_samples = batch['image'].size(0)
                valid_loss.update(loss.item(), n_samples)
                valid_acc.update(acc, n_samples)
                valid_bar.set_postfix(valid_acc=valid_acc.avg, valid_loss=valid_loss.avg, Valid=epoch+1)
                wandb.log({
                    'valid_acc' : valid_acc.avg,
                    'valid_loss' : valid_loss.avg
                })

        # Keep the checkpoint of the best (or equally good, most recent) epoch
        if valid_acc.avg >= best_acc :
            best_acc = valid_acc.avg
            torch.save(model.state_dict(), f'output/{CFG.name}_fold{fold}.pth')

Now Training Fold is 0


HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))


Now Training Fold is 1


HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))


Now Training Fold is 2


HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))


Now Training Fold is 3


HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))


Now Training Fold is 4


HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




In [24]:
# Ensemble inference
# NOTE(review): predictions follow the sorted file order of test/0/* and are
# written row by row - confirm the sample CSV rows use the same order.
submission = pd.read_csv('test_answer_sample_.csv')
test_path = Path(os.path.join(ROOT_PATH, 'test'))
test_img_paths = sorted(str(x) for x in test_path.glob('0/*'))
test_dataset = MyDataset(test_img_paths, trsf=valid_transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=CFG.batch_size, shuffle=False)

# Use every checkpoint found in ./output (sorted for a reproducible order)
model_paths = sorted(glob.glob('output/*.pth'))
if not model_paths :
    # Fail early with a clear message instead of an opaque concatenate error
    raise FileNotFoundError('No model checkpoints (*.pth) found in output/')

prediction_lst = []
for best_model in model_paths :
    # Weights come from the checkpoint, so skip fetching pretrained ones
    model = MyModel(CFG.model_name, pretrained=False, dropRate=CFG.classifier_dropout)
    # map_location lets checkpoints saved on GPU load on the current device
    model.load_state_dict(torch.load(best_model, map_location=device))
    model.to(device)
    model.eval()
    prediction_array = []

    # no_grad: inference only, avoids building the autograd graph
    with torch.no_grad(), tqdm(test_loader,
             total=len(test_loader),
             unit='batch') as test_bar :
        for batch in test_bar :
            imgs = batch['image'].float().to(device)
            probs = model(imgs)
            probs = probs.cpu().numpy()
            prediction_array.extend(probs)

    # Store this model's outputs (softmax probabilities, as MyModel.forward
    # applies softmax) with a trailing model axis
    prediction_lst.append(np.array(prediction_array)[...,np.newaxis])

# Soft voting: average the outputs over models, then take the arg-max class
submission['answer value'] = np.argmax(np.mean(np.concatenate(prediction_lst, axis=2), axis=2), axis=1)
submission.to_csv(f'output/{CFG.name}_ensemble.csv', index=False)

HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))


