## Import

In [3]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2
import timm
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models
from torchvision import datasets, models, transforms
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from tqdm.auto import tqdm
from focal_loss.focal_loss import FocalLoss
import warnings
warnings.filterwarnings(action='ignore') 

In [4]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [5]:
CFG = {
    'IMG_SIZE':224,
    'EPOCHS':30,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':16,
    'SEED':41
}

## Fixed RandomSeed

In [6]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing

In [7]:
all_img_list = glob.glob('./train/*/*')

In [8]:
df = pd.DataFrame(columns=['img_path', 'label'])
df['img_path'] = all_img_list
df['label'] = df['img_path'].apply(lambda x : str(x).split('\\')[1])

In [9]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.3, stratify=df['label'], random_state=CFG['SEED'])

## Label-Encoding

In [8]:
le = preprocessing.LabelEncoder()
train['label'] = le.fit_transform(train['label'])
val['label'] = le.transform(val['label'])

## CustomDataset

In [9]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        
        image = cv2.imread(img_path)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
        
        if self.label_list is not None:
            label = self.label_list[index]
            return image, label
        else:
            return image
        
    def __len__(self):
        return len(self.img_path_list)

In [10]:
train_transform = A.Compose([
                            A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
                            # A.HorizontalFlip(),
                            # A.RandomContrast(limit=0.5),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

test_transform = A.Compose([
                            A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

In [11]:
train_dataset = CustomDataset(train['img_path'].values, train['label'].values, train_transform)

train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val['img_path'].values, val['label'].values, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

  0%|          | 0/2419 [00:00<?, ?it/s]

  0%|          | 0/1038 [00:00<?, ?it/s]

## Model Define

In [12]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=len(le.classes_)):
        super(BaseModel, self).__init__()
        self.backbone = models.efficientnet_v2_l(pretrained=True)
        # self.backbone = timm.create_model('densenet201', pretrained=True, num_classes = 1000)
        self.classifier = nn.Linear(1000, num_classes)
        
    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x

## Valid & Train & Mixup_Data

In [14]:
def mixup_data(x, y, alpha=1.0, use_cuda=True):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

In [13]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, true_labels = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.type(torch.LongTensor).to(device)      # ADDED .type(torch.LongTensor)
            
            pred = model(imgs)
            
            loss = criterion(pred, labels)
            # m = torch.nn.Softmax(dim=-1)
            # loss = criterion(m(pred), labels) # For Focal Loss
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()
            
            val_loss.append(loss.item())
        
        _val_loss = np.mean(val_loss)
        _val_score = f1_score(true_labels, preds, average='weighted')
    
    return _val_loss, _val_score

In [15]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    weights = torch.FloatTensor([1.0, 2.4, 4.6, 6.7, 8.7, 9.7, 9.9, 10.8, 14.2, 24.6, 26.0, 27.5,
                                 52.0, 63.9, 82.6, 100.4, 117.0, 281, 468.3]).to(device)
    criterion = nn.CrossEntropyLoss(weight=weights).to(device)
    
    # criterion = FocalLoss(gamma=2, weights=weights).to(device) # Gamma = [0.5, 1, 2, 5] 클 수록 적은거에 집중
    
    best_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        correct = 0
        total = 0
        model.train()
        train_loss = []
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = labels.type(torch.LongTensor).to(device) # ADDED .type(torch.LongTensor)
            
            inputs, targets_a, targets_b, lam = mixup_data(imgs, labels)
            inputs, targets_a, targets_b = map(Variable, (inputs,
                                                        targets_a, targets_b))
            outputs = model(imgs)
            
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
            
            # loss = criterion(outputs, labels)
            
            # m = torch.nn.Softmax(dim=-1)
            # loss = criterion(m(outputs), labels) # For Focal Loss
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            # correct += predicted.eq(labels.data).cpu().sum().float()
            
            correct += (lam * predicted.eq(targets_a.data).cpu().sum().float()
                        + (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Acc : [{100.*correct/total:.2f}%] Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val Weighted F1 Score : [{_val_score:.5f}]')
       
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_score < _val_score:
            best_score = _val_score
            best_model = model
        print(f'Currently the Best Val Weighted F1 Score : [{best_score:.5f}]')
    
    return best_model

## Run!!

In [16]:
model = BaseModel()
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, threshold_mode='abs', min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [1], Train Acc : [39.98%] Train Loss : [1.70157] Val Loss : [1.45337] Val Weighted F1 Score : [0.42155]
Currently the Best Val Weighted F1 Score : [0.42155]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [2], Train Acc : [48.57%] Train Loss : [1.23931] Val Loss : [1.38070] Val Weighted F1 Score : [0.43540]
Currently the Best Val Weighted F1 Score : [0.43540]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [3], Train Acc : [53.82%] Train Loss : [0.98568] Val Loss : [1.53830] Val Weighted F1 Score : [0.41806]
Currently the Best Val Weighted F1 Score : [0.43540]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [4], Train Acc : [55.73%] Train Loss : [0.93987] Val Loss : [1.10196] Val Weighted F1 Score : [0.49883]
Currently the Best Val Weighted F1 Score : [0.49883]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [5], Train Acc : [64.65%] Train Loss : [0.63535] Val Loss : [1.00183] Val Weighted F1 Score : [0.64306]
Currently the Best Val Weighted F1 Score : [0.64306]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [6], Train Acc : [70.15%] Train Loss : [0.50475] Val Loss : [0.99593] Val Weighted F1 Score : [0.69545]
Currently the Best Val Weighted F1 Score : [0.69545]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [7], Train Acc : [71.02%] Train Loss : [0.47742] Val Loss : [1.03932] Val Weighted F1 Score : [0.61865]
Currently the Best Val Weighted F1 Score : [0.69545]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [8], Train Acc : [69.99%] Train Loss : [0.45831] Val Loss : [1.21425] Val Weighted F1 Score : [0.65578]
Currently the Best Val Weighted F1 Score : [0.69545]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [9], Train Acc : [72.22%] Train Loss : [0.42778] Val Loss : [1.20593] Val Weighted F1 Score : [0.63612]
Epoch 00009: reducing learning rate of group 0 to 1.5000e-04.
Currently the Best Val Weighted F1 Score : [0.69545]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [10], Train Acc : [80.82%] Train Loss : [0.25580] Val Loss : [1.03683] Val Weighted F1 Score : [0.72881]
Currently the Best Val Weighted F1 Score : [0.72881]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [11], Train Acc : [85.78%] Train Loss : [0.15432] Val Loss : [1.37746] Val Weighted F1 Score : [0.75100]
Currently the Best Val Weighted F1 Score : [0.75100]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [12], Train Acc : [89.05%] Train Loss : [0.11810] Val Loss : [1.32896] Val Weighted F1 Score : [0.77298]
Currently the Best Val Weighted F1 Score : [0.77298]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [13], Train Acc : [90.91%] Train Loss : [0.09293] Val Loss : [1.12936] Val Weighted F1 Score : [0.74136]
Currently the Best Val Weighted F1 Score : [0.77298]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [14], Train Acc : [93.55%] Train Loss : [0.06327] Val Loss : [1.51759] Val Weighted F1 Score : [0.78480]
Currently the Best Val Weighted F1 Score : [0.78480]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [15], Train Acc : [93.26%] Train Loss : [0.08070] Val Loss : [1.42047] Val Weighted F1 Score : [0.75966]
Currently the Best Val Weighted F1 Score : [0.78480]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [16], Train Acc : [94.79%] Train Loss : [0.05607] Val Loss : [1.52613] Val Weighted F1 Score : [0.75069]
Currently the Best Val Weighted F1 Score : [0.78480]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [17], Train Acc : [95.33%] Train Loss : [0.04178] Val Loss : [1.52236] Val Weighted F1 Score : [0.78671]
Currently the Best Val Weighted F1 Score : [0.78671]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [18], Train Acc : [92.60%] Train Loss : [0.06118] Val Loss : [1.50928] Val Weighted F1 Score : [0.77176]
Currently the Best Val Weighted F1 Score : [0.78671]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [19], Train Acc : [96.90%] Train Loss : [0.03044] Val Loss : [1.71978] Val Weighted F1 Score : [0.77938]
Currently the Best Val Weighted F1 Score : [0.78671]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [20], Train Acc : [97.52%] Train Loss : [0.02812] Val Loss : [1.50358] Val Weighted F1 Score : [0.79068]
Currently the Best Val Weighted F1 Score : [0.79068]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [21], Train Acc : [96.28%] Train Loss : [0.03268] Val Loss : [1.73379] Val Weighted F1 Score : [0.73258]
Currently the Best Val Weighted F1 Score : [0.79068]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [22], Train Acc : [96.69%] Train Loss : [0.03561] Val Loss : [1.74512] Val Weighted F1 Score : [0.77244]
Currently the Best Val Weighted F1 Score : [0.79068]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [23], Train Acc : [97.19%] Train Loss : [0.03117] Val Loss : [1.79003] Val Weighted F1 Score : [0.76966]
Epoch 00023: reducing learning rate of group 0 to 7.5000e-05.
Currently the Best Val Weighted F1 Score : [0.79068]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [24], Train Acc : [96.65%] Train Loss : [0.05216] Val Loss : [1.64027] Val Weighted F1 Score : [0.77834]
Currently the Best Val Weighted F1 Score : [0.79068]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [25], Train Acc : [97.93%] Train Loss : [0.02410] Val Loss : [1.71295] Val Weighted F1 Score : [0.77290]
Currently the Best Val Weighted F1 Score : [0.79068]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [26], Train Acc : [98.43%] Train Loss : [0.02156] Val Loss : [1.63059] Val Weighted F1 Score : [0.79699]
Currently the Best Val Weighted F1 Score : [0.79699]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [27], Train Acc : [97.93%] Train Loss : [0.01896] Val Loss : [1.54525] Val Weighted F1 Score : [0.78825]
Currently the Best Val Weighted F1 Score : [0.79699]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [28], Train Acc : [98.39%] Train Loss : [0.01370] Val Loss : [1.73125] Val Weighted F1 Score : [0.79501]
Currently the Best Val Weighted F1 Score : [0.79699]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [29], Train Acc : [99.01%] Train Loss : [0.00686] Val Loss : [1.84921] Val Weighted F1 Score : [0.80019]
Currently the Best Val Weighted F1 Score : [0.80019]


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [30], Train Acc : [98.80%] Train Loss : [0.00883] Val Loss : [1.82021] Val Weighted F1 Score : [0.78846]
Currently the Best Val Weighted F1 Score : [0.80019]


## Inference

In [17]:
test = pd.read_csv('./test.csv')

In [18]:
test_dataset = CustomDataset(test['img_path'].values, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

  0%|          | 0/792 [00:00<?, ?it/s]

In [19]:
def inference(model, test_loader, device):
    model.eval()
    preds = []
    with torch.no_grad():
        for imgs in tqdm(iter(test_loader)):
            imgs = imgs.float().to(device)
            
            pred = model(imgs)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
    
    preds = le.inverse_transform(preds)
    return preds

In [20]:
preds = inference(infer_model, test_loader, device)

  0%|          | 0/50 [00:00<?, ?it/s]

## Submission

In [None]:
submit = pd.read_csv('./sample_submission.csv')

In [None]:
submit['label'] = preds

In [None]:
submit.loc[submit['label'] == '0', 'label'] = '가구수정'
submit.loc[submit['label'] == '1', 'label'] = '걸레받이수정'
submit.loc[submit['label'] == '2', 'label'] = '곰팡이'
submit.loc[submit['label'] == '3', 'label'] = '꼬임'
submit.loc[submit['label'] == '4', 'label'] = '녹오염'
submit.loc[submit['label'] == '5', 'label'] = '들뜸'
submit.loc[submit['label'] == '6', 'label'] = '면불량'
submit.loc[submit['label'] == '7', 'label'] = '몰딩수정'
submit.loc[submit['label'] == '8', 'label'] = '반점'
submit.loc[submit['label'] == '9', 'label'] = '석고수정'
submit.loc[submit['label'] == '10', 'label'] = '오염'
submit.loc[submit['label'] == '11', 'label'] = '오타공'
submit.loc[submit['label'] == '12', 'label'] = '울음'
submit.loc[submit['label'] == '13', 'label'] = '이음부불량'
submit.loc[submit['label'] == '14', 'label'] = '창틀,문틀수정'
submit.loc[submit['label'] == '15', 'label'] = '터짐'
submit.loc[submit['label'] == '16', 'label'] = '틈새과다'
submit.loc[submit['label'] == '17', 'label'] = '피스'
submit.loc[submit['label'] == '18', 'label'] = '훼손'

In [None]:
submit.to_csv('./baseline_submit.csv', index=False)