## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
# import cv2
from PIL import Image
from util import *

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from transformers import AutoFeatureExtractor, SwinForImageClassification, AutoImageProcessor, Trainer, TrainingArguments

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device.type

'cuda'

## Hyperparameter Setting

In [3]:
# Central place for every tunable value used by the notebook.
CFG = dict(
    IMG_SIZE=224,
    EPOCHS=10,
    LEARNING_RATE=3e-4,
    BATCH_SIZE=4,           # per-device batch size for training, evaluation and inference
    WEIGHT_DECAY=0.01,
    SEED=42,
    NUM_WORKERS=2,          # DataLoader worker processes
    PRETRAINED_MODEL="microsoft/swin-tiny-patch4-window7-224",
    MODEL_VER="0.0.1",      # used in the saved-model and submission file names
)

## Fix Random Seed

In [4]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy and PyTorch (CPU and all CUDA devices), and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels between
    # runs, which breaks reproducibility; it must be off for a fixed seed to matter.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed

## Data Pre-processing

In [5]:
# glob returns paths in arbitrary, filesystem-dependent order; sort them so the
# seeded train/validation split below is identical on every machine.
all_img_list = sorted(glob.glob('../data/train/*/*'))

In [6]:
# One row per training image; the integer label is the name of the image's
# parent directory. os.path is used instead of splitting on '/' so the
# extraction also works with platform-specific path separators.
df = pd.DataFrame({'img_path': all_img_list})
df['label'] = df['img_path'].apply(
    lambda path: os.path.basename(os.path.dirname(str(path)))
).astype(int)

In [7]:
# Stratified 70/30 split of the image table; labels travel inside the frame,
# so only the frame itself needs to be split.
train, val = train_test_split(
    df,
    test_size=0.3,
    stratify=df['label'],
    random_state=CFG['SEED'],
)

## Label-Encoding

In [8]:
# le = preprocessing.LabelEncoder()
# train['label'] = le.fit_transform(train['label'])
# val['label'] = le.transform(val['label'])

## CustomDataset

<img src=https://d2.naver.com/content/images/2021/01/efbe9400-5214-11eb-9c67-30fab62770ec.png>

**Albumentations Tutorials**<br>
https://github.com/albumentations-team/albumentations_examples/blob/master/notebooks/migrating_from_torchvision_to_albumentations.ipynb

In [9]:
class CustomDataset(Dataset):
    """Image dataset yielding dicts with ``pixel_values`` (and ``label`` when available).

    Args:
        img_path_list: Indexable collection of image file paths.
        label_list: Indexable collection of labels aligned with ``img_path_list``,
            or ``None`` for unlabeled (inference) data.
        transforms: Optional callable invoked as ``transforms(image=array)`` and
            returning a mapping with an ``'image'`` entry (Albumentations style).
            When ``None`` the image is passed through unchanged.
        processor: Optional callable invoked as ``processor(image, return_tensors="pt")``
            whose result exposes ``.pixel_values``. When ``None`` the (transformed)
            image itself is returned as ``pixel_values``.
    """

    def __init__(self, img_path_list, label_list, transforms=None, processor=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        self.processor = processor

    def __getitem__(self, index):
        """Load, transform and preprocess the image at ``index``."""
        img_path = self.img_path_list[index]

        # The context manager releases the file handle promptly; convert('RGB')
        # guarantees three channels even for grayscale / RGBA / palette files.
        with Image.open(img_path) as img:
            image = np.array(img.convert('RGB'))

        # Both stages are optional so the documented None defaults do not crash.
        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        if self.processor is not None:
            # Drop only the batch dimension added by the processor.
            pixel_values = self.processor(image, return_tensors="pt").pixel_values.squeeze(0)
        else:
            pixel_values = image

        sample = {'pixel_values': pixel_values}
        if self.label_list is not None:
            sample['label'] = self.label_list[index]
        return sample

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_path_list)

In [10]:
# Training pipeline: four random augmentations, each applied with probability
# 0.5, followed by conversion to a tensor.
train_transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.RandomScale(scale_limit=0.1, p=0.5),
        A.RandomCropFromBorders(p=0.5),
        ToTensorV2(),
    ]
)

# Evaluation / inference pipeline: no augmentation, tensor conversion only.
test_transform = A.Compose(
    [
        ToTensorV2(),
    ]
)

In [11]:
# The checkpoint name comes from the config cell; the image processor is loaded
# from that same checkpoint.
model_checkpoint = CFG['PRETRAINED_MODEL']
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [12]:
# Training split: augmented images.
train_dataset = CustomDataset(
    img_path_list=train['img_path'].values,
    label_list=train['label'].values,
    transforms=train_transform,
    processor=image_processor,
)

# Validation split: no augmentation.
val_dataset = CustomDataset(
    img_path_list=val['img_path'].values,
    label_list=val['label'].values,
    transforms=test_transform,
    processor=image_processor,
)


## Model Definition

In [13]:
# Class names, read from the 'Categories' column of map.csv; their positional
# order defines the class ids used for id2label / label2id below.
labels = pd.read_csv("../data/map.csv")['Categories']


In [14]:
# Load the pretrained Swin checkpoint with a classification head sized to our
# label set. ignore_mismatched_sizes=True lets the classifier weights, whose
# shape differs from the checkpoint's, be newly initialized instead of raising.
model = SwinForImageClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)},
    ignore_mismatched_sizes=True,
).to(device)

# NOTE(review): print_gpu_utilization is not defined in this notebook; presumably
# it comes from `from util import *` — confirm.
print_gpu_utilization()


Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([19, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([19]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPU memory occupied: 1352 MB.


## Train

In [15]:
# NOTE(review): this import lives outside the notebook's top import cell;
# consider moving it there so all dependencies are visible in one place.
import evaluate

# F1 metric object used by compute_metrics below.
metric = evaluate.load("f1")

def compute_metrics(p):
    """Compute the weighted F1 score for one evaluation pass.

    Args:
        p: Object exposing ``predictions`` (per-class scores, one row per
            sample) and ``label_ids`` (reference labels).

    Returns:
        Whatever ``metric.compute`` returns for the arg-max predictions.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    return metric.compute(
        predictions=predicted_classes,
        references=p.label_ids,
        average='weighted',
    )


In [16]:
def collate_fn(batch):
    """Merge a list of dataset samples into one model-ready batch.

    Args:
        batch: List of dicts, each holding a ``'pixel_values'`` tensor and a
            ``'label'``.

    Returns:
        Dict with stacked ``'pixel_values'`` and a long tensor ``'labels'``.
    """
    images = [sample['pixel_values'] for sample in batch]
    targets = [sample['label'] for sample in batch]
    return {
        'pixel_values': torch.stack(images),
        'labels': torch.tensor(targets).long(),
    }

```
class SchedulerType(ExplicitEnum):
    LINEAR = "linear"
    COSINE = "cosine"
    COSINE_WITH_RESTARTS = "cosine_with_restarts"
    POLYNOMIAL = "polynomial"
    CONSTANT = "constant"
    CONSTANT_WITH_WARMUP = "constant_with_warmup"
    INVERSE_SQRT = "inverse_sqrt"
```


In [17]:
# Use the last path component of the checkpoint id as the run name.
model_name = model_checkpoint.split("/")[-1]

args = TrainingArguments(
    f"../outputs/{model_name}-finetuned",
    # Keep every key produced by collate_fn instead of letting the Trainer prune them.
    remove_unused_columns=False,
    # Evaluate and checkpoint once per epoch.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=CFG['LEARNING_RATE'],
    per_device_train_batch_size=CFG['BATCH_SIZE'],
    per_device_eval_batch_size=CFG['BATCH_SIZE'],
    num_train_epochs=CFG['EPOCHS'],
    weight_decay=CFG['WEIGHT_DECAY'],  # the missing comma here was a SyntaxError
    # Gradients are accumulated over 4 steps before each optimizer update.
    gradient_accumulation_steps=4,
    dataloader_num_workers=CFG['NUM_WORKERS'],
    warmup_ratio=0.1,
    logging_steps=10,
    logging_dir='../logs',
    # After training, reload the checkpoint with the best "f1" value.
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # report_to='wandb',
)

In [18]:
# Bundle model, training arguments, datasets, batching and metric into a Trainer.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    tokenizer=image_processor,
)


In [19]:
# Run fine-tuning; the returned TrainOutput is displayed as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss,F1
0,1.5011,1.363338,0.608775
1,0.7665,0.962486,0.705671
2,0.8237,0.810044,0.717663
3,0.5523,0.773862,0.759317
4,0.4746,0.769395,0.783694
5,0.249,0.750604,0.803729
6,0.1995,0.77091,0.825834


TrainOutput(global_step=1510, training_loss=0.5512127938353463, metrics={'train_runtime': 436.1622, 'train_samples_per_second': 55.461, 'train_steps_per_second': 3.462, 'total_flos': 6.005821436381491e+17, 'train_loss': 0.5512127938353463, 'epoch': 9.98})

In [53]:
# Save the trained model into a directory named after CFG['MODEL_VER'].
trainer.save_model(f"../models/v.{CFG['MODEL_VER']}")

In [21]:
# Evaluate on the eval dataset given to the Trainer, then log the metrics and
# write them to disk under the "eval" split name.
metrics = trainer.evaluate()
# some nice to haves:
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

***** eval metrics *****
  epoch                   =       9.98
  eval_f1                 =     0.8444
  eval_loss               =     0.7788
  eval_runtime            = 0:00:07.85
  eval_samples_per_second =    132.216
  eval_steps_per_second   =     33.118


## Inference

In [117]:
test = pd.read_csv('../data/test.csv')
# Re-root paths that start with "./" under ../data/. The pattern is anchored to
# the start of the string so a "./" appearing elsewhere in a path (for example
# inside "../") is left untouched.
test['img_path'] = test['img_path'].apply(lambda x: re.sub(r'^\./', '../data/', str(x)))

In [122]:
# Unlabeled test set: label_list is None, so samples contain only 'pixel_values'.
test_dataset = CustomDataset(test['img_path'].values, None, test_transform, image_processor)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=CFG['NUM_WORKERS'])
# Load from the exact directory trainer.save_model() wrote to. The previous
# "../models/" path pointed at the parent folder, not the versioned model directory.
infer_model = SwinForImageClassification.from_pretrained(f"../models/v.{CFG['MODEL_VER']}").to(device)

In [131]:
def inference(model, test_loader, device):
    """Predict a label name for every sample served by ``test_loader``.

    Args:
        model: Classification model; called with the batch as keyword
            arguments and expected to return an object with ``.logits``,
            and to expose ``config.id2label``.
        test_loader: Iterable of dict batches containing ``'pixel_values'``.
        device: Device the pixel tensors are moved to before the forward pass.

    Returns:
        List of label names, in loader order.
    """
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            batch['pixel_values'] = batch['pixel_values'].to(device)
            outputs = model(**batch)
            class_ids = torch.argmax(outputs.logits, dim=-1).tolist()
            # Translate numeric class ids into label names.
            predictions.extend(model.config.id2label[class_id] for class_id in class_ids)

    return predictions

In [132]:
# Predicted label names for the test set, in test_loader order.
preds = inference(infer_model, test_loader, device)

  0%|          | 0/198 [00:00<?, ?it/s]

## Submission

In [153]:
# Start from the provided sample submission file.
submit = pd.read_csv('../data/sample_submission.csv')

In [154]:
# Overwrite the 'label' column with the predictions.
# NOTE(review): assumes sample_submission.csv rows are in the same order as test.csv — confirm.
submit['label'] = preds

In [155]:
# Write the submission under a file name that carries CFG['MODEL_VER'].
submit.to_csv(f"../submissions/submit_v.{CFG['MODEL_VER']}.csv", index=False)