## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import math
import random
# import cv2
from PIL import Image
from util import *

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from transformers import AutoFeatureExtractor, SwinForImageClassification, AutoImageProcessor, Trainer, TrainingArguments

import wandb

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
%env WANDB_PROJECT=WallPaperDefectTypeClassification
%env WANDB_NOTEBOOK_NAME=./baseline.ipynb
%env WANDB_LOG_MODEL=end
%env WANDB_WATCH=all

env: WANDB_PROJECT=WallPaperDefectTypeClassification
env: WANDB_NOTEBOOK_NAME=./baseline.ipynb
env: WANDB_LOG_MODEL=end
env: WANDB_WATCH=all


In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device.type

'cuda'

## Hyperparameter Setting

In [4]:
CFG = {
    'IMG_SIZE':224,
    'EPOCHS':5,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':8,
    'WEIGHT_DECAY':0.01,
    'WARMUP_RATIO':0.1,
    'SEED':42,
    'NUM_WORKERS':2,
    'PRETRAINED_MODEL': "microsoft/swin-tiny-patch4-window7-224",
    'MODEL_VER' : "0.0.2",
}

## Fixed RandomSeed

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing

In [6]:
all_img_list = glob.glob('../data/train/*/*')

In [7]:
df = pd.DataFrame(columns=['img_path', 'label'])
df['img_path'] = all_img_list
df['label'] = df['img_path'].apply(lambda x: str(x).split('/')[-2]).astype(int)

In [8]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.3, stratify=df['label'], random_state=CFG['SEED'])

## Label-Encoding

In [9]:
# le = preprocessing.LabelEncoder()
# train['label'] = le.fit_transform(train['label'])
# val['label'] = le.transform(val['label'])

## CustomDataset

<img src=https://d2.naver.com/content/images/2021/01/efbe9400-5214-11eb-9c67-30fab62770ec.png>

**Albumentation Tutorials**<br>
https://github.com/albumentations-team/albumentations_examples/blob/master/notebooks/migrating_from_torchvision_to_albumentations.ipynb

In [10]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None, processor=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        self.processor = processor

    def __getitem__(self, index):
        
        img_path = self.img_path_list[index]
        image = Image.open(img_path)
        image_tr = self.transforms(image=np.array(image))['image']
        pixel_values = self.processor(image_tr, return_tensors="pt").pixel_values.squeeze()
        
        if self.label_list is not None:
            label = self.label_list[index]
            return {
                'pixel_values': pixel_values, 
                'label': label,
                }
        else:
            return {
                'pixel_values': pixel_values,
                }
        
    def __len__(self):
        return len(self.img_path_list)

In [11]:
train_transform = A.Compose([
                            A.HorizontalFlip(p=0.5),
                            A.RandomBrightnessContrast(p=0.5),
                            A.RandomScale(scale_limit=0.1, p=0.5),
                            A.RandomCropFromBorders(p=0.5),
                            ToTensorV2()
                            ])

test_transform = A.Compose([
                            # Augmentations
                            ToTensorV2()
                            ])

In [12]:
model_checkpoint = CFG['PRETRAINED_MODEL']
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [13]:
train_dataset = CustomDataset(
    train['img_path'].values, train['label'].values, train_transform, image_processor)

val_dataset = CustomDataset(
    val['img_path'].values, val['label'].values, test_transform, image_processor)


## Model Define

In [14]:
labels = pd.read_csv("../data/map.csv")['Categories']

In [15]:
model = SwinForImageClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)},
    ignore_mismatched_sizes=True,
).to(device)

print_gpu_utilization()


Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([19, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([19]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPU memory occupied: 1707 MB.


## Train

In [16]:
import evaluate

metric = evaluate.load("f1")

def compute_metrics(p):
  return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids, average='weighted')


In [17]:
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['label'] for x in batch]).type(torch.LongTensor),
    }

In [18]:
sweep_config = {
    'method': 'random',
    'metric' : {
        'name': 'eval/f1',
        'goal': 'maximize'   
        },
    'parameters' : {
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 1e-5,
            'max': 1e-3
        },
        'weight_decay': {
            'values': [0.0, 0.1, 0.2, 0.3]
        },
        'warmup_ratio':{
            'values': [0.0, 0.1, 0.2]
        },
        'lr_scheduler_type':{
            'values': ['linear', 'cosine']
        },
    }
}

In [19]:
sweep_id = wandb.sweep(sweep_config, project=os.environ['WANDB_PROJECT'])

Create sweep with ID: iqe2ogyw
Sweep URL: https://wandb.ai/2gnldud/WallPaperDefectTypeClassification/sweeps/iqe2ogyw


```
class SchedulerType(ExplicitEnum):
    LINEAR = "linear"
    COSINE = "cosine"
    COSINE_WITH_RESTARTS = "cosine_with_restarts"
    POLYNOMIAL = "polynomial"
    CONSTANT = "constant"
    CONSTANT_WITH_WARMUP = "constant_with_warmup"
    INVERSE_SQRT = "inverse_sqrt"
```


In [20]:
model_name = model_checkpoint.split("/")[-1]

def train(config=None):
    with wandb.init(config=config):
        config = wandb.config
        args = TrainingArguments(
            output_dir=f"../outputs/{model_name}-finetuned",
            overwrite_output_dir=True,
            remove_unused_columns=False,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            learning_rate=config.learning_rate,
            per_device_train_batch_size=CFG['BATCH_SIZE'],
            per_device_eval_batch_size=CFG['BATCH_SIZE'],
            num_train_epochs=CFG['EPOCHS'],
            weight_decay=config.weight_decay,
            logging_steps=10,
            gradient_accumulation_steps=4,
            dataloader_num_workers=CFG['NUM_WORKERS'],
            warmup_ratio=config.warmup_ratio,
            fp16=True,
            lr_scheduler_type=config.lr_scheduler_type,
            load_best_model_at_end=True,
            metric_for_best_model="f1",
            run_name='v.'+CFG['MODEL_VER'],
            report_to='wandb',
        )

        trainer = Trainer(
            model=model,
            args=args,
            data_collator=collate_fn,
            compute_metrics=compute_metrics,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            tokenizer=image_processor,
        )

        trainer.train()

In [21]:
wandb.agent(sweep_id, train, count=10)
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: kux3rl86 with config:
[34m[1mwandb[0m: 	learning_rate: 7.946814763316721e-05
[34m[1mwandb[0m: 	lr_scheduler_type: linear
[34m[1mwandb[0m: 	warmup_ratio: 0
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: Currently logged in as: [33m2gnldud[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch,Training Loss,Validation Loss


0,1
eval/f1,▁▅▇▇█
eval/loss,█▃▂▁▁
eval/runtime,▁▃▄▆█
eval/samples_per_second,█▆▅▃▁
eval/steps_per_second,█▆▅▃▁
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/learning_rate,███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▆▅▄▄▄▃▄▃▂▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/total_flos,▁

0,1
eval/f1,0.8581
eval/loss,0.51384
eval/runtime,6.7177
eval/samples_per_second,154.518
eval/steps_per_second,19.352
train/epoch,4.95
train/global_step,375.0
train/learning_rate,0.0
train/loss,0.2051
train/total_flos,2.979161972914176e+17


[34m[1mwandb[0m: Agent Starting Run: vaw98xsp with config:
[34m[1mwandb[0m: 	learning_rate: 0.00013260491325986333
[34m[1mwandb[0m: 	lr_scheduler_type: linear
[34m[1mwandb[0m: 	warmup_ratio: 0.2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [23]:
api = wandb.Api()

sweep = api.sweep(f"2gnldud/WallPaperDefectTypeClassification/{sweep_id}")
runs = sorted(sweep.runs,
  key=lambda run: run.summary.get("eval/f1", 0), reverse=True)
val_f1 = runs[0].summary.get("eval/f1", 0)
print(f"Best run {runs[0].name} with {val_f1}% validation accuracy")

runs[0].file("model.h5").download(replace=True)
print("Best model saved to model-best.h5")

Best run confused-sweep-3 with 0.8607638734588623% validation accuracy


CommError: It appears that you do not have permission to access the requested resource. Please reach out to the project owner to grant you access. If you have the correct permissions, verify that there are no issues with your networking setup.(Error 404: Not Found)

trainer.save_model(f"../models/v.{CFG['MODEL_VER']}")

metrics = trainer.evaluate()
# some nice to haves:
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

## Inference

In [24]:
test = pd.read_csv('../data/test.csv')
test['img_path'] = test['img_path'].apply(lambda x: str(x).replace("./", "../data/"))

In [26]:
test_dataset = CustomDataset(test['img_path'].values, None, test_transform, image_processor)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=CFG['NUM_WORKERS'])
infer_model = SwinForImageClassification.from_pretrained(f"../models/v.{CFG['MODEL_VER']}").to(device)

In [27]:
def inference(model, test_loader, device):
    model.eval()
    preds = []
    with torch.no_grad():
        for inputs in tqdm(iter(test_loader)):
            inputs['pixel_values'] = inputs['pixel_values'].to(device)
            logits = model(**inputs).logits
            predicted_label = torch.argmax(logits, dim=-1).tolist()
            
            preds += [model.config.id2label[x] for x in predicted_label]
            
    return preds

In [28]:
preds = inference(infer_model, test_loader, device)

  0%|          | 0/198 [00:00<?, ?it/s]

## Submission

In [29]:
submit = pd.read_csv('../data/sample_submission.csv')

In [30]:
submit['label'] = preds

In [31]:
submit.to_csv(f"../submissions/submit_v.{CFG['MODEL_VER']}.csv", index=False)