# **📄 Document type classification pipeline**

## Contents
1. Prepare Environments
2. Import Library & Define Functions
3. Hyper-parameters
4. Load Data
5. Train Model
6. Inference & Save File


## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [1]:
import os
import time
import random
import wandb

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import datetime
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# Fix every random seed so that repeated runs are reproducible.
SEED = 42
# NOTE: the hash seed is read at interpreter start-up, so this assignment only
# takes effect in processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cuDNN auto-tuning (benchmark=True) may select different, non-deterministic
# kernels from run to run, which defeats the seeding above. Request
# deterministic kernels and switch the auto-tuner off instead.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Dataset that yields (image, target) pairs listed in a CSV index file.
class ImageDataset(Dataset):
    """Image classification dataset backed by a CSV index.

    Every CSV row is unpacked as ``(file name, target)``, so the file must
    have exactly two columns. File names are resolved relative to ``path``.

    Args:
        csv: Path of the CSV index, read with ``pandas.read_csv``.
        path: Directory that contains the image files.
        transform: Optional callable invoked as ``transform(image=img)``;
            the transformed image is taken from the ``'image'`` key of its
            return value.
    """

    def __init__(self, csv, path, transform=None):
        # Stored as a plain array of rows; other cells read ``.df`` directly.
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and return ``(image, target)``."""
        name, target = self.df[idx]
        # The context manager releases the file handle as soon as the pixels
        # are copied. Converting to RGB guarantees a 3-channel array, so
        # grayscale, palette or RGBA files cannot break the 3-channel
        # normalization applied by the transforms.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        # Explicit None check: a transform object may define its own
        # truthiness (e.g. via __len__), which must not disable it.
        if self.transform is not None:
            img = self.transform(image=img)['image']
        return img, target

In [4]:
# Train the model for a single epoch.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Run one training pass over ``loader`` and report the epoch metrics.

    Args:
        loader: Iterable yielding ``(image, targets)`` batches.
        model: Network to train; switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable invoked as ``loss_fn(preds, targets)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        dict with ``"train_loss"`` (sum of batch losses divided by the
        number of batches), ``"train_acc"`` and ``"train_f1"`` (macro F1),
        all computed over the predictions collected during the epoch.
    """
    model.train()
    running_loss = 0
    epoch_preds, epoch_targets = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        # Drop the gradients accumulated by the previous step.
        model.zero_grad(set_to_none=True)

        logits = model(batch_images)
        loss = loss_fn(logits, batch_targets)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        # The predicted class is the index of the largest logit.
        epoch_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        epoch_targets.extend(batch_targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {batch_loss:.4f}")

    return {
        "train_loss": running_loss / len(loader),
        "train_acc": accuracy_score(epoch_targets, epoch_preds),
        "train_f1": f1_score(epoch_targets, epoch_preds, average='macro'),
    }

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [5]:
# Device: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data config.
# NOTE(review): data_path is reassigned in the dataset cell further down
# before it is ever read, so this relative path has no effect as written.
data_path = 'datasets_fin/'

# Model config: architecture name handed to timm.create_model.
model_name = 'resnet34' # other timm model names work too, e.g. 'resnet50', 'efficientnet_b0'

# Training config.
# img_size: images are resized to img_size x img_size by the transforms.
# LR: learning rate of the Adam optimizer.
# EPOCHS: number of passes over the training set.
# BATCH_SIZE: batch size of the data loaders.
# num_workers: number of DataLoader worker processes.
img_size = 32
LR = 1e-3
EPOCHS = 10
BATCH_SIZE = 32
num_workers = 0

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [6]:
# Transform pipeline applied to the training images.
# NOTE(review): no augmentation is configured yet; the steps are the same as
# in the test pipeline below.
trn_transform = A.Compose([
    # Resize the image to img_size x img_size.
    A.Resize(height=img_size, width=img_size),
    # Normalize each channel with the given mean / std
    # (the widely used ImageNet statistics).
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # Convert the image array to a PyTorch tensor.
    ToTensorV2(),
])

# Transform pipeline applied to the test images.
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [7]:
# Root directory of the competition data.
data_path = "/home/dev/computervisioncompetition-cv3/data"

# Build the datasets. The test set is indexed by the sample submission file,
# so the targets it yields are placeholders rather than real labels.
trn_dataset = ImageDataset(csv=data_path + "/train.csv", path=data_path + "/train/", transform=trn_transform)
tst_dataset = ImageDataset(csv=data_path + "/sample_submission.csv", path=data_path + "/test/", transform=tst_transform)

# Report the number of training and test samples.
n_train, n_test = len(trn_dataset), len(tst_dataset)
print(n_train, n_test)

1570 3140


In [8]:
# Build the data loaders. Both loaders take their worker count from the
# shared `num_workers` setting so that changing it affects training and
# inference alike.
trn_loader = DataLoader(
    trn_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # reshuffle the training samples every epoch
    num_workers=num_workers,
    pin_memory=True,
    drop_last=False
)
tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    # Keep the file order: predictions are later written back row by row
    # next to the IDs stored in tst_dataset.df.
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [9]:
# Create the network with a 17-class head, then move it to the target device.
model = timm.create_model(model_name, pretrained=True, num_classes=17)
model = model.to(device)

# Cross-entropy loss and an Adam optimizer over all model parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LR)

In [10]:
# Start the Weights & Biases run.
# The run config is meant to hold plain hyper-parameter values. Live objects
# (optimizer, loss, device, transforms, loaders) are therefore recorded as
# explicit descriptions rather than passed in directly: a default object repr
# such as a DataLoader's contains a memory address that changes on every run.
wandb.init(
    project="computervisioncompetition-cv3",
    name=f'{datetime.datetime.now().strftime("%Y%m%d")}_{model_name}_i{img_size}_b{BATCH_SIZE}_lr{LR}_e{EPOCHS}',
    config={
        "model": model_name,
        "epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
        "learning_rate": LR,
        "img_size": img_size,
        "num_workers": num_workers,
        "seed": SEED,
        "data_path": data_path,
        "optimizer": str(optimizer),
        "loss_fn": str(loss_fn),
        "device": str(device),
        "transform": str(trn_transform),
        "tst_transform": str(tst_transform),
        "trn_loader": {
            "num_samples": len(trn_loader.dataset),
            "num_batches": len(trn_loader),
        },
        "tst_loader": {
            "num_samples": len(tst_loader.dataset),
            "num_batches": len(tst_loader),
        },
    }
)

[34m[1mwandb[0m: Currently logged in as: [33mvisioncraft_james[0m ([33mvisioncraft_james-open-university-of-korea[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [11]:
# Train for EPOCHS epochs; print and log the metrics of every epoch.
# f1_result keeps the training macro-F1 of the most recent epoch.
f1_result = 0
metric_keys = ("train_loss", "train_acc", "train_f1")
for epoch in range(EPOCHS):
    ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device)
    ret['epoch'] = epoch
    f1_result = ret['train_f1']

    # One "name: value" line per entry, including the epoch index.
    log = "".join(f"{key}: {value:.4f}\n" for key, value in ret.items())

    # Only the three training metrics are sent to Weights & Biases.
    wandb.log({key: ret[key] for key in metric_keys})
    print(log)
    

Loss: 2.3725: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s]


train_loss: 2.4855
train_acc: 0.2516
train_f1: 0.2183
epoch: 0.0000



Loss: 2.8368: 100%|██████████| 50/50 [00:14<00:00,  3.51it/s]


train_loss: 1.7477
train_acc: 0.4828
train_f1: 0.4218
epoch: 1.0000



Loss: 2.7577: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s]


train_loss: 1.3160
train_acc: 0.5987
train_f1: 0.5552
epoch: 2.0000



Loss: 2.5141: 100%|██████████| 50/50 [00:14<00:00,  3.40it/s]


train_loss: 1.0425
train_acc: 0.6650
train_f1: 0.6293
epoch: 3.0000



Loss: 5.2802: 100%|██████████| 50/50 [00:14<00:00,  3.50it/s]


train_loss: 0.9126
train_acc: 0.7325
train_f1: 0.7106
epoch: 4.0000



Loss: 4.3493: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s]


train_loss: 0.7187
train_acc: 0.7854
train_f1: 0.7633
epoch: 5.0000



Loss: 2.5458: 100%|██████████| 50/50 [00:14<00:00,  3.45it/s]


train_loss: 0.6134
train_acc: 0.8089
train_f1: 0.7940
epoch: 6.0000



Loss: 3.2005: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s]


train_loss: 0.5229
train_acc: 0.8363
train_f1: 0.8208
epoch: 7.0000



Loss: 1.4057: 100%|██████████| 50/50 [00:14<00:00,  3.52it/s]


train_loss: 0.7780
train_acc: 0.7465
train_f1: 0.7267
epoch: 8.0000



Loss: 1.3997: 100%|██████████| 50/50 [00:14<00:00,  3.52it/s]

train_loss: 0.5858
train_acc: 0.8038
train_f1: 0.7805
epoch: 9.0000






## 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [12]:


# Date stamp (YYYYMMDD) used later in the submission file name.
today = datetime.datetime.now().strftime("%Y%m%d")
preds_list = []

# Predict a class index for every test image. The targets yielded by the
# test loader are ignored.
model.eval()
with torch.no_grad():
    for batch_images, _ in tqdm(tst_loader):
        logits = model(batch_images.to(device))
        preds_list.extend(logits.argmax(dim=1).cpu().numpy())

100%|██████████| 99/99 [00:26<00:00,  3.75it/s]


In [13]:
# Rebuild the submission table from the test index and replace the
# placeholder targets with the predicted class indices.
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target']).assign(target=preds_list)

In [14]:
# Sanity check: the prediction rows must list the same IDs, in the same
# order, as the sample submission file.
sample_submission_df = pd.read_csv(data_path + "/sample_submission.csv")
ids_match = sample_submission_df['ID'] == pred_df['ID']
assert ids_match.all()

In [15]:
def truncate(number, digits):
    """Cut ``number`` off after ``digits`` decimal places, without rounding.

    The cut is made on the shortest decimal representation of the float
    instead of on ``number * 10 ** digits``. The multiplication suffers from
    binary rounding error (``0.29 * 100`` is ``28.999999999999996``), which
    made the previous implementation return ``0.28`` for
    ``truncate(0.29, 2)``.

    Args:
        number: Value to truncate; anything accepted by ``float()``.
        digits: Number of decimal places to keep (coerced with ``int()``).

    Returns:
        float: ``number`` truncated toward zero to ``digits`` decimals.
    """
    from decimal import ROUND_DOWN, Decimal, localcontext

    digits = int(digits)
    with localcontext() as ctx:
        # A float can carry a decimal exponent of about +/-308; give the
        # context enough precision that quantize() never overflows it.
        ctx.prec = max(ctx.prec, 350 + abs(digits))
        quantum = Decimal(1).scaleb(-digits)
        exact = Decimal(repr(float(number)))
        return float(exact.quantize(quantum, rounding=ROUND_DOWN))

# f1_result is the macro-F1 measured on the training data in the last epoch;
# it only tags the file name and is not a validation score.
print(truncate(f1_result, 3))
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
os.makedirs("submission", exist_ok=True)
pred_df.to_csv(f"submission/sub_{today}_2_f1_{truncate(f1_result, 4)}.csv", index=False)

0.78


In [16]:
# Preview the first rows of the submission table.
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,10
2,00396fbc1f6cc21d.jpg,16
3,00471f8038d9c4b6.jpg,6
4,00901f504008d884.jpg,2
