# Notebook 기본 세팅

In [1]:
# Constant declarations

# Name of the marker file used to identify the project root directory.
ROOT_MARKER = "pyproject.toml"

# File name of the NanumBarunGothic font used to render Korean text.
# matplotlib's font_manager must be given the location of the actual font file.
KOREAN_FONT_FILE = "NanumBarunGothic.ttf"

# matplotlib selects a font by its font-family name,
# so this is the family name of that file rather than the font file itself.
KOREAN_FONT_FAMILY = "NanumBarunGothic"

# Note
# The difference between a font family and a font file:
# a font family is a group of typefaces that share similar design characteristics.
#
# For example, the 'NanumBarunGothic' family can include several styles such as Regular, Bold, and Italic.
# A font file (.ttf, .otf, ...), on the other hand, is the actual file that stores one style of such a font.
#
# To keep the font size small, this project uses only the Regular style, NanumBarunGothic.ttf.

In [2]:
# Locate the project root (the directory that contains the ROOT_MARKER file) and build data paths relative to it
from pathlib import Path


def find_project_root(start: "Path | None" = None, marker: "str | None" = None) -> Path:
    """
    Find the project root by searching upward for a marker file.

    The search visits the starting directory and then each of its ancestors,
    up to and including the filesystem root.

    :param start: directory to begin the search from; defaults to the current working directory
    :param marker: file name that identifies the root; defaults to ROOT_MARKER
    :return: Path: the nearest directory (the start or one of its ancestors) that contains the marker
    :raises FileNotFoundError: if none of the visited directories contains the marker
    """
    marker_name = ROOT_MARKER if marker is None else marker
    origin = (Path() if start is None else Path(start)).resolve()

    # Path.parents excludes the path itself, so prepend it. Unlike a
    # `while path != path.parent` loop, this also checks the filesystem root.
    for candidate in (origin, *origin.parents):
        if (candidate / marker_name).exists():
            return candidate

    raise FileNotFoundError("프로젝트 루트 디렉토리를 찾을 수 없습니다.")


# Resolve the project root once; DATA_DIR is the "data" directory directly under it.
ROOT_DIR = find_project_root()
DATA_DIR = ROOT_DIR / "data"

In [3]:
# matplotlib 의 한글 font 설정
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt


# Directory under DATA_DIR from which the font file is loaded.
FONTS_DATA_DIR = DATA_DIR / "fonts"


def setup_korean_font():
    """
    Register the Korean font file with matplotlib and make it the default font family.

    The font is read from FONTS_DATA_DIR / KOREAN_FONT_FILE, and the global
    matplotlib rcParams are updated in place.
    """
    fm.fontManager.addfont(FONTS_DATA_DIR / KOREAN_FONT_FILE)

    # Font settings
    plt.rcParams.update(
        {
            "font.family": KOREAN_FONT_FAMILY,
            # Use an ASCII hyphen for negative numbers instead of the Unicode minus sign
            # (presumably because the Korean font may lack that glyph - TODO confirm).
            "axes.unicode_minus": False,
        }
    )


# Apply the Korean font settings to matplotlib's global rcParams.
setup_korean_font()

# Baseline Code 분석하기

In [4]:
import random

import numpy as np
import torch


def fix_random_seed(seed: int):
    """
    Seed every random number generator used in this notebook for reproducibility.

    :param seed: seed applied to Python's ``random``, NumPy, and PyTorch (CPU and CUDA)
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN benchmark mode auto-tunes the convolution algorithm, so different
    # kernels may be selected between runs, which defeats the fixed seed.
    # Disable it and request deterministic cuDNN kernels instead.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [5]:
# Seed all random number generators once, before any dataset, loader, or model is created.
SEED = 4321
fix_random_seed(SEED)

## Dataset 정의

In [6]:
from pathlib import Path

import pandas as pd
from PIL import Image
from torch.utils.data import Dataset


class DocumentImageDataset(Dataset):
    """
    Dataset of images listed in a metadata CSV.

    Each CSV row is unpacked as ``(image file name, target)``. The image is read
    from ``image_dir`` as an RGB array and, when a transform is given, replaced by
    the ``"image"`` entry of the transform's result.
    """

    def __init__(self, csv_path: Path, image_dir: Path, transform=None):
        """
        :param csv_path: CSV file whose rows are (image file name, target)
        :param image_dir: directory containing the image files named in the CSV
        :param transform: optional callable invoked as ``transform(image=array)``
            that returns a mapping with an ``"image"`` entry
        """
        self.image_metadata_df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the metadata CSV."""
        return len(self.image_metadata_df)

    def __getitem__(self, idx: int):
        """
        :param idx: positional row index into the metadata frame
        :return: ``(image, target)`` for that row
        """
        image_name, target = self.image_metadata_df.iloc[idx]
        # The context manager closes the file handle promptly. convert("RGB")
        # guarantees an (H, W, 3) array, so grayscale / RGBA / palette files
        # do not break a 3-channel normalization applied by the transform.
        with Image.open(self.image_dir / image_name) as image_file:
            img = np.array(image_file.convert("RGB"))
        # Compare against None explicitly: a transform object may define __len__
        # and evaluate as falsy even though it should still be applied.
        if self.transform is not None:
            img = self.transform(image=img)["image"]
        return img, target

In [7]:
import albumentations
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader


# Throwaway pipeline for a quick check: resize to 32x32, normalize the channels, convert to a tensor.
tmp_transform = albumentations.Compose(
    [
        albumentations.Resize(height=32, width=32),
        albumentations.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)

tmp_img_dataset = DocumentImageDataset(
    csv_path=DATA_DIR / "raw" / "train.csv",
    image_dir=DATA_DIR / "raw" / "train",
    transform=tmp_transform,
)
print("total image size of train:", len(tmp_img_dataset))

tmp_dataloader = DataLoader(tmp_img_dataset, batch_size=4, shuffle=True)

# Pull a single batch to confirm that the dataset and the loader work together.
for b_imgs, b_targets in tmp_dataloader:
    print("Shape of imgs:", b_imgs.shape, "| Shape of targets:", b_targets.shape)
    break

total image size of train: 1570
Shape of imgs: torch.Size([4, 3, 32, 32]) | Shape of targets: torch.Size([4])


- 여러 플랫폼에서 실행해도 torch.device 를 사용할 수 있도록 함수 정의

In [8]:
def get_device() -> torch.device:
    """
    Pick the torch device to run on.

    MPS is checked first, then CUDA; the CPU is the fallback when neither is available.

    :return: torch.device for "mps", "cuda", or "cpu"
    """
    if torch.backends.mps.is_available():
        device_name = "mps"
    elif torch.cuda.is_available():
        device_name = "cuda"
    else:
        device_name = "cpu"
    return torch.device(device_name)

## train 함수 정의

In [9]:
from sklearn.metrics import accuracy_score, f1_score
from torch import nn, optim
from tqdm import tqdm


def train_per_one_epoch(
    loader: DataLoader, model: nn.Module, optimizer: optim.Optimizer, loss_fn: nn.Module, device: torch.device
):
    """
    Train ``model`` for one pass over ``loader`` and report training metrics.

    :param loader: yields ``(images, targets)`` batches
    :param model: network to train; switched to training mode here
    :param optimizer: optimizer stepped once per batch
    :param loss_fn: called as ``loss_fn(predictions, targets)``
    :param device: device the batches are moved to before the forward pass
    :return: dict with ``avg_train_loss`` (sum of the per-batch losses divided by
        the number of batches), ``train_accuracy`` and ``train_f1`` (macro-averaged)
    """
    model.train()
    batch_losses = []
    predicted_labels = []
    true_labels = []

    progress = tqdm(loader)
    for images, targets in progress:
        model.zero_grad()

        batch_images = images.to(device)
        batch_targets = targets.to(device)

        outputs = model(batch_images)
        loss = loss_fn(outputs, batch_targets)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        batch_losses.append(batch_loss)

        # The index of the largest output along dim 1 is used as the predicted label.
        predicted_labels.extend(outputs.argmax(dim=1).detach().cpu().numpy())
        true_labels.extend(batch_targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {batch_loss:.4f}")

    return {
        "avg_train_loss": sum(batch_losses) / len(loader),
        "train_accuracy": accuracy_score(true_labels, predicted_labels),
        "train_f1": f1_score(true_labels, predicted_labels, average="macro"),
    }

## Hyper-parameters

In [10]:
# Device selected by get_device() for this machine.
local_device = get_device()

RAW_DIR = DATA_DIR / "raw"  # directory holding the CSV files and the train/test image folders
MODEL_NAME = "resnet34"  # model name passed to timm.create_model
IMAGE_SIZE = 32  # images are resized to IMAGE_SIZE x IMAGE_SIZE
LEARNING_RATE = 1e-3  # learning rate given to the Adam optimizer
EPOCHS = 10  # number of passes over the training loader
BATCH_SIZE = 64  # batch size of both the train and the test loader

## Load Data

In [11]:
# train image 의 변환을 위한 transform
# Transform applied to the training images
train_transform = albumentations.Compose(
    [
        albumentations.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
        albumentations.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)

# Transform applied to the test images
test_transform = albumentations.Compose(
    [
        albumentations.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
        albumentations.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)

In [12]:
# Training set: file names and targets come from train.csv, images from the "train" folder.
train_dataset = DocumentImageDataset(
    csv_path=RAW_DIR / "train.csv",
    image_dir=RAW_DIR / "train",
    transform=train_transform,
)

# Test set: rows come from sample_submission.csv, images from the "test" folder.
test_dataset = DocumentImageDataset(
    csv_path=RAW_DIR / "sample_submission.csv",
    image_dir=RAW_DIR / "test",
    transform=test_transform,
)

(len(train_dataset), len(test_dataset))

(1570, 3140)

In [13]:
# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Test batches keep the dataset order so predictions line up with the metadata rows.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Train Model

In [14]:
import timm


# Create the pretrained model with a 17-class output, then move it to the selected device.
pretrained_model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=17)
pretrained_model = pretrained_model.to(local_device)

criterion = nn.CrossEntropyLoss()

# The optimizer is built after the model has been moved to the device.
adam_optimizer = optim.Adam(pretrained_model.parameters(), lr=LEARNING_RATE)

In [15]:
# Train for EPOCHS passes and print the metrics returned for each pass.
for epoch in range(EPOCHS):
    epoch_metrics = train_per_one_epoch(
        loader=train_loader,
        model=pretrained_model,
        optimizer=adam_optimizer,
        loss_fn=criterion,
        device=local_device,
    )

    for metric_name, metric_value in epoch_metrics.items():
        print("    ", f"{metric_name}: {metric_value}")
    # The 1-based epoch number is printed after that epoch's metrics.
    print("    ", "epoch:", epoch + 1)

Loss: 2.1448: 100%|██████████| 25/25 [00:03<00:00,  6.67it/s]


     avg_train_loss: 2.553156518936157
     train_accuracy: 0.23312101910828026
     train_f1: 0.1885272313025145
     epoch: 1


Loss: 1.6273: 100%|██████████| 25/25 [00:02<00:00, 10.41it/s]


     avg_train_loss: 1.8247415590286256
     train_accuracy: 0.4694267515923567
     train_f1: 0.4007415035881568
     epoch: 2


Loss: 1.0917: 100%|██████████| 25/25 [00:02<00:00, 10.47it/s]


     avg_train_loss: 1.415084147453308
     train_accuracy: 0.5910828025477707
     train_f1: 0.5165581199173994
     epoch: 3


Loss: 1.3204: 100%|██████████| 25/25 [00:02<00:00, 10.51it/s]


     avg_train_loss: 1.0024565529823304
     train_accuracy: 0.6968152866242038
     train_f1: 0.6360485189997257
     epoch: 4


Loss: 0.5621: 100%|██████████| 25/25 [00:02<00:00, 10.48it/s]


     avg_train_loss: 0.7155654644966125
     train_accuracy: 0.7694267515923567
     train_f1: 0.7307302347971265
     epoch: 5


Loss: 0.8697: 100%|██████████| 25/25 [00:02<00:00, 10.32it/s]


     avg_train_loss: 0.5203204691410065
     train_accuracy: 0.8254777070063695
     train_f1: 0.8047571936316199
     epoch: 6


Loss: 0.4421: 100%|██████████| 25/25 [00:02<00:00,  9.53it/s]


     avg_train_loss: 0.3899052917957306
     train_accuracy: 0.867515923566879
     train_f1: 0.8548551761490054
     epoch: 7


Loss: 0.0818: 100%|██████████| 25/25 [00:02<00:00,  9.67it/s]


     avg_train_loss: 0.28459398180246354
     train_accuracy: 0.9127388535031847
     train_f1: 0.9070205906431783
     epoch: 8


Loss: 0.2334: 100%|██████████| 25/25 [00:02<00:00, 10.29it/s]


     avg_train_loss: 0.22844529867172242
     train_accuracy: 0.924203821656051
     train_f1: 0.9181047873984138
     epoch: 9


Loss: 0.4405: 100%|██████████| 25/25 [00:02<00:00, 10.00it/s]

     avg_train_loss: 0.22436793237924577
     train_accuracy: 0.9312101910828026
     train_f1: 0.9241498684192844
     epoch: 10





## Inference

In [16]:
def inference(loader: DataLoader, model: nn.Module, device: torch.device):
    """
    Predict a label for every sample yielded by ``loader``.

    :param loader: yields ``(images, _)`` batches; the second element is ignored
    :param model: network to evaluate; switched to eval mode here
    :param device: device the image batches are moved to before the forward pass
    :return: list with one argmax index (over dim 1 of the model output) per sample, in loader order
    """
    model.eval()
    predictions_list = []
    # Gradients are never used here. Disabling autograd avoids building a graph
    # for every batch, which saves memory and time.
    with torch.no_grad():
        for images, _ in tqdm(loader):
            images_in_device = images.to(device)
            preds = model(images_in_device)
            predictions_list.extend(preds.argmax(dim=1).cpu().numpy())
    return predictions_list

In [17]:
# Run the trained model over the test loader and collect one predicted label per sample.
preds_list = inference(test_loader, pretrained_model, local_device)

100%|██████████| 50/50 [00:05<00:00,  8.49it/s]


In [18]:
# Build a new frame from the test metadata with the "target" column set to the predictions;
# the dataset's own frame is left untouched.
result = test_dataset.image_metadata_df.assign(target=preds_list)

In [19]:
# Write the predictions to predictions.csv inside RAW_DIR, without the index column.
result.to_csv(RAW_DIR / "predictions.csv", index=False)