# import

In [34]:
import os
import random

import timm
import wandb
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn.functional as F
from augraphy import *
from pytorch_lightning import LightningModule, Trainer, LightningDataModule
from pytorch_lightning.loggers import WandbLogger
from albumentations.core.transforms_interface import ImageOnlyTransform
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.metrics import accuracy_score, f1_score

# Configs

In [35]:
wandb.finish()

In [36]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data config
data_path = '../data/'

# model config
model_list = {
    1 : 'resnet34',
    2 : 'resnet50',
    3 : 'resnet101',
    4 : 'resnet152',
    6 : 'vit_small_patch16_224',
    5 : 'vit_base_patch16_224',
    7 : 'convnext_base.fb_in22k_ft_in1k',
    8 : 'vit_large_patch16_224',
    9 : 'convnextv2_huge.fcmae_ft_in1k'	, # Out of Memory
    10 : 'convnext_large.fb_in22k_ft_in1k', 
    11 : "convnextv2_base.fcmae_ft_in1k",
    12 : 'convnext_base.fb_in22k_ft_in1k_384',
    13 : 'vit_huge_patch14_224'	

}

model_family = {"resnet" : [model_list[1],
                            model_list[2],
                            model_list[3],
                            model_list[4],
                            model_list[7],
                            model_list[9],
                            model_list[10],
                            model_list[11],
                            model_list[12],],
                "vit" : [model_list[6],
                         model_list[5],
                         model_list[8],
                         model_list[13]]
                            }

num_classes=17

# training config

CFS={"MODEL" : model_list[13],
    "IMG_SIZE" : 224,
     "LR" : 1e-5,
    'EPOCHS' : 100,
    'BATCH_SIZE' : 8,
    "NUM_WORKERS" : 8,}

wandb_logger = WandbLogger(
    project="pl-migration",
    name=f"{CFS['MODEL']},{CFS['BATCH_SIZE']},{CFS['EPOCHS']},{CFS['LR']}",
    config=CFS,
)

In [37]:
model_list = timm.list_models()
print(model_list)

['bat_resnext26ts', 'beit_base_patch16_224', 'beit_base_patch16_384', 'beit_large_patch16_224', 'beit_large_patch16_384', 'beit_large_patch16_512', 'beitv2_base_patch16_224', 'beitv2_large_patch16_224', 'botnet26t_256', 'botnet50ts_256', 'caformer_b36', 'caformer_m36', 'caformer_s18', 'caformer_s36', 'cait_m36_384', 'cait_m48_448', 'cait_s24_224', 'cait_s24_384', 'cait_s36_384', 'cait_xs24_384', 'cait_xxs24_224', 'cait_xxs24_384', 'cait_xxs36_224', 'cait_xxs36_384', 'coat_lite_medium', 'coat_lite_medium_384', 'coat_lite_mini', 'coat_lite_small', 'coat_lite_tiny', 'coat_mini', 'coat_small', 'coat_tiny', 'coatnet_0_224', 'coatnet_0_rw_224', 'coatnet_1_224', 'coatnet_1_rw_224', 'coatnet_2_224', 'coatnet_2_rw_224', 'coatnet_3_224', 'coatnet_3_rw_224', 'coatnet_4_224', 'coatnet_5_224', 'coatnet_bn_0_rw_224', 'coatnet_nano_cc_224', 'coatnet_nano_rw_224', 'coatnet_pico_rw_224', 'coatnet_rmlp_0_rw_224', 'coatnet_rmlp_1_rw2_224', 'coatnet_rmlp_1_rw_224', 'coatnet_rmlp_2_rw_224', 'coatnet_rmlp_2

# Augraphy

In [38]:
ink_phase = [
    InkBleed(
        intensity_range=(0.5, 0.6),
        kernel_size=random.choice([(5, 5), (3, 3)]),
        severity=(0.2, 0.4),
        p=0.1,
    ),
    BleedThrough(
        intensity_range=(0.1, 0.3),
        color_range=(32, 224),
        ksize=(17, 17),
        sigmaX=1,
        alpha=random.uniform(0.1, 0.2),
        offsets=(10, 20),
        p=0.1,
    ),
],

paper_phase = [
    ColorPaper(
        hue_range=(0, 255),
        saturation_range=(10, 40),
        p=0.33,
    ),
    OneOf(
        [
        DelaunayTessellation(
            n_points_range=(500, 800),
            n_horizontal_points_range=(500, 800),
            n_vertical_points_range=(500, 800),
            noise_type="random",
            color_list="default",
            color_list_alternate="default",
            ),
        PatternGenerator(
            imgx=random.randint(256, 512),
            imgy=random.randint(256, 512),
            n_rotation_range=(10, 15),
            color="random",
            alpha_range=(0.25, 0.5),
            ),
        VoronoiTessellation(
            mult_range=(50, 80),
            seed=19829813472,
            num_cells_range=(500, 1000),
            noise_type="random",
            background_value=(200, 255),
            ),
        ],
        p=1.0,
    ),
    AugmentationSequence(
        [
            NoiseTexturize(
                sigma_range=(3, 10),
                turbulence_range=(2, 5),
            ),
            BrightnessTexturize(
                texturize_range=(0.9, 0.99),
                deviation=0.03,
            ),
        ],
    ),
]

post_phase = [
    OneOf(
        [
            DirtyDrum(
                line_width_range=(1, 6),
                line_concentration=random.uniform(0.05, 0.15),
                direction=random.randint(0, 2),
                noise_intensity=random.uniform(0.6, 0.95),
                noise_value=(64, 224),
                ksize=random.choice([(3, 3), (5, 5), (7, 7)]),
                sigmaX=0,
                p=0.2,
            ),
            DirtyRollers(
                line_width_range=(2, 32),
                scanline_type=0,
            ),
        ],
        p=0.33,
    ),
    Folding(
        fold_count=10,
        fold_noise=0.0,
        fold_angle_range = (-360,360),
        gradient_width=(0.1, 0.2),
        gradient_height=(0.01, 0.1),
        backdrop_color = (0,0,0),
        p=0.33
    ),
    SubtleNoise(
        subtle_range=random.randint(5, 10),
        p=0.33,
    ),
    Jpeg(
        quality_range=(25, 95),
        p=0.33,
    ),
    Moire(
        moire_density = (15,20),
        moire_blend_method = "normal",
        moire_blend_alpha = 0.1,
        p=0.33
    ),
    ColorShift(
        color_shift_offset_x_range=(3, 5),
        color_shift_offset_y_range=(3, 5),
        color_shift_iterations=(2, 3),
        color_shift_brightness_range=(0.9, 1.1),
        color_shift_gaussian_kernel_range=(3, 3),
        p=0.33
    ),
    Scribbles(
        scribbles_type="random",
        scribbles_location="random",
        scribbles_size_range=(250, 600),
        scribbles_count_range=(1, 6),
        scribbles_thickness_range=(1, 3),
        scribbles_brightness_change=[32, 64, 128],
        scribbles_text="random",
        scribbles_text_font="random",
        scribbles_text_rotate_range=(0, 360),
        scribbles_lines_stroke_count_range=(1, 6),
        p=0.1,
    ),
    BadPhotoCopy(
        noise_type=-1,
        noise_side="random",
        noise_iteration=(1, 2),
        noise_size=(1, 3),
        noise_value=(128, 196),
        noise_sparsity=(0.3, 0.6),
        noise_concentration=(0.1, 0.6),
        blur_noise=random.choice([True, False]),
        blur_noise_kernel=random.choice([(3, 3), (5, 5), (7, 7)]),
        wave_pattern=random.choice([True, False]),
        edge_effect=random.choice([True, False]),
        p=0.33,
    ),
    Faxify(
        scale_range=(0.3, 0.6),
        monochrome=random.choice([0, 1]),
        monochrome_method="random",
        monochrome_arguments={},
        halftone=random.choice([0, 1]),
        invert=1,
        half_kernel_size=random.choice([(1, 1), (2, 2)]),
        angle=(0, 360),
        sigma=(1, 3),
        p=0.1,
    ),
    Geometric(
        scale=(0.5, 1.5),
        translation=(50, -50),
        fliplr=1,
        flipud=1,
        crop=(),
        rotate_range=(3, 5),
        p=0.33,
    ),

]

pipeline = AugraphyPipeline(ink_phase=ink_phase, paper_phase=paper_phase, post_phase=post_phase)

class AugraphyTransform(ImageOnlyTransform):
    def __init__(self, augraphy_pipeline, always_apply=False, p=0.5):
        super().__init__(always_apply, p)
        self.augraphy_pipeline = augraphy_pipeline

    def apply(self, img, **params):
        # NumPy → PIL 변환
        pil_img = Image.fromarray(img)
        # Augraphy 증강 적용
        aug_img = self.augraphy_pipeline(pil_img)
        # PIL → NumPy 변환
        return np.array(aug_img)

Augraphy = AugraphyTransform(augraphy_pipeline=pipeline, p=0.5)

wandb_logger.experiment.config["Augrapy"] = str(pipeline)


# Transform

In [None]:
# 스코어가 하락한 augementation 모음

# A.Affine(translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)}, rotate=(-15, 15), scale=(0.9, 1.1), shear=(-10, 10), p=0.5),
# A.Perspective(scale=(0.05, 0.1), p=0.5),
# A.SquareSymmetry(p=0.2),
# A.Transpose(p=0.5), 
# A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=15, val_shift_limit=10, p=0.3),
# A.RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10, p=0.2)
# A.ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.5), p=0.2)


In [39]:
if CFS['MODEL'] in model_family['resnet']:
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]
else:
    norm_mean = [0.5, 0.5, 0.5]
    norm_std = [0.5, 0.5, 0.5]
    
# augmentation을 위한 transform 코드
trn_transform = A.Compose([
    # 0. augraphy
    Augraphy,
    
    # 1. 기하학적 변환 (Geometric Transformations)
    A.OneOf([
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.15, rotate_limit=15, p=0.5),
        A.OpticalDistortion(distort_limit=0.2, p=0.5),
        A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5)
    ], p=1.0),
    
    # 2. 공간적 변형 (Spatial Transformations)
    A.OneOf([
        A.RandomCrop(height=int(CFS["IMG_SIZE"]*0.9), width=int(CFS["IMG_SIZE"]*0.9), p=0.7),
        A.RandomResizedCrop(size=(CFS["IMG_SIZE"], CFS["IMG_SIZE"]), scale=(0.8, 1.0), p=0.3),
        A.Transpose(p=0.3), 
        A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=0.2),
    ], p=1.0),
    
    # 3. 색상 변환 (Color Transformations)
    A.OneOf([
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
        A.RandomGamma(gamma_limit=(80, 120), p=0.3),
        A.CLAHE(clip_limit=4.0, p=0.2),
    ], p=1.0),
    
    # 4. 노이즈 및 블러 (Noise & Blur)
    A.OneOf([
        A.GaussNoise(var_limit=(10.0, 50.0), mean=0.0, per_channel=True, p=0.4),
        A.GaussianBlur(blur_limit=(3, 7), p=0.3),
        A.MotionBlur(blur_limit=7, p=0.3),
    ], p=1.0),
    
    # 5. 고급 증강 기법 (Advanced Augmentations)
    A.OneOf([
        A.CoarseDropout(max_holes=8, max_height=16, max_width=16, fill_value=0, p=0.5), # cutout
        A.RandomSunFlare(src_radius=100, p=0.1),
        A.RandomShadow(num_shadows_lower=1, num_shadows_upper=3, p=0.2)
    ], p=1.0),
    
    # 6. 최종 전처리
    A.Resize(CFS["IMG_SIZE"], CFS['IMG_SIZE']),
    A.Normalize(mean=norm_mean, std=norm_std),
    ToTensorV2()
])

# test image 변환을 위한 transform 코드
tst_transform = A.Compose([
    A.Resize(CFS["IMG_SIZE"], CFS['IMG_SIZE']),
    A.Normalize(mean=norm_mean, std=norm_std),
    ToTensorV2(),
])

# WandB에 로깅
wandb_logger.experiment.config["train_transform"] = str(trn_transform)
wandb_logger.experiment.config["test_transform"] = str(tst_transform)

# print(transform_str)
# WandB에 파라미터 로깅
# wandb_logger.experiment.config["train_transform"] = get_transform_params(trn_transform)
# wandb_logger.experiment.config["test_transform"] = get_transform_params(tst_transform)


# Dataset

In [30]:
# 데이터셋 클래스를 정의합니다.
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

# Datamodule

In [41]:
class DataModule(LightningDataModule):
    def __init__(self, data_path, train_transform, test_transform, batch_size, num_workers):
        super().__init__()
        self.data_path = data_path
        self.train_transform = train_transform
        self.test_transform = test_transform
        self.batch_size = batch_size
        self.num_workers = num_workers

    def setup(self, stage=None):
        if stage == "fit" or stage is None:
            self.train_dataset = ImageDataset(
                csv=os.path.join(self.data_path, "train.csv"),
                path=os.path.join(self.data_path, "train"),
                transform=self.train_transform
            )
            
        if stage == "test" or stage == "predict" or stage is None:
            self.test_dataset = ImageDataset(
                csv=os.path.join(self.data_path, "sample_submission.csv"),
                path=os.path.join(self.data_path, "test"),
                transform=self.test_transform
            )

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=True,
            drop_last=False
        )
    
    def test_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=True
        )
    
    def predict_dataloader(self):
        return self.test_dataloader()
    
datamodule = DataModule(data_path='../data/',
    train_transform=trn_transform,
    test_transform=tst_transform,
    batch_size=CFS['BATCH_SIZE'],
    num_workers=CFS['NUM_WORKERS']
)

# Model

In [42]:
class LightningModel(LightningModule):
    def __init__(self, model_name, num_classes, lr):  
        super().__init__()

        self.model = timm.create_model(model_name=model_name, 
                                       pretrained=True,
                                       num_classes=num_classes)
        self.lr = lr
        self.num_classes = num_classes
        self.train_preds = []      # 예측값 저장
        self.train_targets = []    # 타겟 저장
        self.train_losses = []     # 배치별 손실 저장

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        pred = y_hat.argmax(dim=1)
        
        # 배치 단위 로깅
        self.log('train_loss_step', loss, prog_bar=True)
        
        # 에포크 종료 시 메트릭 계산을 위한 데이터 수집
        self.train_preds.append(pred.detach().cpu())
        self.train_targets.append(y.detach().cpu())
        self.train_losses.append(loss.detach().cpu())  # 손실 추가 저장
        
        return loss

    def on_train_epoch_end(self):
        # 전체 에포크 데이터 수집
        all_preds = torch.cat(self.train_preds)
        all_targets = torch.cat(self.train_targets)
        
        # 에포크 평균 손실 계산
        epoch_loss = torch.stack(self.train_losses).mean()  # 중요!
        
        # 메트릭 계산
        epoch_acc = accuracy_score(all_targets.numpy(), all_preds.numpy())
        epoch_f1 = f1_score(all_targets.numpy(), all_preds.numpy(), average='macro')
        
        # 로깅 (epoch_loss 포함)
        self.log('train_loss', epoch_loss, prog_bar=True)
        self.log('train_acc', epoch_acc, prog_bar=True)
        self.log('train_f1', epoch_f1, prog_bar=True)
        
        # 다음 에포크를 위해 리셋
        self.train_preds.clear()
        self.train_targets.clear()
        self.train_losses.clear()  # 손실 리스트 초기화

    def predict_step(self, batch, batch_idx):
        x, _ = batch      
        y_hat = self(x)
        return y_hat.argmax(dim=1)

    def configure_optimizers(self):
        optimizer = Adam(self.parameters(), lr=self.lr)
        scheduler = StepLR(optimizer, step_size=45, gamma=0.5)
    #     scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #     optimizer, mode='min', factor=0.1, patience=5
    #       )
    #        return {
    #           "optimizer": optimizer,
    #           "lr_scheduler": {
    #               "scheduler": scheduler,
    #               "monitor": "val_loss"  # 검증 손실 기반
    #           }
    #       }
        return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": scheduler,
            "interval": "epoch",  # epoch마다 step
            "frequency": 1
        }
    }

lightning_model = LightningModel(CFS['MODEL'], num_classes, CFS["LR"])

model.safetensors:   0%|          | 0.00/2.52G [00:00<?, ?B/s]

# Train

In [None]:
# 트레이너 설정
trainer = Trainer(
    max_epochs=CFS["EPOCHS"],
    accelerator='cuda' if torch.cuda.is_available() else 'cpu',
    devices="auto",
    logger=wandb_logger,
    # callbacks=[
    #     pl.callbacks.ModelCheckpoint(
    #         dirpath="checkpoints/",
    #         filename="{epoch}-{val_loss:.2f}",
    #         save_top_k=3,
    #         monitor="val_loss"  # 검증 데이터 있을 때만 유효
    #     )
    # ]
)

# 학습 실행
trainer.fit(
    model=lightning_model,
    datamodule=datamodule
)

wandb.finish()


Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type              | Params | Mode 
----------------------------------------------------
0 | model | VisionTransformer | 630 M  | train
----------------------------------------------------
630 M     Trainable params
0         Non-trainable params
630 M     Total params
2,523.146 Total estimated model params size (MB)
684       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

# Prediction

In [22]:
predictions = trainer.predict(
    model=lightning_model, 
    datamodule=datamodule
)

# 4. 결과 처리
all_preds = torch.cat(predictions).cpu().numpy()  # [n_samples]
# 샘플 제출 파일 로드
submission = pd.read_csv(os.path.join(data_path, "sample_submission.csv"))
# 예측값으로 타겟 열 업데이트
submission["target"] = all_preds


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

In [24]:
# 결과 저장
submission.to_csv("submission.csv", index=False)

In [None]:
torch.cuda.empty_cache()
print(torch.cuda.memory_summary())

# 모델저장, 불러오기 - 작업중

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint

# 체크포인트 콜백 설정
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints/",               # 저장 경로
    filename="{epoch}-{val_loss:.2f}",    # 파일명 형식
    save_top_k=2,                         # 상위 2개 모델 저장
    monitor="train_loss",                   # 모니터링 지표
    mode="min",                           # 최소화 대상
    save_weights_only=False               # 전체 상태 저장 (권장)
)

# 트레이너에 콜백 추가
trainer = Trainer(
    callbacks=[checkpoint_callback],
    max_epochs=CFS['EPOCHS'],
    logger=wandb_logger
)

# 특정 지점에서 수동 저장
trainer.save_checkpoint("manual_save.ckpt")

# 모델 아키텍처 초기화
loaded_model = LightningModel.load_from_checkpoint(
    checkpoint_path="checkpoints/epoch=10-val_loss=0.32.ckpt",
    model_name=CFS['MODEL'],
    num_classes=num_classes,
    lr=CFS['LR']
)

# 예측 실행
predictions = trainer.predict(model=loaded_model, datamodule=datamodule)

# 이전 체크포인트에서 학습 재개
trainer = Trainer(resume_from_checkpoint="checkpoints/last.ckpt")
trainer.fit(loaded_model, datamodule=datamodule)


In [None]:

# 모델 아키텍처 초기화
loaded_model = LightningModel.load_from_checkpoint(
    checkpoint_path="/data/ephemeral/home/dev/doc_classification/src/pl-migration/qv19z0sy/checkpoints/epoch=79-step=4000.ckpt",
    model_name=CFS['MODEL'],
    num_classes=num_classes,
    lr=CFS['LR']
)
predictions = trainer.predict(model=loaded_model, datamodule=datamodule)



In [23]:
# 트레이너 설정
trainer = Trainer(
    max_epochs=CFS["EPOCHS"],
    accelerator='cuda' if torch.cuda.is_available() else 'cpu',
    devices="auto",
    logger=wandb_logger,
    # callbacks=[
    #     pl.callbacks.ModelCheckpoint(
    #         dirpath="checkpoints/",
    #         filename="{epoch}-{val_loss:.2f}",
    #         save_top_k=3,
    #         monitor="val_loss"  # 검증 데이터 있을 때만 유효
    #     )
    # ]
)

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
predictions = trainer.predict(model=loaded_model, datamodule=datamodule)


You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]