In [1]:
import os
import torch
import pandas as pd
import numpy as np

from Load_Data import random_seed
from Train import Trainer
from Inference import Model_Ensemble, run_inference
from PseudoLabel import PseudoLabeler

In [2]:
# Fix the random seed.
random_seed(42)

# --- Runtime ---------------------------------------------------------------
# Use CUDA when torch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
num_workers = 4

# --- Model -----------------------------------------------------------------
model_name = "convnextv2_large.fcmae_ft_in22k_in1k_384"
# Alternative backbones (commented out), with the batch sizes noted for them:
# model_name = "convnext_large_in22ft1k" # batch_size=14~16
# model_name = "convnext_base_in22ft1k" # batch_size=32
# model_name = "maxvit_large_tf_384"
img_size = 384
num_classes = 17
Drop_out = 0.4

# --- Training hyperparameters ----------------------------------------------
LR = 1e-5
weight_decay = 5e-4
BATCH_SIZE = 14
EPOCHS = 60
patience = 5
n_splits = 3

# --- Paths -----------------------------------------------------------------
# run_name is interpolated into every output path below, so changing it
# redirects the saved models, augmented data and submission file together.
run_name = "V18"

# Inputs
train_csv_path = "../data/train_update2.csv"
test_csv_path = "../data/sample_submission.csv"
original_train_path = "../data/train/"
test_path = "../data/test/"

# Outputs (all keyed by run_name)
model_save_path = f"../model/{run_name}/"
augmented_save_path = f"../data/augment_image/{run_name}-augmented"
augmented_csv_save_path = f"../data/augment_csv/{run_name}-augmented.csv"
submission_path = f"../data/submission/{run_name}-submission.csv"

In [3]:
# Original data: read the train and test CSV files.
train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)

# Create the Trainer. Keyword arguments are grouped by concern; every value
# comes from the configuration cell except the two literals at the end.
trainer = Trainer(
    # data and augmentation outputs
    df=train_df,
    original_data_path=original_train_path,
    augmented_save_path=augmented_save_path,
    augmented_csv_save_path=augmented_csv_save_path,
    # model
    model_name=model_name,
    num_classes=num_classes,
    img_size=img_size,
    drop_out=Drop_out,
    # optimisation
    lr=LR,
    weight_decay=weight_decay,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    patience=patience,
    # runtime
    device=device,
    num_workers=num_workers,
    # bookkeeping
    save_dir=model_save_path,
    run_name_prefix=run_name,
    # cross-validation and augmentation settings
    n_splits=n_splits,
    k_fold=True,
    augmentation_target_count=500,
)

# Run K-Fold splitting, augmentation and training in one call; the result is
# kept in f1_df.
f1_df = trainer.run()

Calculating aspect ratios: 100%|██████████| 1570/1570 [00:00<00:00, 18364.11it/s]








KeyboardInterrupt: 

In [6]:
# Read the test CSV that run_inference receives (as a copy) as submission_df.
test_df = pd.read_csv(test_csv_path)

# Ensemble model.
# The fold weights are hard-coded here; the commented-out line below is the
# alternative that takes them from the "f1" column of f1_df instead.
fold_weights = np.array([0.9416, 0.94768, 0.93204])
# fold_weights = f1_df["f1"].values
ensembler = Model_Ensemble(
    # where the fold checkpoints live and how they are weighted
    fold_paths_dir=model_save_path,
    fold_weights=fold_weights,
    k_fold=True,
    # model settings
    model_name=model_name,
    num_classes=num_classes,
    drop_out=Drop_out,
    device=device,
)

# Run inference. NOTE: TTA is switched off in this call (use_tta=False).
run_inference(
    ensembler=ensembler,
    submission_df=test_df.copy(),
    test_path=test_path,
    save_path=submission_path,  # save location
    img_size=img_size,
    batch_size=BATCH_SIZE,
    num_workers=num_workers,
    use_tta=False,  # TTA
)

Inference: 100%|██████████| 449/449 [06:52<00:00,  1.09it/s]

[✓] Saved submission to: ../data/submission/V17-submission.csv





In [None]:
# Pseudo-labelling step.
#
# This cell uses only names bound by the cells above (ensembler, trainer,
# augmented_save_path, run_name, test_df, ...) so that it works after
# Restart Kernel -> Run All.

# Base directory handed to the labeler as save_base_dir.
# NOTE(review): the notebook configures no such directory elsewhere; this
# value follows the existing "../data/<kind>/" layout and is a placeholder --
# confirm or adjust before running.
combined_data_save_dir = "../data/pseudo_label/"

# Create the pseudo-labelling runner on top of the fold ensemble.
labeler = PseudoLabeler(
    ensembler=ensembler,
    device=device,
    img_size=img_size,
    batch_size=BATCH_SIZE,
    num_workers=num_workers
)

# Generate pseudo labels, combine the data and save the files in one call.
# NOTE(review): assumes trainer.df is the augmented training frame that
# carries the strata information -- confirm against Trainer.
final_df, final_image_path = labeler.run(
    original_aug_df=trainer.df,  # original df including strata information
    original_aug_path=augmented_save_path,
    test_df=test_df,
    test_path=test_path,
    confidence_threshold=0.97,
    save_base_dir=combined_data_save_dir,
    run_name=f"{run_name}-Pseudo-Data"
)