# patch_size를 1, 3, 5, 7, 9, 11로 바꾸어 가면서 테스트해 보자

In [None]:
from utils.utils import train_model, evaluate_model_with_cm, TiffDataset
from models.image_classifier import ResNetLike_48
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
import os
import pandas as pd

In [21]:
# Select the compute device: use CUDA when torch reports it as available,
# otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [26]:
def test_filter(box_number):
    """Return True when ``box_number`` belongs to the held-out split.

    A box is held out when its number modulo 9 equals 0 or 5; every other
    box number yields False.
    """
    return box_number % 9 in (0, 5)


# The name starts with "test_", so test collectors such as pytest would treat
# this helper as a test case if the module were imported; opt out explicitly.
test_filter.__test__ = False

### 데이터 준비 함수, 학습 및 평가 함수 정의

In [None]:
# Naive image preprocessing pipeline: convert the input to a tensor, then cast
# it to floating point.
_preprocessing_steps = [
    transforms.ToTensor(),
    # Cast to float (the original note says the source data is uint16).
    transforms.Lambda(lambda tensor: tensor.float()),
]
transform = transforms.Compose(_preprocessing_steps)

# Dataset factory
def create_datasets(patch_size, large_tif_dir=None, label_file=None, file_list=None):
    """Build the training and validation ``TiffDataset`` objects for one patch size.

    Both datasets share the same source files, label file, patch size and
    ``transform``; they differ only in the box filter. Boxes for which
    ``test_filter`` returns True go to the validation dataset, all others to
    the training dataset.

    Args:
        patch_size: Patch size forwarded to ``TiffDataset``.
        large_tif_dir: Directory holding the source TIFF files. Defaults to
            the original hard-coded location.
        label_file: Path of the label CSV. Defaults to the original
            hard-coded location.
        file_list: TIFF file names to load. Defaults to the original three
            files.

    Returns:
        A ``(train_dataset, val_dataset)`` tuple.
    """
    # NOTE(review): the defaults combine a raw-string prefix with doubled
    # backslashes, so the paths contain literal double separators. They are
    # kept byte-for-byte to preserve the existing behaviour.
    if large_tif_dir is None:
        large_tif_dir = r"C:\\Users\\taebin\\Desktop\\my_study\\capstone_design\\data3\\원천 데이터\\naive"
    if label_file is None:
        label_file = r"C:\\Users\\taebin\\Desktop\\my_study\\capstone_design\\data3\\라벨링 데이터\\label_mapping_sampled_5.csv"
    if file_list is None:
        file_list = ["jiri_1.tif", "jiri_2.tif", "sobaek.tif"]

    dataset_kwargs = {
        "large_tif_dir": large_tif_dir,
        "file_list": file_list,
        "label_file": label_file,
        "patch_size": patch_size,
        "transform": transform,
    }

    def _is_train_box(box_number):
        # Training boxes are exactly those the validation filter rejects.
        return not test_filter(box_number)

    train_dataset = TiffDataset(**dataset_kwargs, box_filter_fn=_is_train_box)
    val_dataset = TiffDataset(**dataset_kwargs, box_filter_fn=test_filter)
    return train_dataset, val_dataset

# DataLoader factory
def create_dataloaders(train_dataset, val_dataset, batch_size=32, num_workers=0):
    """Wrap the two datasets in ``DataLoader`` objects.

    The training loader shuffles its samples; the validation loader keeps
    them in order. Both use the same batch size and worker count.

    Returns:
        A ``(train_loader, val_loader)`` tuple.
    """
    shared_options = {"batch_size": batch_size, "num_workers": num_workers}
    train_loader = DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

# Training and evaluation routine
def train_and_evaluate(patch_size, train_loader, val_loader, num_epochs=30, checkpoint_path=None):
    """Train a fresh ``ResNetLike_48`` and evaluate it on both loaders.

    A new 6-class model is trained with cross-entropy loss and Adam
    (lr=0.001) via ``train_model``. The returned best state is saved to disk,
    loaded back into the model, and the model is then evaluated with
    ``evaluate_model_with_cm`` on the training and validation loaders.

    Args:
        patch_size: Patch size of the data; used for logging and for the
            default checkpoint file name.
        train_loader: Loader passed to ``train_model`` and evaluated as
            "train data".
        val_loader: Loader passed to ``train_model`` and evaluated as
            "validation data".
        num_epochs: Number of epochs forwarded to ``train_model``.
        checkpoint_path: Where to save the best model state. Defaults to
            ``patch_size_test_{patch_size}_{num_epochs}.pth`` in the current
            working directory.

    Returns:
        A ``(train_result, val_result)`` tuple holding whatever
        ``evaluate_model_with_cm`` returns for each loader.
    """
    print(f"Training with patch size: {patch_size}")

    model = ResNetLike_48(num_classes=6).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # NOTE(review): patience=100 exceeds the default epoch count, so any
    # patience-based early stop in train_model presumably never triggers;
    # confirm this is intended.
    best_model_state, _train_losses, _val_losses = train_model(
        model, train_loader, val_loader, criterion, optimizer, num_epochs=num_epochs, patience=100
    )

    if checkpoint_path is None:
        checkpoint_path = f"patch_size_test_{patch_size}_{num_epochs}.pth"
    else:
        # Create the target directory for caller-supplied paths so the save
        # below does not fail on a missing folder.
        checkpoint_dir = os.path.dirname(checkpoint_path)
        if checkpoint_dir:
            os.makedirs(checkpoint_dir, exist_ok=True)

    torch.save(best_model_state, checkpoint_path)
    # Evaluate the best state rather than whatever the last epoch left behind.
    model.load_state_dict(best_model_state)

    print("\ntrain data")
    train_result = evaluate_model_with_cm(model, train_loader, num_classes=6)
    print("\nvalidation data")
    val_result = evaluate_model_with_cm(model, val_loader, num_classes=6)

    return train_result, val_result


### 패치 사이즈를 바꾸어 가며 테스트

In [None]:
# Patch sizes to compare, one full training run per entry.
patch_sizes = [1, 3, 5, 7, 9, 11]

# Parallel lists: position i of each list describes one evaluation result
# (which patch size, which data split, and the resulting table).
result_df_dicts = {"patch_size": [], "data_type": [], "df": []}

num_epochs = 50
os.makedirs("./checkpoints/patch_size_test", exist_ok=True)  # directory that receives the checkpoints


def _record_result(size, split_name, result_df):
    """Append one evaluation result to the three parallel result lists."""
    result_df_dicts["patch_size"].append(size)
    result_df_dicts["data_type"].append(split_name)
    result_df_dicts["df"].append(result_df)


for patch_size in patch_sizes:
    print(patch_size)
    train_dataset, val_dataset = create_datasets(patch_size)
    train_loader, val_loader = create_dataloaders(train_dataset, val_dataset)

    print(f"Training with patch size: {patch_size}")

    # A fresh model, loss and optimizer for every patch size.
    criterion = nn.CrossEntropyLoss()
    model = ResNetLike_48(num_classes=6).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    training_outcome = train_model(
        model,
        train_loader,
        val_loader,
        criterion,
        optimizer,
        num_epochs=num_epochs,
        patience=100,
    )
    best_model_state, train_losses, val_losses = training_outcome

    # Persist the best state, then evaluate with exactly that state loaded.
    checkpoint_path = f"./checkpoints/patch_size_test/ResNetLike_48_{patch_size}_{num_epochs}.pth"
    torch.save(best_model_state, checkpoint_path)
    model.load_state_dict(best_model_state)

    print("train data")
    train_df = evaluate_model_with_cm(model, train_loader, num_classes=6)
    _record_result(patch_size, "train_data", train_df)

    print("validation data")
    validation_df = evaluate_model_with_cm(model, val_loader, num_classes=6)
    _record_result(patch_size, "validation", validation_df)

In [None]:
# Combine every recorded result table into one DataFrame, tagging each table's
# rows with the patch size and data split it was produced for.
_result_columns = ("patch_size", "data_type", "df")
_tagged_frames = [
    frame.assign(patch_size=size, data_type=split)
    for size, split, frame in zip(*(result_df_dicts[column] for column in _result_columns))
]
final_df = pd.concat(_tagged_frames, ignore_index=False)

# Move the index into a regular column and rename the resulting "index"
# column to "class" (presumably the index holds per-class row labels; confirm
# against evaluate_model_with_cm).
final_df = final_df.reset_index().rename(columns={"index": "class"})

# Save as CSV; "utf-8-sig" is UTF-8 with a byte-order mark.
os.makedirs("./results", exist_ok=True)
final_df.to_csv("./results/patch_size_results.csv", index=False, encoding="utf-8-sig")