In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models
from sklearn.metrics import f1_score
import numpy as np
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split

In [31]:
# Image preprocessing / augmentation pipeline.
# Every image is resized to 224x224, randomly flipped and rotated, converted
# to a tensor and normalised per channel with the mean/std values below
# (the statistics torchvision documents for its ImageNet-pretrained models).
data_transforms = transforms.Compose(
    transforms=[
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),        # random left-right flip
        transforms.RandomRotation(degrees=10),    # random rotation, at most 10 degrees either way
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Load the label CSV into a DataFrame.
train_labels = pd.read_csv(
    'C:/Users/jymin/OneDrive - UNIST (1)/AI-Factory 경진대회/task1_data/train_labels.csv'
)

# Custom dataset for labelled images
class CustomImageDataset(Dataset):
    """Dataset of (image, label) pairs described by a pandas DataFrame.

    Column 0 of ``labels_df`` is read as the image file name (relative to
    ``img_dir``) and column 1 as the label; both are accessed positionally.
    """

    def __init__(self, labels_df, img_dir, transform=None):
        """Store the label table, the image directory and an optional transform."""
        self.labels_df, self.img_dir, self.transform = labels_df, img_dir, transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return self.labels_df.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The image is opened from ``img_dir`` and converted to RGB; the
        transform is applied only when one was supplied.
        """
        file_name = self.labels_df.iloc[idx, 0]   # column 0: image file name
        label = self.labels_df.iloc[idx, 1]       # column 1: label value
        image = Image.open(os.path.join(self.img_dir, file_name)).convert('RGB')

        if not self.transform:
            return image, label
        return self.transform(image), label

# Dataset for the unlabelled test images
class CustomImageTestDataset(Dataset):
    """Dataset that yields every image file found directly inside ``img_dir``.

    ``img_names`` holds the file names in sorted order, so the ordering of the
    samples (and of anything exported alongside ``img_names``) is the same on
    every run and every platform.

    Args:
        img_dir: Directory containing the test images.
        transform: Optional callable applied to each RGB PIL image.
        extensions: Iterable of file-name suffixes (case-insensitive) that
            count as images. Pass ``None`` to keep every regular file.
    """

    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tif', '.tiff', '.webp')

    def __init__(self, img_dir, transform=None, extensions=IMAGE_EXTENSIONS):
        self.img_dir = img_dir
        self.transform = transform

        allowed = tuple(ext.lower() for ext in extensions) if extensions else None
        # os.listdir returns entries in arbitrary order and includes
        # sub-directories and non-image files (e.g. desktop.ini / Thumbs.db in a
        # synced Windows folder), which would crash Image.open later on.
        self.img_names = sorted(
            name
            for name in os.listdir(img_dir)
            if os.path.isfile(os.path.join(img_dir, name))
            and (allowed is None or name.lower().endswith(allowed))
        )

    def __len__(self):
        """Return the number of image files that were discovered."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Return the (optionally transformed) RGB image at position ``idx``."""
        img_path = os.path.join(self.img_dir, self.img_names[idx])
        # The context manager closes the underlying file deterministically;
        # convert() returns an independent in-memory image.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image


# Build the datasets and data loaders
# Stratified 80/20 train/validation split on the 'bad' column (fixed seed).
train_data, val_data = train_test_split(train_labels, test_size=0.2, stratify=train_labels['bad'], random_state=42)

# Deterministic preprocessing for validation and test data. Random flips and
# rotations are a training-time augmentation only: applying them at evaluation
# time makes the validation F1 and the exported predictions vary from run to
# run and scores the model on perturbed images.
eval_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_dataset = CustomImageDataset(labels_df=train_data, img_dir='C:/Users/jymin/OneDrive - UNIST (1)/AI-Factory 경진대회/task1_data/train', transform=data_transforms)
val_dataset = CustomImageDataset(labels_df=val_data, img_dir='C:/Users/jymin/OneDrive - UNIST (1)/AI-Factory 경진대회/task1_data/train', transform=eval_transforms)
test_dataset = CustomImageTestDataset(img_dir='C:/Users/jymin/OneDrive - UNIST (1)/AI-Factory 경진대회/task1_data/test', transform=eval_transforms)

# Only the training loader shuffles; validation/test keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [32]:
# Load a pretrained ResNet50 and replace its final layer with a 2-class head.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision releases
# in favour of the `weights=` argument - confirm the installed version before changing.
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=2)  # Output layer for binary classification ("Good" or "Bad")

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model = model.to(device)



In [33]:
# Loss function and optimizer setup
# Cross-entropy over the model's two output logits.
criterion = nn.CrossEntropyLoss()
# Adam over every model parameter (the whole network is updated, not only the new head).
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [34]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score
import pandas as pd

# Model training function (with per-epoch validation)
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20, device=None):
    """Train ``model`` and restore the weights that scored the best validation F1.

    After every training epoch the model is evaluated on ``val_loader`` and a
    weighted-average F1 score is computed. A copy of the weights is kept
    whenever the score strictly improves, and those weights are loaded back
    into the model before it is returned.

    Args:
        model: Network to train; updated in place.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        The same ``model`` object, loaded with the best-scoring weights.
    """
    if device is None:
        # Follow the model's own placement instead of relying on a notebook global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    def _snapshot_weights():
        # state_dict() returns tensors that share storage with the live
        # parameters; without cloning, the "best" snapshot would keep changing
        # as training continues and would always equal the final weights.
        return {key: value.detach().clone() for key, value in model.state_dict().items()}

    best_f1 = 0.0
    best_model_wts = _snapshot_weights()

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size so the epoch figure is a per-sample mean.
            running_loss += loss.item() * inputs.size(0)

        # ---- validation pass: collect predictions for the F1 score ----
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(preds.cpu().numpy())

        # F1 score (weighted average)
        epoch_f1 = f1_score(y_true, y_pred, average='weighted')
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader.dataset):.4f}, Validation F1-Score: {epoch_f1:.4f}')

        # Keep an independent copy of the best weights seen so far.
        if epoch_f1 > best_f1:
            best_f1 = epoch_f1
            best_model_wts = _snapshot_weights()

    # Restore and return the best model.
    model.load_state_dict(best_model_wts)
    print(f'Best Validation F1-Score: {best_f1:.4f}')
    return model

In [35]:
# Run model training
# Trains for 20 epochs and rebinds `model` to the network returned by train_model.
model = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20)

Epoch 1/20, Loss: 0.0994, Validation F1-Score: 0.9957
Epoch 2/20, Loss: 0.0164, Validation F1-Score: 0.9957
Epoch 3/20, Loss: 0.0109, Validation F1-Score: 0.9986
Epoch 4/20, Loss: 0.0042, Validation F1-Score: 0.9986
Epoch 5/20, Loss: 0.0088, Validation F1-Score: 1.0000
Epoch 6/20, Loss: 0.0028, Validation F1-Score: 0.9986
Epoch 7/20, Loss: 0.0024, Validation F1-Score: 0.9986
Epoch 8/20, Loss: 0.0030, Validation F1-Score: 1.0000
Epoch 9/20, Loss: 0.0052, Validation F1-Score: 0.9957
Epoch 10/20, Loss: 0.0075, Validation F1-Score: 0.9971
Epoch 11/20, Loss: 0.0007, Validation F1-Score: 1.0000
Epoch 12/20, Loss: 0.0002, Validation F1-Score: 1.0000
Epoch 13/20, Loss: 0.0002, Validation F1-Score: 1.0000
Epoch 14/20, Loss: 0.0002, Validation F1-Score: 1.0000
Epoch 15/20, Loss: 0.0001, Validation F1-Score: 0.9986
Epoch 16/20, Loss: 0.0001, Validation F1-Score: 1.0000
Epoch 17/20, Loss: 0.0001, Validation F1-Score: 1.0000
Epoch 18/20, Loss: 0.0000, Validation F1-Score: 1.0000
Epoch 19/20, Loss: 

In [37]:
# Put the model in evaluation mode and predict the test set.
model.eval()
# test_loader does not shuffle, so predictions come out in the same order as img_names.
test_filenames = test_loader.dataset.img_names
all_preds = []

with torch.no_grad():
    for batch in test_loader:
        logits = model(batch.to(device))
        # The predicted class is the index of the largest logit in each row.
        batch_preds = logits.argmax(dim=1)
        all_preds.extend(batch_preds.cpu().numpy())

# Write the test predictions to CSV (one row per file, no index column).
output_path = 'C:/Users/jymin/OneDrive - UNIST (1)/AI-Factory 경진대회/task1_data/test_predictions.csv'
preds_df = pd.DataFrame({'file_name': test_filenames, 'bad': all_preds})
preds_df.to_csv(output_path, index=False)

print("Test predictions saved to 'test_predictions.csv'")

Test predictions saved to 'test_predictions.csv'
