In [1]:
import os
import pandas as pd
from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim


In [2]:
class ArtDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['img_path']
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        
        if self.transform:
            image = self.transform(image)

        label = self.dataframe.iloc[idx]['label']
        return image, label


In [4]:
# train.csv 파일
train_df = pd.read_csv('extracted_open_1/train.csv')

# 화가 이름 → 숫자 라벨
label_mapping = {name: i for i, name in enumerate(train_df['artist'].unique())}
train_df['label'] = train_df['artist'].map(label_mapping)

# 이미지 경로
train_df['img_path'] = train_df['img_path']  # 경로가 정리되어 있다고 가정

# 이미지 전처리
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

train_dataset = ArtDataset(train_df, root_dir='extracted_open_1', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(label_mapping))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 간단한 학습 루프
for epoch in range(3):  # 예시로 3에폭만
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}], Loss: {loss.item():.4f}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\302-15/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 55.6MB/s]


Epoch [1], Loss: 2.4479
Epoch [2], Loss: 1.9949
Epoch [3], Loss: 1.5325


In [6]:
test_df = pd.read_csv('extracted_open_1/test.csv')
test_df['img_path'] = test_df['img_path']

class TestDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['img_path']
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image

test_dataset = TestDataset(test_df, root_dir='extracted_open_1', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval()
predictions = []

with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())


In [8]:
# 숫자 → 작가 이름 매핑
reverse_mapping = {v: k for k, v in label_mapping.items()}

# 예측 결과를 작가 이름으로 변환
predicted_artists = [reverse_mapping[label] for label in predictions]

# sample_submission.csv에 맞춰 저장
submission = pd.read_csv('extracted_open_1/sample_submission.csv')
submission['artist'] = predicted_artists
submission.to_csv('submission.csv', index=False)


In [9]:
import os
import pandas as pd
from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from torch.optim import lr_scheduler

class ArtDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['img_path']
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        
        if self.transform:
            image = self.transform(image)

        label = self.dataframe.iloc[idx]['label']
        return image, label

# 데이터 로드
train_df = pd.read_csv('extracted_open_1/train.csv')

# 화가 이름 → 숫자 라벨
label_mapping = {name: i for i, name in enumerate(train_df['artist'].unique())}
train_df['label'] = train_df['artist'].map(label_mapping)

# 이미지 경로
train_df['img_path'] = train_df['img_path']

# 이미지 전처리 및 데이터 증강
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Pretrained weights에 맞게 정규화
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Pretrained weights에 맞게 정규화
])

train_dataset = ArtDataset(train_df, root_dir='extracted_open_1', transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# 모델 정의 (ResNet50 사용)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(label_mapping))  # 마지막 레이어 수정
model = model.to(device)

# 손실 함수와 옵티마이저
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# 학습률 스케줄러 설정 (StepLR 예시)
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# 학습 루프
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # 정확도 측정을 위한 예측 수집
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    scheduler.step()  # 학습률 스케줄러 적용

    acc = accuracy_score(all_labels, all_preds)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}, Acc: {acc:.4f}")

# 테스트 데이터 예측
test_df = pd.read_csv('extracted_open_1/test.csv')
test_df['img_path'] = test_df['img_path']

test_dataset = ArtDataset(test_df, root_dir='extracted_open_1', transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval()
predictions = []

with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

# 숫자 → 작가 이름 매핑
reverse_mapping = {v: k for k, v in label_mapping.items()}

# 예측 결과를 작가 이름으로 변환
predicted_artists = [reverse_mapping[label] for label in predictions]

# sample_submission.csv에 맞춰 저장
submission = pd.read_csv('extracted_open_1/sample_submission.csv')
submission['artist'] = predicted_artists
submission.to_csv('submission.csv4', index=False)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\302-15/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 60.9MB/s]


Epoch [1/10], Loss: 422.6267, Acc: 0.4168
Epoch [2/10], Loss: 250.3515, Acc: 0.6292
Epoch [3/10], Loss: 192.0350, Acc: 0.7122
Epoch [4/10], Loss: 164.2338, Acc: 0.7501
Epoch [5/10], Loss: 142.9303, Acc: 0.7780
Epoch [6/10], Loss: 106.0062, Acc: 0.8428
Epoch [7/10], Loss: 86.4140, Acc: 0.8750
Epoch [8/10], Loss: 77.5506, Acc: 0.8877
Epoch [9/10], Loss: 73.4289, Acc: 0.8993
Epoch [10/10], Loss: 73.2055, Acc: 0.8922


KeyError: 'label'