In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import shutil
source_path = '/content/drive/MyDrive/open (1).zip'
destination_path = '/content/open (1).zip'
shutil.copyfile(source_path, destination_path)

'/content/open (1).zip'

In [14]:
import zipfile
import os

# 압축 파일 경로
zip_file_path = '/content/open (1).zip'

# 압축을 해제할 폴더 경로 (현재 디렉토리에 'extracted'라는 폴더를 만듭니다)
extract_path = '/content/extracted'

# 압축 해제할 폴더가 없다면 생성합니다.
os.makedirs(extract_path, exist_ok=True)

try:
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print(f"'{zip_file_path}' 파일의 압축이 '{extract_path}' 폴더에 성공적으로 해제되었습니다.")
except FileNotFoundError:
    print(f"오류: '{zip_file_path}' 파일을 찾을 수 없습니다. 파일 경로를 확인해주세요.")
except zipfile.BadZipFile:
    print(f"오류: '{zip_file_path}' 파일이 올바른 ZIP 파일 형식이 아닙니다.")
except Exception as e:
    print(f"압축 해제 중 오류가 발생했습니다: {e}")

'/content/open (1).zip' 파일의 압축이 '/content/extracted' 폴더에 성공적으로 해제되었습니다.


In [5]:
import os
import pandas as pd
from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim

In [6]:
# 1. 데이터 로딩
train_df = pd.read_csv('extracted/train.csv')
test_df = pd.read_csv('extracted/test.csv')
artists_info = pd.read_csv('extracted/artists_info.csv')  # 포함


In [7]:
# 2. 화가 이름 → 숫자 라벨
unique_artists = artists_info['name'].tolist()  # artists_info 기준
label_mapping = {name: i for i, name in enumerate(unique_artists)}

train_df['label'] = train_df['artist'].map(label_mapping)
test_df['label'] = -1  # 테스트 데이터는 예측 대상


In [8]:
# 3. 이미지 경로 설정
train_df['img_path'] = train_df['img_path']
test_df['img_path'] = test_df['img_path']


In [9]:
# 4. 이미지 전처리
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [10]:
# 5. Dataset 클래스 정의
class ArtDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None, is_test=False):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['img_path']
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        if self.is_test:
            return image
        else:
            label = self.dataframe.iloc[idx]['label']
            return image, label

In [11]:
# 6. 데이터로더
train_dataset = ArtDataset(train_df, 'extracted', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = ArtDataset(test_df, 'extracted', transform=transform, is_test=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [12]:
# 7. 모델 정의 (ResNet18)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(label_mapping))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 192MB/s]


In [13]:
# 8. 모델 학습
for epoch in range(3):  # 3 에폭 예시
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # 라벨 값이 올바른 범위 내에 있는지 확인
        print(labels.min(), labels.max())

        labels = labels.long()  # 라벨 타입 변경

        # 라벨 값이 잘못된 경우 처리
        labels = torch.clamp(labels, min=0)  # 0보다 작은 값을 0으로 처리

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch [{epoch+1}/3], Loss: {total_loss:.4f}")



tensor(nan, device='cuda:0', dtype=torch.float64) tensor(nan, device='cuda:0', dtype=torch.float64)
tensor(nan, device='cuda:0', dtype=torch.float64) tensor(nan, device='cuda:0', dtype=torch.float64)
tensor(nan, device='cuda:0', dtype=torch.float64) tensor(nan, device='cuda:0', dtype=torch.float64)
tensor(4., device='cuda:0', dtype=torch.float64) tensor(47., device='cuda:0', dtype=torch.float64)
tensor(nan, device='cuda:0', dtype=torch.float64) tensor(nan, device='cuda:0', dtype=torch.float64)
tensor(nan, device='cuda:0', dtype=torch.float64) tensor(nan, device='cuda:0', dtype=torch.float64)
tensor(5., device='cuda:0', dtype=torch.float64) tensor(47., device='cuda:0', dtype=torch.float64)
tensor(nan, device='cuda:0', dtype=torch.float64) tensor(nan, device='cuda:0', dtype=torch.float64)
tensor(3., device='cuda:0', dtype=torch.float64) tensor(48., device='cuda:0', dtype=torch.float64)
tensor(nan, device='cuda:0', dtype=torch.float64) tensor(nan, device='cuda:0', dtype=torch.float64)
ten

In [14]:
# 9. 테스트 예측
model.eval()
predictions = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())


In [16]:
# 10. 예측 결과 변환 및 제출 파일 생성
reverse_mapping = {v: k for k, v in label_mapping.items()}
predicted_artists = [reverse_mapping[i] for i in predictions]

submission = pd.read_csv('extracted/sample_submission.csv')
submission['artist'] = predicted_artists
submission.to_csv('submission.csv', index=False)


In [26]:
# 1. 데이터 로딩
train_df = pd.read_csv('extracted/train.csv')
test_df = pd.read_csv('extracted/test.csv')
artists_info = pd.read_csv('extracted/artists_info.csv')  # 포함

In [27]:
# 2. 화가 이름 → 숫자 라벨
unique_artists = artists_info['name'].tolist()  # artists_info 기준
label_mapping = {name: i for i, name in enumerate(unique_artists)}

train_df['label'] = train_df['artist'].map(label_mapping)
test_df['label'] = -1  # 테스트 데이터는 예측 대상

In [28]:
# 3. 이미지 경로 설정
train_df['img_path'] = train_df['img_path']
test_df['img_path'] = test_df['img_path']

In [30]:
# 4. 이미지 전처리 및 증강 (업그레이드)
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor()
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [31]:
# 5. Dataset 클래스 정의
class ArtDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None, is_test=False):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['img_path']
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        if self.is_test:
            return image
        else:
            label = self.dataframe.iloc[idx]['label']
            return image, label

In [32]:
# 6. 데이터로더
train_dataset = ArtDataset(train_df, 'extracted', transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = ArtDataset(test_df, 'extracted', transform=test_transform, is_test=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [35]:
# 7. 모델 정의 (ResNet18 그대로 사용)
from torchvision.models import resnet18, ResNet18_Weights

# 모델 정의 (ResNet18 사용)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# weights를 ResNet18_Weights.DEFAULT로 변경
model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, len(label_mapping))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [37]:
# 모델 정의 (이미 이전에 정의된 코드)
from torchvision.models import resnet18, ResNet18_Weights

weights = ResNet18_Weights.DEFAULT  # 최신 가중치
model = resnet18(weights=weights)
model.fc = nn.Linear(model.fc.in_features, len(label_mapping))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)  # 학습률 ↓

# 학습 코드
from sklearn.metrics import accuracy_score

num_epochs = 10  # 에폭 증가

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device).long()
        labels = torch.clamp(labels, min=0)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # 정확도 측정을 위한 예측 수집
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}, Acc: {acc:.4f}")


Epoch [1/10], Loss: 463.9901, Acc: 0.3739
Epoch [2/10], Loss: 299.9259, Acc: 0.5718
Epoch [3/10], Loss: 238.3486, Acc: 0.6515
Epoch [4/10], Loss: 201.9562, Acc: 0.7024
Epoch [5/10], Loss: 177.9903, Acc: 0.7313
Epoch [6/10], Loss: 158.5414, Acc: 0.7645
Epoch [7/10], Loss: 148.3062, Acc: 0.7731
Epoch [8/10], Loss: 131.2661, Acc: 0.8016
Epoch [9/10], Loss: 118.0734, Acc: 0.8208
Epoch [10/10], Loss: 108.1817, Acc: 0.8337


In [38]:
# 9. 테스트 예측
model.eval()
predictions = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

In [40]:
# 10. 예측 결과 변환 및 제출 파일 생성
reverse_mapping = {v: k for k, v in label_mapping.items()}
predicted_artists = [reverse_mapping[i] for i in predictions]

submission = pd.read_csv('extracted/sample_submission.csv')
submission['artist'] = predicted_artists
submission.to_csv('submission2.csv', index=False)

In [5]:
import pandas as pd
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18, ResNet18_Weights
from sklearn.metrics import accuracy_score
from torch.optim.lr_scheduler import CosineAnnealingLR
import numpy as np

In [6]:
# 1. 데이터 로딩
try:
    train_df = pd.read_csv('extracted/train.csv')
    test_df = pd.read_csv('extracted/test.csv')
    artists_info = pd.read_csv('extracted/artists_info.csv')  # 포함
except FileNotFoundError as e:
    print(f"Error loading file: {e}. Please ensure the files are in the 'extracted' directory.")
    exit()

In [7]:
# 2. 화가 이름 → 숫자 라벨
unique_artists = artists_info['name'].tolist()  # artists_info 기준
label_mapping = {name: i for i, name in enumerate(unique_artists)}

train_df['label'] = train_df['artist'].map(label_mapping)
test_df['label'] = -1  # 테스트 데이터는 예측 대상

In [8]:
# 3. 이미지 경로 설정
train_df['img_path'] = train_df['img_path']
test_df['img_path'] = test_df['img_path']

In [9]:
# 4. 이미지 전처리 및 증강 (업그레이드)
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),  # 각도 증가
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),  # 범위 증가
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    # Affine 변환 추가
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # 정규화 추가
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # 정규화 추가
])

In [10]:
class ArtDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None, is_test=False):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test
        self.valid_indices = self._get_valid_indices()

    def _get_valid_indices(self):
        valid_indices = []
        for idx in range(len(self.dataframe)):
            label = self.dataframe.iloc[idx]['label']
            if not pd.isna(label):
                try:
                    int(label)
                    valid_indices.append(idx)
                except ValueError:
                    print(f"Warning: Invalid label '{label}' at index {idx}")
            else:
                print(f"Warning: NaN value found for label at index {idx}")
        return valid_indices

    def __len__(self):
        return len(self.valid_indices)

    def __getitem__(self, idx):
        original_index = self.valid_indices[idx]
        img_name = self.dataframe.iloc[original_index]['img_path']
        img_path = os.path.join(self.root_dir, img_name)
        try:
            image = Image.open(img_path).convert("RGB")
        except FileNotFoundError:
            print(f"Error: Image not found at {img_path}")
            return None

        if self.transform:
            image = self.transform(image)

        if self.is_test:
            return image
        else:
            label = int(self.dataframe.iloc[original_index]['label'])
            return image, torch.tensor(label, dtype=torch.long)

In [11]:
# 6. 데이터로더
train_dataset = ArtDataset(train_df, 'extracted', transform=train_transform)
train_loader = DataLoader(
    train_dataset, batch_size=32, shuffle=True, num_workers=2, drop_last=True
)  # num_workers 추가 and drop_last

test_dataset = ArtDataset(
    test_df, 'extracted', transform=test_transform, is_test=True
)
test_loader = DataLoader(
    test_dataset, batch_size=32, shuffle=False, num_workers=2
)  # num_workers 추가



In [12]:
# 7. 모델 정의 (ResNet18 그대로 사용)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

weights = ResNet18_Weights.DEFAULT  # 최신 가중치
model = resnet18(weights=weights)
model.fc = nn.Linear(model.fc.in_features, len(label_mapping))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    model.parameters(), lr=1e-4, weight_decay=0.01
)  # AdamW 사용 및 weight_decay 추가
scheduler = CosineAnnealingLR(optimizer, T_max=50)  # CosineAnnealingLR 스케줄러 추가

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 140MB/s]


In [13]:
# 학습 코드
num_epochs = 50  # 에폭 증가

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []

    for i, batch in enumerate(train_loader):
        if batch is None:
            print(f"Warning: Skipping batch {i} due to None values.")
            continue  # Skip this batch

        images, labels = batch
        if images is None or labels is None:
            print(f"Warning: Skipping batch {i} due to None images or labels.")
            continue

        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

        if i % 10 == 0:  # Logging frequency
            print(
                f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss:"
                f" {loss.item():.4f}"
            )

    scheduler.step()  # 에폭마다 스케줄러 업데이트
    acc = accuracy_score(all_labels, all_preds)
    print(
        f"Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f},"
        f" Acc: {acc:.4f}, LR: {optimizer.param_groups[0]['lr']:.6f}"
    )

Epoch [1/50], Step [1/177], Loss: 4.2157
Epoch [1/50], Step [11/177], Loss: 3.5160
Epoch [1/50], Step [21/177], Loss: 3.4993
Epoch [1/50], Step [31/177], Loss: 2.7081
Epoch [1/50], Step [41/177], Loss: 2.9371


KeyboardInterrupt: 

In [None]:
# 9. 테스트 예측
model.eval()
predictions = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

print("Test predictions:", predictions)

In [None]:
submission_df = pd.DataFrame({'id': test_df['id'], 'artist': [unique_artists[p] for p in predictions]})
submission_df.to_csv('submission.csv5', index=False)