In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import os
from PIL import Image
from tqdm import tqdm
from google.colab import drive
import numpy as np
from sklearn.metrics import f1_score

In [None]:
# Mount Google Drive at /content/drive; every data path used in this notebook
# lives underneath that mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# CSV index files and image folders on the mounted Drive.
train_csv_path = "/content/drive/MyDrive/train.csv"
test_csv_path = "/content/drive/MyDrive/test.csv"
# Passed as img_dir for the training dataset. BirdDataset only reads img_dir
# when train=False, so training images are actually loaded from
# upscale_train_folder.
train_img_folder = "/content/drive/MyDrive/Dataset/train"
upscale_train_folder = "/content/drive/MyDrive/upscale_train"
test_img_folder = "/content/drive/MyDrive/test"

In [None]:
class BirdDataset(Dataset):
    """Image dataset indexed by a CSV file.

    In training mode (``train=True``) every item is ``(image, label)``: the
    image file name is taken from the ``upscale_img_path`` column and resolved
    inside ``upscale_dir``, and the label is the integer code of the ``label``
    column. Otherwise every item is just the image, whose file name is taken
    from the ``img_path`` column and resolved inside ``img_dir``.

    Args:
        csv_file: Path of the CSV file to read with ``pandas.read_csv``.
        img_dir: Folder searched for images when ``train`` is False.
        upscale_dir: Folder searched for images when ``train`` is True.
        transform: Optional callable applied to the RGB ``PIL.Image``.
        train: Selects training mode (labels returned) or inference mode.

    Attributes:
        label_mapping: ``{label name: integer code}``; only set when ``train``
            is True. The codes are the ones stored in ``labels``.
        labels: Array of integer label codes, one per CSV row (train only).
    """

    def __init__(self, csv_file, img_dir, upscale_dir=None, transform=None, train=True):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.upscale_dir = upscale_dir
        self.transforms = transform
        self.train = train

        if train:
            # Encode once and derive BOTH the codes and the name->code mapping
            # from the same Categorical. Building the mapping from
            # Series.unique() (order of appearance) would disagree with the
            # Categorical codes (sorted categories) actually used for training.
            encoded = pd.Categorical(self.data['label'])
            self.label_mapping = {name: code for code, name in enumerate(encoded.categories)}
            self.data['label'] = encoded.codes
            self.labels = self.data['label'].values

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, index):
        """Load the image at ``index``.

        Returns:
            ``(image, label)`` with ``label`` a ``torch.long`` scalar tensor in
            training mode, otherwise just ``image``.

        Raises:
            FileNotFoundError: If the resolved image path does not exist.
        """
        row = self.data.iloc[index]

        # Only the file name from the CSV is used; the folder always comes
        # from the constructor arguments.
        if self.train:
            img_name = os.path.basename(row['upscale_img_path'])
            img_path = os.path.join(self.upscale_dir, img_name)
        else:
            img_name = os.path.basename(row['img_path'])
            img_path = os.path.join(self.img_dir, img_name)

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"파일 없음: {img_path}")

        # Image.open is lazy and keeps the file handle open; convert() forces
        # the pixel data to load, after which the handle can be released.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        # Explicit None check: a valid transform object must never be skipped
        # just because it happens to evaluate as falsy.
        if self.transforms is not None:
            image = self.transforms(image)

        if self.train:
            label = int(self.labels[index])
            return image, torch.tensor(label, dtype=torch.long)

        return image

In [None]:
# Preprocessing shared by the training and test datasets: resize every image
# to 224x224, convert it to a tensor, then normalise each channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # NOTE(review): presumably the ImageNet channel mean/std expected by the
    # pretrained backbone - confirm against the weights being loaded.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# Training split: labelled images read from the upscaled folder.
train_dataset = BirdDataset(
    csv_file=train_csv_path,
    img_dir=train_img_folder,
    upscale_dir=upscale_train_folder,
    transform=transform,
    train=True,
)
# Test split: unlabelled images read from the test folder.
test_dataset = BirdDataset(
    csv_file=test_csv_path,
    img_dir=test_img_folder,
    transform=transform,
    train=False,
)

# Only the training loader shuffles; the test loader keeps CSV row order so
# predictions line up with the rows of test.csv.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=4,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4,
)

In [None]:
# Load EfficientNet-B0 with its ImageNet checkpoint through the `weights=`
# API. The boolean `pretrained=True` flag is deprecated in torchvision >= 0.13
# and emits a warning; IMAGENET1K_V1 is the checkpoint that flag resolved to.
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# Replace the stock classifier with a small MLP head. `classifier[1]` is the
# original final Linear layer, so its in_features is the backbone's feature
# width.
num_ftrs = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Linear(num_ftrs, 256),
    nn.ReLU(),
    nn.Dropout(0.3),
    # NOTE(review): 50 output logits are hard-coded; this must be at least the
    # number of distinct labels in train.csv - confirm it matches the dataset.
    nn.Linear(256, 50)
)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 257MB/s]


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device=device)

# Multi-class classification objective, with Adam updating every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
epochs = 10
for epoch in range(epochs):
    # Switch to training mode (the model contains Dropout and BatchNorm layers).
    model.train()
    running_loss = 0.0
    epoch_title = f"Epoch {epoch+1}/{epochs}"
    progress_bar = tqdm(train_loader, desc=epoch_title, leave=True)

    for images, labels in progress_bar:
        images = images.to(device)
        labels = labels.to(device)

        # Standard optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the running total and the bar.
        batch_loss = loss.item()
        running_loss += batch_loss
        progress_bar.set_postfix(loss=batch_loss)

    # Mean of the per-batch losses for this epoch.
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

Epoch 1/10: 100%|██████████| 1980/1980 [32:52<00:00,  1.00it/s, loss=2.66]


Epoch 1, Loss: 0.9750


Epoch 2/10: 100%|██████████| 1980/1980 [16:00<00:00,  2.06it/s, loss=1.09]


Epoch 2, Loss: 0.5148


Epoch 3/10: 100%|██████████| 1980/1980 [15:38<00:00,  2.11it/s, loss=0.181]


Epoch 3, Loss: 0.4216


Epoch 4/10: 100%|██████████| 1980/1980 [15:40<00:00,  2.10it/s, loss=0.431]


Epoch 4, Loss: 0.3613


Epoch 5/10: 100%|██████████| 1980/1980 [15:36<00:00,  2.11it/s, loss=0.00431]


Epoch 5, Loss: 0.3204


Epoch 6/10: 100%|██████████| 1980/1980 [15:49<00:00,  2.09it/s, loss=0.514]


Epoch 6, Loss: 0.2965


Epoch 7/10: 100%|██████████| 1980/1980 [15:47<00:00,  2.09it/s, loss=0.411]


Epoch 7, Loss: 0.2754


Epoch 8/10: 100%|██████████| 1980/1980 [15:39<00:00,  2.11it/s, loss=0.205]


Epoch 8, Loss: 0.2493


Epoch 9/10: 100%|██████████| 1980/1980 [15:46<00:00,  2.09it/s, loss=0.00185]


Epoch 9, Loss: 0.2384


Epoch 10/10: 100%|██████████| 1980/1980 [15:54<00:00,  2.07it/s, loss=0.39]

Epoch 10, Loss: 0.2153





In [None]:
def evaluate(model, test_loader, device):
    """Predict a class index for every sample and report macro F1 if possible.

    Batches may be either a bare image tensor or a list/tuple whose first two
    entries are ``(images, labels)``. When at least one label was collected,
    the macro-averaged F1 score is printed.

    Args:
        model: Network to run; it is switched to eval mode.
        test_loader: Iterable of batches.
        device: Device the image tensors are moved to before the forward pass.

    Returns:
        ``numpy.ndarray`` with one predicted class index per sample.
    """
    model.eval()
    predicted_classes = []
    true_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating", leave=True):
            # Labelled loaders yield (images, labels); unlabelled ones yield a tensor.
            has_labels = isinstance(batch, (list, tuple))
            images = batch[0] if has_labels else batch
            labels = batch[1] if has_labels else None

            logits = model(images.to(device))
            # Index of the largest logit along the class dimension.
            best = torch.max(logits, 1).indices
            predicted_classes.extend(best.cpu().numpy())

            if labels is not None:
                true_labels.extend(labels.cpu().numpy())

    # An empty list means no batch carried labels, so there is nothing to score.
    if true_labels:
        f1 = f1_score(true_labels, predicted_classes, average='macro')
        print(f"F1 Score: {f1:.4f}")

    return np.array(predicted_classes)


# Score the model on the same CSV and images it was trained on, in a fixed
# (unshuffled) order. The F1 printed here is therefore a training-set score,
# not a held-out validation score.
eval_dataset = BirdDataset(
    csv_file=train_csv_path,
    img_dir=train_img_folder,
    upscale_dir=upscale_train_folder,
    transform=transform,
    train=True,
)
eval_loader = DataLoader(
    dataset=eval_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4,
)

evaluate(model=model, test_loader=eval_loader, device=device)

Evaluating: 100%|██████████| 1980/1980 [03:47<00:00,  8.72it/s]

F1 Score: 0.9731





array([19,  9, 13, ...,  2, 19, 22])

In [None]:
def test(model, test_loader, device):
    model.eval()
    predicted_classes = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Testing", leave=True):
            if isinstance(batch, (list, tuple)):
                images = batch[0]
            else:
                images = batch

            images = images.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            predicted_classes.extend(predicted.cpu().numpy())

    return np.array(predicted_classes)


# Predict class indices for the test images and preview the first ten.
predicted_classes = test(model=model, test_loader=test_loader, device=device)
print("예측값 샘플:", predicted_classes[:10])

Testing: 100%|██████████| 849/849 [15:07<00:00,  1.07s/it]

예측값 샘플: [15 12 16 19 21 11 17 20  7  2]





In [None]:
train_csv_path = "/content/drive/MyDrive/train.csv"
train_df = pd.read_csv(train_csv_path)


# Index -> label name, built the same way the training labels are encoded:
# BirdDataset stores pd.Categorical(...).codes, which number the labels by
# their sorted categories. Enumerating Series.unique() would number them by
# order of first appearance instead, so predictions would be translated to the
# wrong label names whenever the two orders differ.
class_mapping = dict(enumerate(pd.Categorical(train_df["label"]).categories))


def save_predictions(predictions, test_csv_path, output_csv_path, index_to_label=None):
    """Translate predicted class indices to label names and write a submission CSV.

    The output has two columns: ``id`` (copied from the test CSV, in row
    order) and ``label`` (the label name for each prediction).

    Args:
        predictions: Sequence of integer class indices, one per test CSV row.
        test_csv_path: Path of the test CSV; must contain an ``id`` column.
        output_csv_path: Destination path of the CSV that is written.
        index_to_label: Optional ``{class index: label name}`` mapping. When
            omitted, the module-level ``class_mapping`` is used.

    Raises:
        ValueError: If the number of predictions differs from the number of
            rows in the test CSV.
        KeyError: If a predicted index is missing from the mapping.
    """
    test_df = pd.read_csv(test_csv_path)

    # Fall back to the notebook-level mapping only when none is supplied, so
    # the function can also be used (and checked) independently of globals.
    mapping = class_mapping if index_to_label is None else index_to_label

    # Fail early with a clear message instead of a pandas length error.
    if len(predictions) != len(test_df):
        raise ValueError(
            f"got {len(predictions)} predictions for {len(test_df)} rows in {test_csv_path}"
        )

    # int() normalises numpy integer scalars to plain ints for the lookup.
    predicted_labels = [mapping[int(pred)] for pred in predictions]

    submission_df = pd.DataFrame({
        "id": test_df["id"],
        "label": predicted_labels
    })
    submission_df.to_csv(output_csv_path, index=False)
    print(f"예측 결과가 {output_csv_path} 파일로 저장되었습니다.")


# Re-run inference on the test loader (the same call as the preview cell).
predicted_classes = test(model=model, test_loader=test_loader, device=device)


# Write the id/label submission file to Drive.
output_csv_path = "/content/drive/MyDrive/sample_submission.csv"
save_predictions(
    predictions=predicted_classes,
    test_csv_path=test_csv_path,
    output_csv_path=output_csv_path,
)

Testing: 100%|██████████| 849/849 [01:26<00:00,  9.78it/s]


예측 결과가 /content/drive/MyDrive/sample_submission.csv 파일로 저장되었습니다.


In [None]:
# Print the full module tree to inspect the architecture layer by layer.
print(model)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat