In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torchvision.transforms import RandAugment
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import CyclicLR
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision.models import resnet50
from tqdm import tqdm
import numpy as np
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.transforms import v2
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split

In [2]:
extract_dir = '/home/jupyter/mnt/datasets/'
train_labels_csv = os.path.join(extract_dir, 'bhw1/labels.csv')
train_image_dir = os.path.join(extract_dir, 'bhw1/trainval')
test_image_dir = os.path.join(extract_dir, 'bhw1/test')
sample_submission_csv = os.path.join(extract_dir, 'bhw1/sample_submission.csv')

In [3]:
class ImageDataset(Dataset):
    def __init__(self, labels_csv, image_dir, transform=None):
        self.data = pd.read_csv(labels_csv)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.data.iloc[idx, 0])
        image = Image.open(img_name).convert("RGB")
        label = self.data.iloc[idx, 1]

        if self.transform:
            image = self.transform(image)

        return image, label

In [4]:
mean = [0.5691, 0.5447, 0.4933]
std = [0.1876, 0.1863, 0.1906]

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandAugment(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

transform_test = transforms.Compose([
    transforms.Resize((40, 40)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

full_train_dataset = ImageDataset(labels_csv=train_labels_csv, image_dir=train_image_dir, transform=transform_train)

labels_df = pd.read_csv(train_labels_csv)
labels = labels_df['Category'].values

train_idx, validation_idx = train_test_split(np.arange(len(labels)),
                                             test_size=0.05,
                                             random_state=42,
                                             shuffle=True,
                                             stratify=labels)

train_dataset = Subset(full_train_dataset, train_idx)
val_dataset = Subset(full_train_dataset, validation_idx)

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=2, prefetch_factor=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False, num_workers=2, prefetch_factor=2, pin_memory=True)

test_dataset = ImageDataset(labels_csv=sample_submission_csv, image_dir=test_image_dir, transform=transform_test)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=2, prefetch_factor=2, pin_memory=True)

In [5]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return torch.relu(out)

class EnhancedResNet(nn.Module):
    def __init__(self, num_classes):
        super(EnhancedResNet, self).__init__()
        self.initial_layer = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        self.layer1 = self._make_layer(64, 128, num_blocks=3, stride=1)
        self.layer2 = self._make_layer(128, 256, num_blocks=4, stride=2)
        self.layer3 = self._make_layer(256, 512, num_blocks=6, stride=2)

        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def _make_layer(self, in_channels, out_channels, num_blocks, stride):
        layers = []
        for i in range(num_blocks):
            layers.append(ResidualBlock(
                in_channels if i == 0 else out_channels, 
                out_channels, 
                stride if i == 0 else 1
            ))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.initial_layer(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [6]:
def initialize_weights(model):
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            nn.init.constant_(m.bias, 0)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = len(pd.read_csv(train_labels_csv)['Category'].unique())
model = EnhancedResNet(num_classes=num_classes).to(device)
initialize_weights(model)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
# optimizer1 = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
# scheduler1 = CyclicLR(optimizer1, base_lr=1e-4, max_lr=1e-2, step_size_up=10, mode='triangular2', cycle_momentum=False)
# optimizer2 = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)
# scheduler2 = ReduceLROnPlateau(optimizer2, patience=7)

optimizer1 = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler1 = CyclicLR(optimizer1, base_lr=1e-4, max_lr=1e-2, step_size_up=10, mode='triangular2', cycle_momentum=False)
optimizer2 = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)
scheduler2 = ReduceLROnPlateau(optimizer2, patience=7)

num_epochs_stage1 = 22
num_epochs_stage2 = 13
train_accuracies = []
val_accuracies = []

for epoch in range(num_epochs_stage1):
    model.train()
    correct_train, total_train = 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs_stage1}"):
        images, labels = images.to(device), labels.to(device)

        optimizer1.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer1.step()

        _, preds = torch.max(outputs, 1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = correct_train / total_train
    train_accuracies.append(train_accuracy)
    scheduler1.step()

    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            correct_val += (preds == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = correct_val / total_val
    val_accuracies.append(val_accuracy)
    print(f"Epoch {epoch+1}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}")

Epoch 1/22: 100%|██████████| 372/372 [08:02<00:00,  1.30s/it]


Epoch 1: Train Accuracy: 0.0309, Validation Accuracy: 0.0600


Epoch 2/22: 100%|██████████| 372/372 [08:07<00:00,  1.31s/it]


Epoch 2: Train Accuracy: 0.0418, Validation Accuracy: 0.0624


Epoch 3/22: 100%|██████████| 372/372 [08:08<00:00,  1.31s/it]


Epoch 3: Train Accuracy: 0.0750, Validation Accuracy: 0.1060


Epoch 4/22: 100%|██████████| 372/372 [08:08<00:00,  1.31s/it]


Epoch 4: Train Accuracy: 0.1127, Validation Accuracy: 0.0920


Epoch 5/22: 100%|██████████| 372/372 [08:07<00:00,  1.31s/it]


Epoch 5: Train Accuracy: 0.1477, Validation Accuracy: 0.1648


Epoch 6/22: 100%|██████████| 372/372 [08:05<00:00,  1.30s/it]


Epoch 6: Train Accuracy: 0.1863, Validation Accuracy: 0.2056


Epoch 7/22: 100%|██████████| 372/372 [08:04<00:00,  1.30s/it]


Epoch 7: Train Accuracy: 0.2184, Validation Accuracy: 0.2382


Epoch 8/22: 100%|██████████| 372/372 [08:03<00:00,  1.30s/it]


Epoch 8: Train Accuracy: 0.2426, Validation Accuracy: 0.2672


Epoch 9/22: 100%|██████████| 372/372 [08:03<00:00,  1.30s/it]


Epoch 9: Train Accuracy: 0.2714, Validation Accuracy: 0.2726


Epoch 10/22: 100%|██████████| 372/372 [08:02<00:00,  1.30s/it]


Epoch 10: Train Accuracy: 0.2980, Validation Accuracy: 0.2916


Epoch 11/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 11: Train Accuracy: 0.3169, Validation Accuracy: 0.3238


Epoch 12/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 12: Train Accuracy: 0.3485, Validation Accuracy: 0.3462


Epoch 13/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 13: Train Accuracy: 0.3819, Validation Accuracy: 0.3548


Epoch 14/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 14: Train Accuracy: 0.4096, Validation Accuracy: 0.3832


Epoch 15/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 15: Train Accuracy: 0.4428, Validation Accuracy: 0.3954


Epoch 16/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 16: Train Accuracy: 0.4749, Validation Accuracy: 0.4264


Epoch 17/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 17: Train Accuracy: 0.5117, Validation Accuracy: 0.4422


Epoch 18/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 18: Train Accuracy: 0.5484, Validation Accuracy: 0.4570


Epoch 19/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 19: Train Accuracy: 0.5890, Validation Accuracy: 0.4672


Epoch 20/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 20: Train Accuracy: 0.6298, Validation Accuracy: 0.4740


Epoch 21/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 21: Train Accuracy: 0.6607, Validation Accuracy: 0.4754


Epoch 22/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 22: Train Accuracy: 0.6614, Validation Accuracy: 0.4782


In [10]:
for epoch in range(7):
    model.train()
    correct_train, total_train = 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs_stage1 + num_epochs_stage2}"):
        images, labels = images.to(device), labels.to(device)

        optimizer2.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer2.step()

        _, preds = torch.max(outputs, 1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = correct_train / total_train
    train_accuracies.append(train_accuracy)

    model.eval()
    correct_val, total_val, val_loss = 0, 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            correct_val += (preds == labels).sum().item()
            total_val += labels.size(0)
            v_loss = criterion(outputs, labels)

        val_loss += v_loss.item() * images.shape[0]

    val_loss /= len(val_loader.dataset)
    scheduler2.step(val_loss)

    val_accuracy = correct_val / total_val
    val_accuracies.append(val_accuracy)
    print(f"Epoch {epoch+1}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}")

Epoch 23/35: 100%|██████████| 372/372 [07:58<00:00,  1.29s/it]


Epoch 23: Train Accuracy: 0.6818, Validation Accuracy: 0.4822


Epoch 24/35: 100%|██████████| 372/372 [08:00<00:00,  1.29s/it]


Epoch 24: Train Accuracy: 0.6814, Validation Accuracy: 0.4828


Epoch 25/35: 100%|██████████| 372/372 [08:00<00:00,  1.29s/it]


Epoch 25: Train Accuracy: 0.6798, Validation Accuracy: 0.4816


Epoch 26/35: 100%|██████████| 372/372 [08:00<00:00,  1.29s/it]


Epoch 26: Train Accuracy: 0.6813, Validation Accuracy: 0.4826


Epoch 27/35: 100%|██████████| 372/372 [07:59<00:00,  1.29s/it]


Epoch 27: Train Accuracy: 0.6815, Validation Accuracy: 0.4800


Epoch 28/35: 100%|██████████| 372/372 [08:00<00:00,  1.29s/it]


Epoch 28: Train Accuracy: 0.6821, Validation Accuracy: 0.4800


Epoch 29/35: 100%|██████████| 372/372 [07:59<00:00,  1.29s/it]


Epoch 29: Train Accuracy: 0.6833, Validation Accuracy: 0.4862


Epoch 30/35:   0%|          | 1/372 [00:02<17:47,  2.88s/it]


KeyboardInterrupt: 

In [12]:
for epoch in range(5):
    model.train()
    correct_train, total_train = 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs_stage1}"):
        images, labels = images.to(device), labels.to(device)

        optimizer1.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer1.step()

        _, preds = torch.max(outputs, 1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)

    train_accuracy = correct_train / total_train
    train_accuracies.append(train_accuracy)
    scheduler1.step()

    model.eval()
    correct_val, total_val = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            correct_val += (preds == labels).sum().item()
            total_val += labels.size(0)

    val_accuracy = correct_val / total_val
    val_accuracies.append(val_accuracy)
    print(f"Epoch {epoch+1}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}")

Epoch 1/22: 100%|██████████| 372/372 [08:00<00:00,  1.29s/it]


Epoch 1: Train Accuracy: 0.6659, Validation Accuracy: 0.4744


Epoch 2/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 2: Train Accuracy: 0.6680, Validation Accuracy: 0.4710


Epoch 3/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 3: Train Accuracy: 0.6687, Validation Accuracy: 0.4734


Epoch 4/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 4: Train Accuracy: 0.6717, Validation Accuracy: 0.4642


Epoch 5/22: 100%|██████████| 372/372 [08:01<00:00,  1.29s/it]


Epoch 5: Train Accuracy: 0.6732, Validation Accuracy: 0.4464


In [11]:
model.eval()
predictions = []
with torch.no_grad():
    for images, _ in tqdm(test_loader):
        images = images.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        predictions.extend(preds.cpu().numpy())

submission = pd.read_csv(sample_submission_csv)
submission['Category'] = predictions
submission.to_csv('labels_test1.csv', index=False)
print("Predictions saved to labels_test.csv")

100%|██████████| 40/40 [00:16<00:00,  2.45it/s]

Predictions saved to labels_test.csv



