### Competition: https://www.kaggle.com/competitions/journey-springfield
#### Score: 0.98831

In [17]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [18]:
train_data_dir = './train/simpsons_dataset'
test_data_dir = './testset/testset'

In [19]:
import os


# dataset = datasets.ImageFolder(root=train_data_dir, transform=None)

# aug_transform = transforms.Compose([
#     transforms.RandomResizedCrop(size=224, scale=(0.8, 1), interpolation=transforms.InterpolationMode.LANCZOS),
#     transforms.RandomHorizontalFlip(p=0.5),
#     transforms.RandomRotation(degrees=15),
#     transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
#     transforms.ToTensor()
# ])

# for idx, (img, label) in enumerate(dataset):
#     augmented_img = aug_transform(img)
    
#     augmented_img_pil = transforms.ToPILImage()(augmented_img)

#     original_img_path, _ = dataset.samples[idx]
#     original_filename = os.path.basename(original_img_path)
#     original_dir = os.path.dirname(original_img_path)

#     new_filename = original_filename.split('.')[0] + '_aug.jpg'
#     new_img_path = os.path.join(original_dir, new_filename)

#     augmented_img_pil.save(new_img_path)

In [20]:
from torch.utils.data import random_split


transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_val_dataset = datasets.ImageFolder(root=train_data_dir, transform=transform)

train_size = int(0.8 * len(train_val_dataset))
val_size = len(train_val_dataset) - train_size

train_dataset, val_dataset = random_split(train_val_dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=8)

In [21]:
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score


model = models.convnext_base(weights=models.ConvNeXt_Base_Weights.IMAGENET1K_V1).cuda()

for param in model.features[:-2].parameters():
    param.requires_grad = False

num_classes = len(train_val_dataset.classes)
model.classifier[-1] = nn.Sequential(
    nn.Linear(model.classifier[-1].in_features, 1024),
    nn.BatchNorm1d(1024),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, num_classes)
)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=3e-3, weight_decay=1e-3)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

In [22]:
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
print(len(train_dataloader))

True
1
NVIDIA GeForce RTX 3050 Laptop GPU
524


In [23]:
from tqdm import tqdm 


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device=device)
torch.backends.cudnn.benchmark = True
scaler = torch.GradScaler("cuda")

num_epochs = 12

for epoch in range(num_epochs):
    
    model.train()
    running_loss = 0.0
    all_labels = []
    all_preds = []
    
    train_dataloader_tqdm = tqdm(train_dataloader, desc="Training", leave=False)
    
    for inputs, labels in train_dataloader_tqdm:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        
        with torch.autocast(device_type="cuda"):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        preds = torch.argmax(outputs, 1)
        running_loss += loss.item()

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())
        
        train_dataloader_tqdm.set_postfix(loss=loss.item())

    train_loss = running_loss / len(train_dataloader)
    train_accuracy = f1_score(all_labels, all_preds, average='weighted')
    
    model.eval()
    running_loss = 0.0
    all_labels = []
    all_preds = []
    
    val_dataloader_tqdm = tqdm(val_dataloader, desc="Validation", leave=False)
    
    with torch.no_grad():
        for inputs, labels in val_dataloader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)

            with torch.autocast(device_type="cuda"):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            preds = torch.argmax(outputs, 1)

            running_loss += loss.item()
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            
            val_dataloader_tqdm.set_postfix(loss=loss.item())

    val_loss = running_loss / len(val_dataloader)
    val_accuracy = f1_score(all_labels, all_preds, average='weighted')
    scheduler.step()
    
    print(f"--- Epoch {epoch + 1}/{num_epochs}: Train accuracy: {train_accuracy:.4f}, Validation accuracy: {val_accuracy:.4f} ---")

Training:   0%|          | 0/524 [00:00<?, ?it/s]

                                                                          

--- Epoch 1/12: Train accuracy: 0.7068, Validation accuracy: 0.8783 ---


                                                                          

--- Epoch 2/12: Train accuracy: 0.8520, Validation accuracy: 0.9287 ---


                                                                          

--- Epoch 3/12: Train accuracy: 0.8908, Validation accuracy: 0.9299 ---


                                                                          

--- Epoch 4/12: Train accuracy: 0.9126, Validation accuracy: 0.9429 ---


                                                                           

--- Epoch 5/12: Train accuracy: 0.9289, Validation accuracy: 0.9589 ---


                                                                          

--- Epoch 6/12: Train accuracy: 0.9423, Validation accuracy: 0.9619 ---


                                                                           

--- Epoch 7/12: Train accuracy: 0.9541, Validation accuracy: 0.9701 ---


                                                                           

--- Epoch 8/12: Train accuracy: 0.9619, Validation accuracy: 0.9737 ---


                                                                           

--- Epoch 9/12: Train accuracy: 0.9676, Validation accuracy: 0.9756 ---


                                                                           

--- Epoch 10/12: Train accuracy: 0.9689, Validation accuracy: 0.9762 ---


                                                                           

--- Epoch 11/12: Train accuracy: 0.9726, Validation accuracy: 0.9759 ---


                                                                           

--- Epoch 12/12: Train accuracy: 0.9710, Validation accuracy: 0.9762 ---




In [None]:
import pandas as pd
import re


def preprocess_image(image_path):
    img = Image.open(image_path).convert("RGB")
    img_tensor = transform(img)
    img_tensor = img_tensor.unsqueeze(0)
    return img_tensor


test_images = [os.path.join(test_data_dir, f) for f in os.listdir(test_data_dir) if f.endswith(('.jpg'))]
test_images.sort(key= lambda s: int(re.search(r'\d+', s).group()))

In [36]:
res = []
for image in test_images:
    img = preprocess_image(image).to(device)
    pred = torch.argmax(model(img), 1)
    res.append(pred.item())

In [46]:
submit = pd.read_csv('sample_submission.csv')

classes = os.listdir(train_data_dir)

predicts = [classes[i] for i in res]

submit['Expected'] = predicts

submit.to_csv('sample_submission.csv', index=False)
