In [1]:
import torch
import numpy as np
import PIL

print(torch.cuda.is_available())

True


In [2]:
!nvidia-smi

Sun Aug 18 15:38:47 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3060        Off | 00000000:03:00.0 Off |                  N/A |
|  0%   55C    P8               9W / 170W |     11MiB / 12288MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce RTX 3060        Off | 00000000:04:00.0  On |  

In [3]:
import pickle
import numpy as np
from skimage import io

from tqdm import tqdm, tqdm_notebook
from PIL import Image
from pathlib import Path

from torchvision import transforms, models, datasets
from multiprocessing.pool import ThreadPool
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

from matplotlib import colors, pyplot as plt
%matplotlib inline

In [4]:
RESCALE_SIZE = 224

In [5]:
train_transforms = transforms.Compose([
    transforms.Resize((RESCALE_SIZE, RESCALE_SIZE)),
    #transforms.AutoAugment(),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x / 255),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [6]:
train_dir = "./journey-springfield/train/simpsons_dataset"

In [7]:
import math

image_datasets = datasets.ImageFolder(train_dir, train_transforms)

In [8]:
dataset_len = len(image_datasets)
p1 = (dataset_len / 100)
train_size = math.floor(p1 * 70)
test_size = dataset_len - train_size

In [9]:
train_dataset, val_dataset = torch.utils.data.random_split(image_datasets, [train_size, test_size])

In [10]:
len(train_dataset), len(val_dataset)

(14653, 6280)

In [11]:
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=128, num_workers=torch.cpu.device_count(),
    shuffle=True
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=128, num_workers=torch.cpu.device_count(),
)

loaders = {
    "train": train_dataloader,
    "valid": val_dataloader
}

In [12]:
model_resnet18 = models.resnet18(pretrained=True)
model_resnet18



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
for param in model_resnet18.parameters():
    param.requires_grad = False

In [14]:
for param in model_resnet18.avgpool.parameters():
    param.requires_grad = True
    
for param in model_resnet18.layer4.parameters():
    param.requires_grad = True

for param in model_resnet18.layer3.parameters():
    param.requires_grad = True

for param in model_resnet18.layer2.parameters():
    param.requires_grad = True

In [15]:
num_classes = 42
model_resnet18.fc = nn.Sequential(
    nn.Sequential(
        nn.Linear(512, 512),
        nn.BatchNorm1d(512),
        nn.Dropout(0.5),
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(512, 512),
        nn.Linear(512, num_classes)
    )
) 

In [16]:
import torch.optim as optim

optimizer = optim.Adam(
    (
        {
            "params": model_resnet18.layer2.parameters(),
            "lr": 1e-5
        },
        {
            "params": model_resnet18.layer3.parameters(),
            "lr": 1e-4
        },
        {
            "params": model_resnet18.layer4.parameters(),
            "lr": 1e-3
        },
        {
            "params": model_resnet18.avgpool.parameters()
        },
        {
            "params": model_resnet18.fc.parameters()
        }
    ), lr=1e-2)
scheduler = optim.lr_scheduler.StepLR(optimizer, 5, gamma=0.5)

In [17]:
!rm -rf logs

In [18]:
from catalyst import dl
        
runner = dl.SupervisedRunner()

In [27]:
%%time

runner.train(
    model=model_resnet18,
    engine=dl.GPUEngine(),
    optimizer=optimizer,
    criterion=nn.CrossEntropyLoss(),
    scheduler=scheduler,
    callbacks=[
        dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
        dl.BackwardCallback(metric_key="loss"),
        dl.OptimizerCallback(metric_key="loss"), 
        dl.AccuracyCallback(input_key="logits", target_key="targets"),
        dl.SchedulerCallback(),
        dl.PrecisionRecallF1SupportCallback(
            input_key="logits", target_key="targets", num_classes=num_classes, log_on_batch=False
        )
    ],
    loaders=loaders,
    num_epochs=35,
    verbose=True,
    logdir="logs/resnet18",
    load_best_on_end=True
)

1/1 * Epoch (train): 100%|█| 115/115 [00:38<00:00,  2.97it/s, accuracy01=0.689, 


train (1/1) accuracy01: 0.6130485222956786 | accuracy01/std: 0.1744946522347089 | f1/_macro: 0.29911824800408965 | f1/_micro: 0.613043522527642 | f1/_weighted: 0.5981767682085427 | loss: 1.8176660946365224 | loss/mean: 1.8176660946365224 | loss/std: 1.3987019730974475 | lr: 1e-05 | momentum: 0.9 | precision/_macro: 0.32270164806819585 | precision/_micro: 0.6130485224868627 | precision/_weighted: 0.5907551653414108 | recall/_macro: 0.2963525272342956 | recall/_micro: 0.6130485224868627 | recall/_weighted: 0.6130485224868626


1/1 * Epoch (valid): 100%|█| 50/50 [00:12<00:00,  4.01it/s, accuracy01=0.875, lo


valid (1/1) accuracy01: 0.7780254777070064 | accuracy01/std: 0.03613852157255981 | f1/_macro: 0.39458729293905226 | f1/_micro: 0.7780204777391387 | f1/_weighted: 0.7518811656080608 | loss: 0.9281402362380056 | loss/mean: 0.9281402362380056 | loss/std: 0.22820435465271505 | lr: 1e-05 | momentum: 0.9 | precision/_macro: 0.4914469224682573 | precision/_micro: 0.7780254777070064 | precision/_weighted: 0.7653520602391035 | recall/_macro: 0.38802097413766923 | recall/_micro: 0.7780254777070064 | recall/_weighted: 0.7780254777070064
* Epoch (1/1) lr: 1e-05 | momentum: 0.9
Top models:
logs/resnet18/checkpoints/model.0001.pth	1.0000
CPU times: user 25.8 s, sys: 4.81 s, total: 30.6 s
Wall time: 52 s


  checkpoint = torch.load(path, map_location=lambda storage, loc: storage)


In [28]:
%load_ext tensorboard
%tensorboard --logdir logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 6511), started 0:02:14 ago. (Use '!kill 6511' to kill it.)

In [21]:
import os

class TestDataset(Dataset):
    def __init__(self, root, transforms):
        super().__init__()
        self.files = sorted(list(Path(root).rglob('*.jpg')), key=self.extract_image_number)
        self.transforms = transforms

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        x = self.load_sample(self.files[index])
        return self.transforms(x), 0

    def load_sample(self, file):
        image = Image.open(file)
        image.load()
        return image

    def extract_image_number(self, path):
        filename = os.path.basename(path)
        return int(filename[3:-4])

In [22]:
test_transforms = transforms.Compose([
    transforms.Resize((RESCALE_SIZE, RESCALE_SIZE)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x / 255),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [23]:
test_dir = "./testset/testset"
test_dataset = TestDataset(test_dir, test_transforms)

In [24]:
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=128
)

In [25]:
result = []

for prediction in runner.predict_loader(engine=dl.GPUEngine("cuda"), loader=test_dataloader):
    result += prediction['logits'].detach().cpu().numpy().argmax(1).tolist()

AssertionError: 

In [None]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
label_encoder.fit(image_datasets.classes)

In [None]:
import pandas as pd

sample_submission_path = 'sample_submission.csv'
submission = pd.read_csv(sample_submission_path)
submission.head()

In [None]:
submission['Expected'] = label_encoder.inverse_transform(result)
submission

In [None]:
submission_path = 'submission.csv'
submission.to_csv(submission_path, index=None)