In [1]:
import torch
import numpy as np
import PIL

print(torch.cuda.is_available())

True


In [2]:
!nvidia-smi

Fri Jul 26 20:49:08 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.29.06              Driver Version: 545.29.06    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3080 ...    Off | 00000000:01:00.0  On |                  N/A |
| N/A   49C    P8              20W / 125W |    174MiB / 16384MiB |     15%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
import pickle
import numpy as np
from skimage import io

from tqdm import tqdm, tqdm_notebook
from PIL import Image
from pathlib import Path

from torchvision import transforms, models, datasets
from multiprocessing.pool import ThreadPool
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

from matplotlib import colors, pyplot as plt
%matplotlib inline

In [4]:
DATA_MODES = ['train', 'val', 'test']
RESCALE_SIZE = 224
DEVICE = torch.device("cuda")

In [5]:
data_transforms = transforms.Compose([
    transforms.Resize((RESCALE_SIZE, RESCALE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [6]:
train_dir = "./train/simpsons_dataset"

In [7]:
import math

image_datasets = datasets.ImageFolder(train_dir, data_transforms)

In [8]:
dataset_len = len(image_datasets)
p1 = (dataset_len / 100)
train_size = math.floor(p1 * 70)
test_size = dataset_len - train_size

In [9]:
train_dataset, val_dataset = torch.utils.data.random_split(image_datasets, [train_size, test_size])

In [10]:
len(train_dataset), len(val_dataset)

(14653, 6280)

In [11]:
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=32,
    shuffle=True
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=128
)

loaders = {
    "train": train_dataloader,
    "valid": val_dataloader
}

In [12]:
model_resnet18 = models.resnet18(pretrained=True)
model_resnet18



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
for param in model_resnet18.parameters():
    param.requires_grad = False

In [14]:
num_classes = 42
model_resnet18.fc = nn.Linear(512, num_classes)

In [15]:
import torch.optim as optim

optimizer = optim.Adam(model_resnet18.fc.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.5)

In [16]:
!rm -rf logs

In [17]:
from catalyst import dl

runner = dl.SupervisedRunner()

runner.train(
    model=model_resnet18,
    engine=dl.GPUEngine("cuda"),
    optimizer=optimizer,
    criterion=nn.CrossEntropyLoss(),
    scheduler=scheduler,
    callbacks=[
        dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
        dl.BackwardCallback(metric_key="loss"),
        dl.OptimizerCallback(metric_key="loss"), 
        dl.AccuracyCallback(input_key="logits", target_key="targets"),
        dl.SchedulerCallback(),
        dl.PrecisionRecallF1SupportCallback(
            input_key="logits", target_key="targets", num_classes=num_classes, log_on_batch=False
        ),
        dl.MultilabelAccuracyCallback(
            input_key="logits", target_key="targets", threshold=0.5
        )
    ],
    loaders=loaders,
    num_epochs=2,
    verbose=True,
    logdir="logs/resnet18",
    load_best_on_end=True,
)

1/2 * Epoch (train):   0%|          | 0/458 [00:00<?, ?it/s]

train (1/2) accuracy: 0.7757049729000266 | accuracy/std: 0.018888789793209838 | accuracy01: 0.16153688662810212 | accuracy01/std: 0.09196300792572057 | f1/_macro: 0.05253553738118266 | f1/_micro: 0.16153188679913177 | f1/_weighted: 0.1360469254037464 | loss: 2.9885655937118036 | loss/mean: 2.9885655937118036 | loss/std: 0.2670616760514393 | lr: 0.0001 | momentum: 0.9 | precision/_macro: 0.09622150038414544 | precision/_micro: 0.16153688664437316 | precision/_weighted: 0.1872967246514975 | recall/_macro: 0.055255643433054585 | recall/_micro: 0.16153688664437316 | recall/_weighted: 0.16153688664437316




1/2 * Epoch (valid):   0%|          | 0/50 [00:00<?, ?it/s]

valid (1/2) accuracy: 0.8023127219479557 | accuracy/std: 0.003921774910885513 | accuracy01: 0.2933121019108281 | accuracy01/std: 0.045131544528801026 | f1/_macro: 0.10835410442547812 | f1/_micro: 0.29330710199606 | f1/_weighted: 0.2634342828790352 | loss: 2.705651424796718 | loss/mean: 2.705651424796718 | loss/std: 0.10071353925032626 | lr: 0.0001 | momentum: 0.9 | precision/_macro: 0.19537095403177082 | precision/_micro: 0.293312101910828 | precision/_weighted: 0.37478612516436277 | recall/_macro: 0.10754583868022854 | recall/_micro: 0.293312101910828 | recall/_weighted: 0.293312101910828
* Epoch (1/2) lr: 0.0001 | momentum: 0.9


2/2 * Epoch (train):   0%|          | 0/458 [00:00<?, ?it/s]

train (2/2) accuracy: 0.856709028212909 | accuracy/std: 0.027268273194124195 | accuracy01: 0.4010782774899065 | accuracy01/std: 0.09829475271008242 | f1/_macro: 0.164047345279494 | f1/_micro: 0.4010732775481703 | f1/_weighted: 0.3773019098353382 | loss: 2.5024270151106176 | loss/mean: 2.5024270151106176 | loss/std: 0.2090335133332254 | lr: 0.0001 | momentum: 0.9 | precision/_macro: 0.22537197448399973 | precision/_micro: 0.40107827748583913 | precision/_weighted: 0.43565942158957593 | recall/_macro: 0.1594255629590236 | recall/_micro: 0.40107827748583913 | recall/_weighted: 0.40107827748583913


2/2 * Epoch (valid):   0%|          | 0/50 [00:00<?, ?it/s]

valid (2/2) accuracy: 0.8915946476778408 | accuracy/std: 0.0033170093203202535 | accuracy01: 0.49554140127388535 | accuracy01/std: 0.05164927485334196 | f1/_macro: 0.20209256468927211 | f1/_micro: 0.49553640132433474 | f1/_weighted: 0.46048043906953406 | loss: 2.3145194940506277 | loss/mean: 2.3145194940506277 | loss/std: 0.11254629553532026 | lr: 0.0001 | momentum: 0.9 | precision/_macro: 0.23211043795162994 | precision/_micro: 0.49554140127388535 | precision/_weighted: 0.48020903831289835 | recall/_macro: 0.20507323616266518 | recall/_micro: 0.49554140127388535 | recall/_weighted: 0.49554140127388535
* Epoch (2/2) lr: 0.0001 | momentum: 0.9
Top models:
logs/resnet18/checkpoints/model.0002.pth	2.0000


In [18]:
%load_ext tensorboard
%tensorboard --logdir logs

In [19]:
class TestDataset(Dataset):
    def __init__(self, files, transforms):
        super().__init__()
        self.files = files
        self.transforms = transforms

    def __len__(self):
        return len(self.files)

    def load_sample(self, file):
        image = Image.open(file)
        image.load()
        return image

    def __getitem__(self, index):
        x = self.load_sample(self.files[index])
        return self.transforms(x)

In [20]:
test_dir = "./testset/testset"
TEST_DIR = Path(test_dir)
test_files = list(TEST_DIR.rglob('*.jpg'))

In [21]:
test_dataset = TestDataset(test_files, data_transforms)

In [22]:
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=128
)

In [53]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
label_encoder.fit(image_datasets.classes)

In [None]:
result = []

for prediction in runner.predict_loader(engine=dl.GPUEngine("cuda"), loader=val_dataloader):
    result += prediction['logits'].detach().cpu().numpy().argmax(1).tolist()

label_encoder.inverse_transform(result)