# Imports

In [25]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from collections import Counter
from matplotlib import pyplot as plt
from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
from torchvision import datasets, transforms, models
from tqdm.notebook import tqdm

%matplotlib inline

import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

In [1]:
# Different modes of dataset
DATA_MODES = ['train', 'test']
# All images are scaled to size 224x224 px
RESCALE_SIZE = 224
# Use cuda
DEVICE = torch.device("cuda")
# Data loaders
BATCH_SIZE= 2000
# Parallel computing
N_CORES = 12

"use_cuda", torch.cuda.is_available()

NameError: name 'torch' is not defined

In [27]:
!nvidia-smi

Mon Nov 28 02:00:01 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 522.06       Driver Version: 522.06       CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   49C    P8    10W /  N/A |    601MiB /  6144MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [28]:
from torch.backends import cudnn

# Make computations reproducible
torch.manual_seed(7)
np.random.seed(7)
cudnn.benchmark = True
torch.use_deterministic_algorithms(False)

from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Downloading dataset

In [29]:
# Download dataset from kaggle
!pip install -q kaggle
!kaggle competitions download -c journey-springfield

journey-springfield.zip: Skipping, found more recently modified local copy (use --force to force download)


In [30]:
# Unzip dataset
!unzip -q ./journey-springfield.zip -d /data

'unzip' is not recognized as an internal or external command,
operable program or batch file.


In [31]:
!dir "./data/train/simpsons_dataset"

 Volume in drive C has no label.
 Volume Serial Number is 8ADD-B58A

 Directory of C:\Users\Alexey\PycharmProjects\deep-learning-mptu\notebooks\image_classification\data\train\simpsons_dataset

28.11.2022  00:21    <DIR>          .
28.11.2022  00:17    <DIR>          ..
28.11.2022  00:17    <DIR>          abraham_grampa_simpson
28.11.2022  00:17    <DIR>          agnes_skinner
28.11.2022  00:17    <DIR>          apu_nahasapeemapetilon
28.11.2022  00:17    <DIR>          barney_gumble
28.11.2022  00:18    <DIR>          bart_simpson
28.11.2022  00:18    <DIR>          carl_carlson
28.11.2022  00:18    <DIR>          charles_montgomery_burns
28.11.2022  00:18    <DIR>          chief_wiggum
28.11.2022  00:18    <DIR>          cletus_spuckler
28.11.2022  00:18    <DIR>          comic_book_guy
28.11.2022  00:18    <DIR>          disco_stu
28.11.2022  00:19    <DIR>          edna_krabappel
28.11.2022  00:19    <DIR>          fat_tony
28.11.2022  00:19    <DIR>          gil
28.11.2022  00:19 

In [32]:
# Saving paths to train/test datasets
DATA_DIR = "./data/"
TRAIN_DIR = Path(DATA_DIR + 'train/simpsons_dataset')
TEST_DIR = Path(DATA_DIR + 'testset/testset')

# Analyzing dataset

Let's look if classes are balanced in dataset and make plan before starting train model.

In [33]:
train_files = [path.parent.name for path in TRAIN_DIR.rglob('*.jpg')]
train_labels = pd.Series(train_files).value_counts().sort_values().to_frame("count")
test_files = [path for path in TEST_DIR.rglob('*.jpg')]
train_labels

Unnamed: 0,count
lionel_hutz,3
troy_mcclure,8
disco_stu,8
miss_hoover,17
gil,27
fat_tony,27
otto_mann,32
sideshow_mel,40
agnes_skinner,42
rainier_wolfcastle,45


Dataset is very unbalanced, and model eventually will show us bad scores for classes with low number of images.
So we need to consider data augmentation to balance dataset.

# Data augmentation

Let's use random order of transforms to apply augmentation to our dataset. We're not gonna divide dataset on train and valid.

In [34]:
augmentation = transforms.RandomOrder([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=(-30, 30)),
    transforms.Compose([
        transforms.Resize(size=300, max_size=301),
        transforms.CenterCrop(size=300),
        transforms.RandomCrop(250)
    ]),
])
def _my_normalization(x):
    return np.array(x, dtype="float32") / 255.0

train_transforms = transforms.Compose([
    augmentation,
    transforms.Resize(size=(RESCALE_SIZE, RESCALE_SIZE)),
    # Converting PIL image to Tensor and normalazing it
    transforms.Lambda(_my_normalization),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ,
])

In [35]:
# Create dataset obj
train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=train_transforms)

In [36]:
class_count = Counter(train_dataset.targets)
class_weights = {i: 1/c for i, c in class_count.items()}
sample_weights = [0] * len(train_dataset)
for i, (data, label) in enumerate(tqdm(train_dataset)):
    class_weight = class_weights[label]
    sample_weights[i] = class_weight

  0%|          | 0/20933 [00:00<?, ?it/s]

In [50]:
N = max(class_count.values()) * len(class_count)  # fit to max
train_sampler = WeightedRandomSampler(sample_weights, num_samples=N, replacement=True)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler,
                          num_workers=8, pin_memory=True, pin_memory_device="cuda")

Let's give a look at some images in dataset after augmentation.

In [51]:
def imshow(inp, title=None, plt_ax=plt, default=False):
    """Imshow for tensors"""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt_ax.imshow(inp)
    if title is not None:
        plt_ax.set_title(title)
    plt_ax.grid(False)

In [52]:
# axes = plt.subplots(nrows=3, ncols=4, figsize=(12, 9))[1]
# data, labels = next(iter(train_loader))
# for ax, d, l in zip(axes.flatten(), data, labels):
#     imshow(d.data, title=train_dataset.classes[l], plt_ax=ax)

In [53]:
def fit_epoch(model, train_loader, criterion, optimizer, scheduler):
    running_loss = 0.0
    running_corrects = 0
    processed_data = 0
    with tqdm(train_loader, unit="batch") as tepoch:
        for inputs, labels in tepoch:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            preds = torch.argmax(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            processed_data += inputs.size(0)
    scheduler.step()
    train_loss = running_loss / processed_data
    train_acc = running_corrects.cpu().numpy() / processed_data
    return train_loss, train_acc

In [54]:
def train(model, train_loader, epochs, batch_size):

    history = []
    log_template = "\nEpoch {ep:03d} train_loss: {t_loss:0.4f} \
    train_acc {t_acc:0.4f}"

    with tqdm(desc="epoch", total=epochs) as pbar_outer:
        opt = torch.optim.Adam(model.parameters())
        criterion = nn.CrossEntropyLoss()
        scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=7, gamma=0.1)

        for epoch in range(epochs):
            train_loss, train_acc = fit_epoch(model, train_loader,
                                              criterion, opt, scheduler)
            print("loss", train_loss)
            
            history.append((train_loss, train_acc))
            
            pbar_outer.update(1)
            tqdm.write(log_template.format(ep=epoch+1, t_loss=train_loss,
                                           t_acc=train_acc))
             
    return history

We're gonna use ResNet and apply fine tuning, try

In [55]:
def set_parameter_requires_grad(model, feature_extracting):
    for param in model.parameters():
            param.requires_grad = feature_extracting

In [56]:
def initialize_model(num_classes, feature_extract=False, use_pretrained=True):
    if use_pretrained:
        print('[INFO]: Loading pre-trained weights')
    else:
        print('[INFO]: Not loading pre-trained weights')
    model = models.shufflenet_v2_x0_5(weights=models.ShuffleNet_V2_X0_5_Weights.IMAGENET1K_V1)
    if feature_extract:
        print('[INFO]: Fine-tuning all layers...')
    elif not feature_extract:
        print('[INFO]: Freezing hidden layers...')
    model.fc = nn.Sequential(
        nn.Linear(in_features=1024, out_features=1024, bias=True),
        nn.Hardswish(),
        nn.Dropout(p=0.15, inplace=True),
        nn.Linear(in_features=1024, out_features=1024, bias=True),
        nn.Hardswish(),
        nn.Dropout(p=0.15, inplace=True),
        nn.Linear(in_features=1024, out_features=num_classes, bias=True)
    )
    return model

In [None]:
num_classes = len(train_labels.index)
model = initialize_model(num_classes, feature_extract=False).to(DEVICE)
model = model.cuda()
model

In [None]:
history = train(model, train_loader, epochs=20, batch_size=BATCH_SIZE)
history

epoch:   0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?batch/s]

## Testing our model

Let's test our model on test dataset. Before that we need to customize dataset.

In [None]:
def predict(model, test_loader):
    with torch.no_grad():
        logits = []

        for inputs in test_loader:
            inputs = inputs.to(DEVICE)
            model.eval()
            outputs = model(inputs).cpu()
            logits.append(outputs)

    probs = nn.functional.softmax(torch.cat(logits), dim=-1).numpy()
    return probs

In [None]:
class TestDataset(Dataset):
    def __init__(self, files):
        self.files = files

    def __getitem__(self, index):
        test_transforms = transforms.Compose([
            transforms.Resize(size=(RESCALE_SIZE, RESCALE_SIZE)),
            transforms.Lambda(lambda x: np.array(x, dtype="float32") / 255),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ,
        ])
        img = Image.open(self.files[index]).convert('RGB')
        return test_transforms(img)

    def __len__(self):
        return len(self.files)

In [None]:
test_dataset = TestDataset(test_files)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)
probs = predict(model, test_loader)
preds = [train_dataset.classes[i] for i in probs.argmax(axis=-1)]

In [None]:
submission = pd.DataFrame({
    "Id": [path.name for path in test_dataset.files],
    "Expected": preds
})
submission.to_csv("/content/baseline_aug.csv", index=False)
submission