In [1]:
import os

In [1]:
# !nvidia-smi

In [3]:
# Restrict which GPUs this process may see. This cell runs before the cell
# that imports torch, so the restriction is in place when torch is loaded.
# NOTE(review): the ids "1,4" are specific to one machine — adjust as needed.
os.environ["CUDA_VISIBLE_DEVICES"]="1,4"

In [3]:
from pathlib import Path
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms.v2 as T
from torch import nn
import cv2
from pylab import imshow
import torch.nn.functional as F
from numpy import array
import random

import lightning as L
import torchmetrics

from torch.utils import data
import PIL.Image
import albumentations as A
import albumentations.pytorch.transforms
import glob
from sklearn.model_selection import train_test_split

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare expression so the notebook displays the selected device.
DEVICE

device(type='cuda')

In [5]:
# Training images and their ground-truth CSV, resolved relative to the
# current working directory.
train_images_path = Path().absolute() / 'public_tests' / '00_test_img_input' / 'train' / 'images'
train_gt_path = Path().absolute() / 'public_tests' / '00_test_img_input' / 'train' / 'gt.csv'
# Fail early, showing the resolved path, if the expected layout is missing.
assert train_images_path.exists(), train_images_path.absolute()
assert train_gt_path.exists(), train_gt_path.absolute()

In [7]:
def read_csv(filename):
    """Read a ``filename,class_id`` CSV file into a dictionary.

    The first line is treated as a header and skipped. Blank lines are
    ignored. Each remaining line is split on its *last* comma, so image names
    that themselves contain commas are still parsed.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        dict mapping the image file name (str) to its class id (int).

    Raises:
        ValueError: If a non-blank line has no comma or a non-integer class id.
    """
    res = {}
    with open(filename) as fhandle:
        # Skip the header; the default avoids StopIteration on an empty file.
        next(fhandle, None)
        for line in fhandle:
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            # Separate name so the ``filename`` argument is not shadowed.
            image_name, class_id = line.rsplit(',', 1)
            res[image_name] = int(class_id)
    return res

In [8]:
# Mapping of training image file name -> integer class id, as returned by read_csv.
train_gt = read_csv(train_gt_path)

In [6]:
class MyCustomDataset(data.Dataset):
    """Train/validation split of the labelled images listed in ``train_gt``.

    The items of the module-level ``train_gt`` mapping are split once, in a
    stratified way, with ``train_test_split``; ``mode`` selects which part
    this instance serves.

    Args:
        mode: ``"train"`` or ``"valid"``.
        root_dir: Directory containing the image files named in ``train_gt``.
        train_fraction: Passed to ``train_test_split`` as ``train_size``.
        split_seed: Passed to ``train_test_split`` as ``random_state``; use
            the same value for the train and valid instances so that the two
            splits do not overlap.
        transform: Callable invoked as ``transform(image=rgb_array)`` that
            returns a mapping with an ``"image"`` entry. When ``None`` the
            RGB array read from disk is returned unchanged.

    Raises:
        RuntimeError: If ``mode`` is not ``"train"`` or ``"valid"``.
    """

    def __init__(self, mode, root_dir=train_images_path,
                 train_fraction=0.9, split_seed=42, transform=None,):
        # Validate up front so a typo does not cost a full split first.
        if mode not in ("train", "valid"):
            raise RuntimeError(f"Invalid mode: {mode!r}")

        items = list(train_gt.items())
        tr, val = train_test_split(items,
                                   train_size=train_fraction,
                                   random_state=split_seed,
                                   stratify=[label for _, label in items])
        selected = tr if mode == "train" else val

        self._paths = np.array([os.path.join(root_dir, name) for name, _ in selected])
        self._labels = np.array([label for _, label in selected])
        self._transform = transform

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self._paths)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, label)``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = str(self._paths[index])
        label = self._labels[index]

        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self._transform is not None:
            image = self._transform(image=image)["image"]

        return image, label

In [56]:
# Spatial size every image is resized to, and the batch size for the loaders.
NETWORK_SIZE = (224, 224)
BATCH_SIZE = 64

# Per-channel statistics passed to A.Normalize below.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing shared by the training and validation pipelines.
# NOTE(review): ToFloat(max_value=255) followed by Normalize(max_pixel_value=1.0)
# presumably scales pixels to [0, 1] before standardising them — confirm
# against the albumentations documentation.
common_transforms = [
    A.Resize(*NETWORK_SIZE),
    A.ToFloat(max_value=255),
    A.Normalize(max_pixel_value=1.0, mean=IMAGENET_MEAN, std=IMAGENET_STD),
    A.pytorch.transforms.ToTensorV2(),
]

# Training-only augmentations: one geometric group and one colour/blur group,
# each group configured with p=0.5.
aug_transforms = [
    A.OneOf([
        A.HorizontalFlip(),
        A.Rotate(border_mode=cv2.BORDER_CONSTANT),
    ], p=0.5),
    A.OneOf([
        A.RGBShift(),
        A.ToGray(),
        A.RandomBrightnessContrast(),
        A.Blur(blur_limit=3),
        A.CLAHE(),
        A.HueSaturationValue(),
    ], p=0.5)
]

# Augmentations run before the shared preprocessing; validation skips them.
MyTransformTrain = A.Compose(aug_transforms + common_transforms)
MyTransformVal = A.Compose(common_transforms)

In [57]:
# Both datasets use the default split seed, so the train and valid parts
# come from the same stratified split and do not overlap.
ds_train = MyCustomDataset(mode="train", transform=MyTransformTrain)
ds_valid = MyCustomDataset(mode="valid", transform=MyTransformVal)

# os.cpu_count() can return None when the count cannot be determined;
# fall back to 0 (load in the main process) so DataLoader gets a valid int.
dl_train = data.DataLoader(
    ds_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=os.cpu_count() or 0,
)
dl_valid = data.DataLoader(
    ds_valid,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=os.cpu_count() or 0,
)

In [58]:
def get_frozen_model(num_classes, unfreeze_layers, transfer=True):
    """Build a MobileNetV2 classifier with most feature blocks frozen.

    The stock classifier is replaced by a small head
    (Linear -> BatchNorm1d -> ReLU6 -> Dropout -> Linear) that outputs raw
    logits for ``num_classes`` classes. All children of ``model.features``
    except the last ``unfreeze_layers`` get ``requires_grad = False``.

    Args:
        num_classes: Number of output classes of the new head.
        unfreeze_layers: How many trailing children of ``model.features`` stay
            trainable. ``0`` freezes the whole feature extractor; a value at
            or above the number of children freezes nothing.
        transfer: When true, start from
            ``MobileNet_V2_Weights.DEFAULT``; otherwise from random weights.

    Returns:
        The configured ``torchvision`` MobileNetV2 module.

    Raises:
        ValueError: If ``unfreeze_layers`` is negative.
    """
    if unfreeze_layers < 0:
        raise ValueError(f"unfreeze_layers must be >= 0, got {unfreeze_layers!r}")

    weights = torchvision.models.MobileNet_V2_Weights.DEFAULT if transfer else None
    model = torchvision.models.mobilenet_v2(weights=weights)

    # The head ends in a plain Linear layer, so the model returns logits.
    model.classifier = nn.Sequential(
        nn.Linear(model.last_channel, 512),
        nn.BatchNorm1d(512),
        nn.ReLU6(inplace=True),
        nn.Dropout(p=0.2),
        nn.Linear(512, num_classes)
    )

    feature_blocks = list(model.features.children())
    # An explicit count is required here: the slice ``[:-0]`` equals ``[:0]``
    # and would freeze nothing when the caller asks to unfreeze zero layers.
    num_frozen = max(len(feature_blocks) - unfreeze_layers, 0)
    for child in feature_blocks[:num_frozen]:
        for param in child.parameters():
            param.requires_grad = False

    return model

In [68]:
# get_frozen_model(50, 3)

In [69]:
# from torchsummary import summary
# summary(get_frozen_model(50, 3).to(DEVICE), (3, 224, 224))

In [61]:
class MyTrainingModuleFrozen(L.LightningModule):
    """Lightning module that fine-tunes the partially frozen MobileNetV2.

    Args:
        num_classes: Number of target classes.
        unfrozen_layers: Forwarded to ``get_frozen_model`` as the number of
            trailing feature blocks that stay trainable.
    """

    def __init__(self, num_classes, unfrozen_layers):
        super().__init__()
        self.model = get_frozen_model(num_classes, unfrozen_layers)
        self.train_loss = []
        self.valid_accs = []
        self.accuracy = torchmetrics.classification.Accuracy(
            task="multiclass",
            num_classes=num_classes,
        )

    def configure_optimizers(self):
        """Return AdamW plus a plateau scheduler driven by ``valid_accs``."""
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=1e-3)
        # The monitored quantity is an accuracy (higher is better), so the
        # scheduler must run in "max" mode. The default "min" mode would cut
        # the learning rate while the accuracy is still improving.
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode="max",
            patience=3,
        )
        lr_scheduler_config = {
            "scheduler": lr_scheduler,
            "interval": "epoch",
            "monitor": "valid_accs",
        }

        return [optimizer], [lr_scheduler_config]

    def training_step(self, batch):
        """Compute and log the training loss/accuracy for one batch."""
        return self._step(batch, "train")

    def validation_step(self, batch):
        """Compute and log the validation loss/accuracy for one batch."""
        return self._step(batch, "valid")

    def _step(self, batch, kind):
        """Shared forward pass; logs ``{kind}_accs`` and ``{kind}_loss``.

        Args:
            batch: ``(images, labels)`` pair produced by the data loader.
            kind: Metric-name prefix, ``"train"`` or ``"valid"``.

        Returns:
            The cross-entropy loss for the batch.
        """
        x, y = batch
        p = self.model(x)
        loss = F.cross_entropy(p, y)
        accs = self.accuracy(p.argmax(axis=-1), y)

        metrics = {
            f"{kind}_accs": accs,
            f"{kind}_loss": loss,
        }
        # Per-step values are logged only for training; epoch aggregates
        # are logged for both kinds.
        self.log_dict(
            metrics,
            prog_bar=True,
            logger=True,
            on_step=kind == "train",
            on_epoch=True,
        )

        return loss


def my_early_stopping():
    """Create the callback that stops training once ``valid_accs`` stalls."""
    settings = dict(
        monitor="valid_accs",
        mode="max",
        patience=5,
        verbose=True,
    )
    return L.pytorch.callbacks.EarlyStopping(**settings)


def my_training_checkpoint():
    """Create the callback that checkpoints the module, tracking ``valid_accs``."""
    settings = dict(
        filename="{epoch}-{valid_accs:.3f}",
        monitor="valid_accs",
        mode="max",
        save_top_k=1,
        save_last=True,
    )
    return L.pytorch.callbacks.ModelCheckpoint(**settings)


def my_lr_monitor():
    """Return a Lightning ``LearningRateMonitor`` callback with default settings."""
    return L.pytorch.callbacks.LearningRateMonitor()


def my_progress_bar():
    """Return a Lightning ``TQDMProgressBar`` callback with default settings."""
    return L.pytorch.callbacks.TQDMProgressBar()

In [62]:
# 50 output classes; the last 3 feature blocks of the backbone stay trainable.
training_module = MyTrainingModuleFrozen(50, 3)
# At most 30 epochs; the early-stopping callback may end the run sooner.
trainer = L.Trainer(accelerator="auto",
                    max_epochs=30,
                    callbacks=[
                        my_progress_bar(),
                        my_lr_monitor(),
                        my_training_checkpoint(),
                        my_early_stopping(),
                    ],)
trainer.fit(training_module, dl_train, dl_valid)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1,4]

  | Name     | Type               | Params | Mode 
--------------------------------------------------------
0 | model    | MobileNetV2        | 2.9 M  | train
1 | accuracy | MulticlassAccuracy | 0      | train
--------------------------------------------------------
1.9 M     Trainable params
1.0 M     Non-trainable params
2.9 M     Total params
11.626    Total estimated model params size (MB)
217       Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                                            …

/home/krotovan/.local/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (17) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

Metric valid_accs improved. New best score: 0.636


Validation: |                                                                                                 …

Metric valid_accs improved by 0.100 >= min_delta = 0.0. New best score: 0.736


Validation: |                                                                                                 …

Metric valid_accs improved by 0.044 >= min_delta = 0.0. New best score: 0.780


Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Monitored metric valid_accs did not improve in the last 5 records. Best score: 0.780. Signaling Trainer to stop.


In [70]:
# model = MyTrainingModuleFrozen.load_from_checkpoint("lightning_logs/version_45/checkpoints/epoch=23-valid_accs=0.800.ckpt", num_classes=50, unfrozen_layers=3).model

In [65]:
# model = training_module.model

In [71]:
# `model` must be defined here: on a fresh kernel nothing else assigns it.
# Prefer the best checkpoint recorded by the ModelCheckpoint callback and
# fall back to the in-memory weights from the run above if none was written.
# The constructor arguments must match those used to build `training_module`.
best_ckpt_path = trainer.checkpoint_callback.best_model_path
if best_ckpt_path:
    model = MyTrainingModuleFrozen.load_from_checkpoint(
        best_ckpt_path, num_classes=50, unfrozen_layers=3
    ).model
else:
    model = training_module.model
torch.save(model.state_dict(), "birds_model.pt")

In [32]:
# Test images and the ground-truth CSV used by check_test, resolved relative
# to the current working directory.
test_images_path = Path().absolute() / 'public_tests' / '00_test_img_input' / 'test' / 'images'
test_gt_path = Path().absolute() / 'public_tests' / '00_test_img_gt' / 'gt.csv'
# Fail early, showing the resolved path, if the expected layout is missing.
assert test_images_path.exists(), test_images_path.absolute()
assert test_gt_path.exists(), test_gt_path.absolute()

In [33]:
class MyCustomTestDataset(data.Dataset):
    """Unlabelled dataset over every file found directly in ``root_dir``.

    Args:
        root_dir: Directory whose entries are treated as image files; they
            are served in sorted path order.
        transform: Callable invoked as ``transform(image=rgb_array)`` that
            returns a mapping with an ``"image"`` entry. Defaults to the
            validation pipeline ``MyTransformVal``.
    """

    def __init__(self, root_dir=test_images_path,
                 transform=None):
        # Escape the directory so glob metacharacters in it are literal.
        pattern = os.path.join(glob.escape(str(root_dir)), "*")
        paths = sorted(glob.glob(pattern))

        self._len = len(paths)
        self._paths = paths
        if transform is None:
            # The previous fallback referenced an undefined name.
            transform = MyTransformVal
        self._transform = transform

    def __len__(self):
        """Return the number of files found in ``root_dir``."""
        return len(self._paths)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, file_name)``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = self._paths[index]

        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        image = self._transform(image=image)["image"]

        # basename handles the platform's path separator, unlike split('/').
        return image, os.path.basename(img_path)

In [35]:
# Test images go through the same preprocessing as the validation split.
ds_test = MyCustomTestDataset(transform=MyTransformVal)

# os.cpu_count() can return None when the count cannot be determined;
# fall back to 0 (load in the main process) so DataLoader gets a valid int.
dl_test = data.DataLoader(
    ds_test,
    batch_size=1,
    shuffle=False,
    drop_last=False,
    num_workers=os.cpu_count() or 0,
)

In [36]:
def test(fitted_model, loader=None):
    """Predict a class id for every image served by ``loader``.

    The model is switched to eval mode and moved to ``DEVICE`` in place.

    Args:
        fitted_model: Trained ``torch.nn.Module`` returning one row of class
            scores per input image.
        loader: Iterable with a length that yields ``(image_batch, file_names)``
            pairs. Defaults to the module-level ``dl_test``.

    Returns:
        dict mapping each file name to the predicted class id (int).
    """
    if loader is None:
        loader = dl_test

    model = fitted_model
    model.eval()
    model.to(DEVICE)

    preds = {}
    # The context manager closes the progress bar even if inference raises.
    with torch.no_grad(), tqdm(total=len(loader), leave=True) as progress_test:
        for x_batch, x_path in loader:
            scores = model(x_batch.to(DEVICE))
            # One argmax and one host transfer per batch, not per sample.
            class_ids = scores.argmax(dim=-1).cpu().tolist()
            for name, class_id in zip(x_path, class_ids):
                preds[name] = class_id

            progress_test.update()
    return preds


In [37]:
def save_csv(img_classes, filename):
    """Write ``img_classes`` to ``filename`` as a ``filename,class_id`` CSV.

    A header line is written first, followed by one line per entry in
    ascending order of the image name.
    """
    rows = ['filename,class_id']
    for image_name in sorted(img_classes):
        rows.append('%s,%d' % (image_name, img_classes[image_name]))
    with open(filename, 'w') as fhandle:
        for row in rows:
            fhandle.write(row + '\n')


def check_test():
    """Print the accuracy of ``output.csv`` against the test ground truth.

    Reads the predictions from ``output.csv`` in the current working
    directory and the ground truth from ``test_gt_path``. A ground-truth
    file with no prediction counts as incorrect instead of raising
    ``KeyError``. An empty ground truth is reported instead of dividing by
    zero.
    """
    output = read_csv('output.csv')
    gt = read_csv(test_gt_path)

    total = len(gt)
    if total == 0:
        print('No ground-truth entries found, accuracy is undefined')
        return

    missing = sum(1 for k in gt if k not in output)
    if missing:
        print('Warning: %d ground-truth files have no prediction '
              '(counted as incorrect)' % missing)

    correct = sum(1 for k, v in gt.items() if k in output and output[k] == v)
    accuracy = correct / total

    res = 'Ok, accuracy %.4f' % accuracy
    print(res)

In [None]:
# Predict a class for every test image, then write the predictions to
# output.csv in the current working directory (the file check_test reads).
img_classes = test(model)
save_csv(img_classes, Path().absolute() / 'output.csv')

In [67]:
# Compare output.csv with the test ground truth and print the accuracy.
check_test()

Ok, accuracy 0.9724
