# Sources
- https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

# Imports

In [1]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from tqdm import tqdm
import seaborn as sns
from pathlib import Path
import glob
import mimetypes
import itertools
from typing import Iterable,Generator,Sequence,Iterator,List,Set,Dict,Union,Optional,Tuple
import meta_utils
import pandas as pd
from sklearn import metrics, model_selection, preprocessing
from sklearn.model_selection import StratifiedKFold

from torch import nn
from torch.utils.data import DataLoader, random_split
from torch.nn import functional as F
from torch.utils.data import Dataset, TensorDataset, DataLoader
import albumentations as A
from albumentations.core.composition import Compose
from albumentations.pytorch import ToTensorV2
import cv2

# Apply seaborn's default styling to subsequent matplotlib figures.
sns.set()

# Point TORCH_HOME at the dataset drive; presumably so downloaded pretrained
# weights are cached there rather than in the home directory — confirm.
os.environ["TORCH_HOME"] = "/media/hdd/Datasets/"
# Enable cuDNN benchmark mode (lets cuDNN pick convolution algorithms by
# timing them; generally suited to fixed input sizes such as the ones used here).
cudnn.benchmark = True

# Config

In [2]:

def fish_name_fn(x):
    """Return the second-to-last "/"-separated component of ``str(x)``.

    For a path such as ``root/<label>/<file>`` this is the name of the
    directory holding the file, which is used as the class label.
    """
    # Only the last two components matter, so split from the right.
    return str(x).rsplit("/", 2)[-2]
def asl_name_fn(x):
    """Return the second-to-last "/"-separated component of ``str(x)``.

    For a path such as ``root/<label>/<file>`` this is the name of the
    directory holding the file, which is used as the class label.
    """
    # Only the last two components matter, so split from the right.
    return str(x).rsplit("/", 2)[-2]

# Root directory of the dataset; images are listed from its "train" subfolder.
ds_path = Path("/media/hdd/Datasets/asl/")
ds_name = "asl"
# Function that derives a label from an image path.
name_fn = asl_name_fn
# Side length, in pixels, of the square images produced by the transforms.
image_size = 224
# Batch size used by both the training and validation data loaders.
batch_size = 128
# Presumably toggles pretrained initialisation — verify where it is consumed.
pretrained = True
# NOTE(review): the settings below are not referenced anywhere in the visible
# part of this file; they look like hooks for later experiments — confirm.
epoch_steps = [1,2]
enable_proxy_attention = True
change_subset_attention = 0.01
validation_split = 0.3
shuffle_dataset = True


# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Albumentations pipelines keyed by split name. Both end with normalisation
# using the ImageNet channel mean/std, followed by conversion to a torch tensor.
data_transforms = {
    # Training: a random crop resized to image_size x image_size.
    'train': A.Compose(
        [
            A.RandomResizedCrop(image_size, image_size, p=1.0),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            ),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
    ),
    # Validation: deterministic. Resize runs BEFORE CenterCrop: cropping first
    # made the following Resize a no-op and is rejected by albumentations for
    # images smaller than image_size in either dimension. Resizing first
    # guarantees the crop always fits and the output is image_size x image_size.
    'val': A.Compose(
        [
            A.Resize(image_size, image_size),
            A.CenterCrop(image_size, image_size, p=1.0),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            ),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
    ),
}


# Data part

In [4]:
# List every image file under the training directory.
all_files = meta_utils.get_files(ds_path / "train")

# Map each path to the label derived from it, then turn that mapping into a
# two-column frame: one row per file, holding the path and its label.
path_to_label = {}
for file_path in all_files:
    path_to_label[file_path] = name_fn(file_path)
df = pd.DataFrame.from_dict(path_to_label, orient="index").reset_index()
df.columns = ["image_id", "label"]

In [None]:
# df["label"].value_counts()

In [5]:
# Replace the string labels with integer class ids.
temp = preprocessing.LabelEncoder()
df["label"] = temp.fit_transform(df["label"].values)

# Keep the id -> original label lookup for later decoding.
label_map = dict(enumerate(temp.classes_))

# Shuffle the rows, then record for every row the index of the stratified
# fold in which that row is held out. -1 marks "not assigned yet".
df = df.sample(frac=1).reset_index(drop=True)
df["kfold"] = -1
stratify = StratifiedKFold(n_splits=2)
fold_splits = stratify.split(X=df["image_id"].values, y=df["label"].values)
for fold, (_, held_out_idx) in enumerate(fold_splits):
    df.loc[held_out_idx, "kfold"] = fold
df.to_csv("train_folds.csv", index=False)

In [6]:
class ImageClassDs(Dataset):
    """Dataset that loads the images listed in a dataframe together with their labels.

    Args:
        df: Frame with an ``image_id`` column (path of each image) and a
            ``label`` column.
        imfolder: Stored on the instance; not used by the methods of this class.
        train: Stored on the instance; not used by the methods of this class.
        transforms: Optional callable invoked as ``transforms(image=x)`` whose
            result is indexed with ``"image"``.
    """

    def __init__(
        self, df: pd.DataFrame, imfolder: str, train: bool = True, transforms=None
    ):
        self.df = df
        self.imfolder = imfolder
        self.train = train
        self.transforms = transforms
        # NOTE: this is the per-row label column, not a list of unique classes.
        self.classes = self.df["label"]

    def __getitem__(self, index):
        """Return sample ``index`` as ``{"x": image, "y": label}``.

        The image is read with OpenCV and converted from BGR to RGB before the
        optional transforms are applied.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        # Fetch the row once instead of once per column.
        row = self.df.iloc[index]
        im_path = row["image_id"]

        x = cv2.imread(str(im_path), cv2.IMREAD_COLOR)
        if x is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of an opaque error inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {im_path}")
        x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)

        if self.transforms:
            x = self.transforms(image=x)["image"]

        return {
            "x": x,
            "y": row["label"],
        }

    def __len__(self):
        """Return the number of rows in the backing dataframe."""
        return len(self.df)

In [17]:
# Fold 1 is held out for validation; every other row is used for training.
is_val_row = df["kfold"] == 1
train = df.loc[~is_val_row]
val = df.loc[is_val_row]

# One dataset per split, each with the transform pipeline of the same name.
image_datasets = {
    split: ImageClassDs(
        frame,
        ds_path,
        train=(split == "train"),
        transforms=data_transforms[split],
    )
    for split, frame in (("train", train), ("val", val))
}

# Only the training loader shuffles; validation keeps the dataframe order.
dataloaders = {
    split: DataLoader(
        image_datasets[split],
        batch_size=batch_size,
        shuffle=(split == "train"),
        num_workers=12,
    )
    for split in ("train", "val")
}


In [18]:
# Number of samples in each split.
dataset_sizes = {split: len(image_datasets[split]) for split in ("train", "val")}
# NOTE(review): despite the name, this is the training set's per-row label
# column (see ImageClassDs.classes), not a list of distinct class names.
class_names = image_datasets["train"].classes

In [42]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a training pass followed by a validation pass. The
    weights from the epoch with the highest validation accuracy are kept and
    restored into ``model`` before returning.

    Reads the module-level ``dataloaders`` (dict with ``'train'`` and
    ``'val'`` loaders yielding ``{"x": ..., "y": ...}`` batches),
    ``dataset_sizes`` (sample count per split) and ``device``.

    Args:
        model: Network to optimise; must already be on ``device``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the training pass.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object with the best validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    pbar = tqdm(range(num_epochs), total=num_epochs)
    for _epoch in pbar:
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inps in dataloaders[phase]:
                inputs = inps['x'].to(device)
                labels = inps['y'].to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only while training
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics: accumulate plain Python numbers so the metrics
                # below are floats rather than tensors living on the device
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()
            if is_training:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            pbar.set_postfix({
                'Phase' : phase,
                'Loss' : epoch_loss,
                'Acc' : epoch_acc
            })

            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # ".4f" = four decimal places (the previous "4f" only set a minimum width).
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [43]:
# One output class per entry in the id -> label lookup.
num_classes = len(label_map)

In [44]:
# ResNet-18 backbone; the `pretrained` config flag decides whether the
# ImageNet weights are loaded or the network starts from random initialisation.
model_ft = models.resnet18(
    weights=models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
)
num_ftrs = model_ft.fc.in_features
# Replace the final fully connected layer so it emits one score per class
# (num_classes outputs) instead of the backbone's original output size.
model_ft.fc = nn.Linear(num_ftrs, num_classes)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [45]:
# Fine-tune for 25 epochs; train_model returns the model with the weights
# from its best validation epoch loaded.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=25,
)


  0%|          | 0/25 [00:00<?, ?it/s]