In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [None]:
import numpy as np

In [None]:
from collections import OrderedDict

In [None]:
from PIL.Image import Image

In [None]:
import torch
import torch.nn.functional as F
from torch import nn, Tensor
from torchvision import (transforms, datasets)

In [None]:
from fastai.vision import *

## Train letter classification with a convolutional neural network

#### Prepare data

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
tfms = transforms.Compose([transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))])

In [None]:
path = Path('data')
path.mkdir(exist_ok=True)

In [None]:
image_path = path / 'geomnist_dataset'

In [None]:
# ! cp /content/drive/My\ Drive/datasets/letters/trained_data/geomnist_dataset.zip {path}

In [None]:
import zipfile
# Unpack the archive only when the dataset folder is missing, so re-running this
# cell does not extract everything again.
# NOTE(review): assumes the archive unpacks into `image_path` (data/geomnist_dataset),
# which is where the dataset cells below read from — confirm against the zip layout.
if not image_path.exists():
    with zipfile.ZipFile(path / 'geomnist_dataset.zip', 'r') as zip_ref:
        zip_ref.extractall(path)

## Initialize all additional functions at once

In [None]:
class FlattenLayer(nn.Module):
    """Collapse every dimension after the first one into a single dimension.

    An input of shape ``(N, d1, d2, ...)`` comes out as ``(N, d1 * d2 * ...)``.
    """

    def forward(self, x: Tensor) -> Tensor:
        # Dimension 0 is left untouched; dimensions 1..end are merged.
        return x.flatten(start_dim=1)

In [None]:
def conv2(ni:int, nf:int):
    """Build a `conv_layer` with stride 2 going from `ni` to `nf` channels."""
    layer = conv_layer(ni, nf, stride=2)
    return layer

In [None]:
class ResBlock(nn.Module):
    """Two `conv_layer`s of width `nf` wrapped in an additive skip connection."""

    def __init__(self, nf):
        super().__init__()
        # Both layers map nf -> nf channels.
        self.conv1 = conv_layer(nf, nf)
        self.conv2 = conv_layer(nf, nf)

    def forward(self, x):
        # Run the two convolutions in sequence, then add the untouched input back.
        residual = self.conv1(x)
        residual = self.conv2(residual)
        return x + residual

In [None]:
??res_block

In [None]:
def conv_and_res(ni, nf):
    """Stack a stride-2 convolution (`conv2`) and a `res_block` of width `nf`."""
    stage = [conv2(ni, nf), res_block(nf)]
    return nn.Sequential(*stage)

In [None]:
def img_loader(img_path:Path):
    """Open the image file at `img_path` and return it converted to PIL mode 'L'."""
    # Both the raw file handle and the PIL image are closed when the block exits;
    # `convert` returns a new image object, which is what the caller receives.
    with open(img_path, mode='rb') as fl, PIL.Image.open(fl) as img:
        return img.convert('L')

## Prepare data loaders

In [None]:
train_dataset = datasets.ImageFolder(image_path / 'train_geo', loader=img_loader, transform=tfms)
valid_dataset = datasets.ImageFolder(image_path / 'val_geo', loader=img_loader, transform=tfms)
test_dataset = datasets.ImageFolder(image_path / 'test_geo', loader=img_loader, transform=tfms)

In [None]:
# Display all three splits (the test split was previously replaced by a second `train_dataset`)
train_dataset, valid_dataset, test_dataset

In [None]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
data = DataBunch(train_loader, valid_loader, test_dl=test_loader)

In [None]:
loss_func = nn.CrossEntropyLoss()

In [None]:
# The Learner is not created here: `model` does not exist yet at this point of the
# notebook, so constructing it raised NameError on Restart & Run All. The learner is
# built in the "Model initialization" section, right after `model = LetterNet()`.

#### Model initialization

In [None]:
?? Flatten

In [None]:
input_channels=1

In [None]:
# First feature stage: two groups of (3x3 conv -> batch norm -> ReLU -> 2x2 max-pool),
# widening the channels input_channels -> 32 -> 64, followed by 2D dropout (p=0.25).
# NOTE(review): the convolutions use PyTorch's default stride/padding, so a 32x32 input
# (the size used for `x_test` further down) would shrink 32 -> 30 -> 15 -> 13 -> 6 here —
# confirm against the real image size.
lower_body = nn.Sequential(OrderedDict([('conv1', nn.Conv2d(input_channels, 32, 3)),
                                        ('bn1', nn.BatchNorm2d(32)),
                                        ('relu1', nn.ReLU(inplace=True)),
                                        ('mxpl1', nn.MaxPool2d(2, 2)),
                                        ('conv2', nn.Conv2d(32, 64, kernel_size=3)),
                                        ('bn2', nn.BatchNorm2d(64)),
                                        ('relu2', nn.ReLU(inplace=True)),
                                        ('mxpl2', nn.MaxPool2d(2, 2)),
                                        ('drop1', nn.Dropout2d(p=0.25))]))

In [None]:
# Second feature stage: one more (3x3 conv -> batch norm -> ReLU -> 2x2 max-pool) group,
# widening the channels 64 -> 128, followed by 2D dropout (p=0.25).
# NOTE(review): with default stride/padding a 6x6 input map would come out as 2x2
# (6 -> 4 -> 2) — confirm the incoming map size.
conv_body = nn.Sequential(OrderedDict([('conv3', nn.Conv2d(64, 128, kernel_size=3)),
                                       ('bn3', nn.BatchNorm2d(128)),
                                       ('relu3', nn.ReLU(inplace=True)),
                                       ('mxpl3', nn.MaxPool2d(2, 2)),
                                       ('drop2', nn.Dropout2d(p=0.25))]))

In [None]:
# Fully connected stage: flatten -> Linear(2*2*128 -> 1024) -> batch norm -> ReLU -> dropout.
# The hard-coded `2 * 2 * 128` input width means the preceding stages must deliver a
# 128-channel 2x2 feature map; other sizes fail in `ln1`.
# NOTE(review): presumably this corresponds to 32x32 input images — confirm.
linear_body = nn.Sequential(OrderedDict([('flatten', FlattenLayer()),
                                         ('ln1', nn.Linear(2 * 2 * 128, 1024, bias=True)),
                                         ('bn2', nn.BatchNorm1d(1024)),
                                         ('relu3', nn.ReLU(inplace=True)),
                                         ('drop2', nn.Dropout(p=0.25))]))

In [None]:
class LetterNet(nn.Module):
    """Full double letters network implementation

    The three feature stages are the module-level ``lower_body``, ``conv_body`` and
    ``linear_body`` objects, so every ``LetterNet`` instance reuses those same stage
    modules; only the final classification layer is created per instance.

    Args:
        input_channels: accepted for interface compatibility; not read here because the
            first convolution is already built inside ``lower_body``.
        num_classes: number of logits produced by the final linear layer.
    """

    def __init__(self, input_channels=1, num_classes=33):
        super(LetterNet, self).__init__()
        # First stage is `lower_body`. The previous reference to `body` pointed at a
        # name that is only bound much later (the pretrained backbone of the
        # image-search section), so it failed on a fresh kernel.
        self.conv_part = lower_body
        self.dub_part = conv_body
        self.fc_part = linear_body
        # `linear_body` ends with 1024 features, hence the 1024-wide classifier input.
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Run the three stages in order and return the raw class logits."""
        x = self.conv_part(x)
        x = self.dub_part(x)
        x = self.fc_part(x)
        logits = self.fc(x)

        return logits



In [None]:
model = LetterNet()

In [None]:
model

In [None]:
learn = Learner(data, model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

## Train model

#### Find learning rate

In [None]:
learn.lr_find()
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(4, 2e-2)

#### Size dependency

In [None]:
sz = 32 # 34
x_test = torch.randn(4 , 1, sz, sz)

In [None]:
model(x_test)

## FastAI for MNIST classifier

In [None]:
?? conv_layer

In [None]:
model = nn.Sequential(
    conv2(1, 8),   # 14
    conv2(8, 16),  # 7
    conv2(16, 32), # 4
    conv2(32, 16), # 2
    conv2(16, 33), # 1
    nn.AdaptiveAvgPool2d((1, 1)),
    FlattenLayer()      # remove (1,1) grid
)

In [None]:
model

#### Adaptive (global) pooling

In [None]:
sz = 32 # 34
x_test = torch.randn(4 , 1, sz, sz)

In [None]:
model(x_test)

In [None]:
learn = Learner(data, model, loss_func = nn.CrossEntropyLoss(), metrics=accuracy)

In [None]:
learn.fit_one_cycle(10, max_lr=0.1)

In [None]:
model = nn.Sequential(
    conv_and_res(1, 8),
    conv_and_res(8, 16),
    conv_and_res(16, 32),
    conv_and_res(32, 16),
    conv2(16, 33),
    nn.AdaptiveAvgPool2d((1, 1)),
    FlattenLayer()
)

In [None]:
model

In [None]:
learn = Learner(data, model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [None]:
learn.lr_find(end_lr=100)
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(12, max_lr=0.05)

## Bigger model with residual connections

In [None]:
model = nn.Sequential(
    conv_and_res(1, 32),
    conv_and_res(32, 64),
    conv_and_res(64, 128),
    conv_and_res(128, 256),
    conv_and_res(256, 512),
    conv2(512, 33),
    nn.AdaptiveAvgPool2d((1, 1)),
    FlattenLayer()
)

In [None]:
learn = Learner(data, model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [None]:
learn.lr_find()
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(4, 2e-2)

In [None]:
learn.fit_one_cycle(8, 2e-2)

#### Add regularization

In [None]:
model = nn.Sequential(
    conv_and_res(1, 32),
    conv_and_res(32, 64),
    conv_and_res(64, 128),
    conv_and_res(128, 256),
    conv_and_res(256, 512),
    conv2(512, 1024),
    nn.AdaptiveAvgPool2d((1, 1)),
    FlattenLayer(),
    nn.Linear(1024, 512),
    nn.Dropout(p=0.25),
    nn.Linear(512, 512),
    nn.Dropout(p=0.5),
    nn.Linear(512, 33))

In [None]:
model

In [None]:
from utils.logging.time_logger import start_timer

In [None]:
sz = 32 # 34
x_test = torch.randn(4 , 1, sz, sz)

In [None]:
tm = start_timer(True, model)
y = model(x_test)
tm.timeit()

In [None]:
learn = Learner(data, model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [None]:
learn.lr_find()
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(28, 2e-2)

## Serialize model

In [None]:
model_path = path / 'models'
model_path.mkdir(exist_ok=True)

In [None]:
model = learn.model

In [None]:
torch.save(model.state_dict(), str(model_path / 'mnist_resnet.pth'))

In [None]:
state_dict = torch.load(str(path / 'models' / 'mnist_resnet.pth'), map_location='cpu')

In [None]:
state_dict

## Image search

In [None]:
import cv2

In [None]:
from torch import no_grad
from torch.jit import ScriptModule
from torchvision.models import (resnet34, resnet50, wide_resnet50_2)

In [None]:
size = 256
imsz = 224
IMG_SUFF = {'.jpg', '.jpeg', '.png'}

In [None]:
# Preprocessing pipeline for the embedding backbone: to PIL (RGB) -> resize to `size` ->
# centre-crop to `imsz` -> tensor -> per-channel normalisation.
# NOTE(review): `ToPILImage` is the wrapper class defined in the next cell, so on a fresh
# kernel that cell has to be executed before this one — consider swapping the two cells.
# NOTE(review): the mean/std values are presumably the ImageNet channel statistics
# expected by torchvision's pretrained models — confirm.
vec_trsfm = transforms.Compose([ToPILImage(mode='RGB'),
                                transforms.Resize(size),
                                transforms.CenterCrop(imsz),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

In [None]:
class ToPILImage(object):
    """Convert input image to PIL image

    PIL images pass through unchanged; any other input is handed to
    ``transforms.ToPILImage`` configured with the given ``mode``.
    """

    def __init__(self, mode=None):
        super().__init__()
        self.to_pil = transforms.ToPILImage(mode=mode)

    def convert(self, img: "Union[np.ndarray, PIL.Image.Image]"):
        """
        Converts image to the PIL format
        Args:
            img: input image, either an array or an already constructed PIL image

        Returns:
            converted image (the same object when `img` is already a PIL image)
        """
        # Resolve the PIL class locally. The module-level name `Image` is rebound by
        # `from fastai.vision import *` (fastai exports its own `Image` class), so an
        # isinstance check against that name would not recognise PIL images.
        from PIL.Image import Image as PILImage

        return img if isinstance(img, PILImage) else self.to_pil(img)

    def __call__(self, *args, **kwargs):
        return self.convert(*args, **kwargs)

    def __repr__(self):
        mode = self.to_pil.mode
        inner = f'mode={mode}' if mode is not None else ''
        return f'{self.__class__.__name__}({inner})'


class Img2Vec(object):
    """Model wrapper for image embedding

    Applies a per-image transform, stacks the results into one batch, runs the backbone
    without gradient tracking and returns the output as a NumPy array.

    Args:
        backbone: model producing the embeddings; switched to eval mode when it has an
            ``eval`` method, then moved to `device`.
        trfm: callable applied to every input image; it must return a tensor, and all
            returned tensors must share one shape so they can be stacked.
        device: device specifier understood by ``torch.device``.
    """

    def __init__(self, backbone: Union[nn.Module, ScriptModule], trfm: "Callable[[np.ndarray], Tensor]", device: str = 'cpu'):
        super().__init__()
        self.device = torch.device(device)
        model = backbone.eval() if hasattr(backbone, 'eval') else backbone
        # Use the normalised device object for the model as well as for the inputs.
        self.backbone = model.to(self.device)
        self.trfm = trfm

    def preprocess(self, *xs: np.ndarray) -> Tensor:
        """
        Transform data before model
        Args:
            *xs: input data

        Returns:
            processed data for model: one stacked batch tensor on `self.device`
        """
        batch = [self.trfm(x) for x in xs]
        return torch.stack(batch).to(self.device)

    @no_grad()
    def forward(self, *xs: np.ndarray) -> np.ndarray:
        """
        Embed one or more images
        Args:
            *xs: input data

        Returns:
            backbone output for the whole batch as a NumPy array (first axis = image index)
        """
        tns = self.preprocess(*xs)
        rts = self.backbone(tns)
        # `.detach()` replaces the legacy `.data` attribute before leaving torch.
        return rts.detach().cpu().numpy()

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

#### Prepare data

In [None]:
search_path = path / 'search'

In [None]:
dir_paths = [dp for dp in search_path.iterdir() if dp.is_dir()]

In [None]:
dir_paths

In [None]:
img_pts = [im_pt for dp in dir_paths for im_pt in dp.iterdir() if im_pt.suffix in IMG_SUFF]

In [None]:
def read_img(im_pt):
    """Load an image file as a 3-channel array in RGB channel order.

    Args:
        im_pt: path of the image file (anything accepted by ``str``).

    Returns:
        the decoded image with channels ordered R, G, B.

    Raises:
        FileNotFoundError: when OpenCV cannot read or decode the file.
    """
    # IMREAD_COLOR always yields 3 channels, so grayscale files no longer produce a
    # single-channel array that breaks the colour conversion below.
    img = cv2.imread(str(im_pt), cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f'Cannot read image: {im_pt}')

    # OpenCV decodes to BGR; the rest of the notebook (PIL, matplotlib) expects RGB.
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
imgs = [read_img(ip) for ip in img_pts]

#### Initialize features extractor

In [None]:
cut = 1

In [None]:
body = wide_resnet50_2(pretrained=True)

In [None]:
net = nn.Sequential(nn.Sequential(*list(body.children())[:-cut]), FlattenLayer())

In [None]:
net

In [None]:
img_vec = Img2Vec(net, vec_trsfm, device='cpu')

In [None]:
vecs = [img_vec(im)[0] for im in imgs]

In [None]:
vecs[0].shape, len(vecs)

In [None]:
img_vecs = list(zip(imgs, vecs))

In [None]:
img_vecs

#### Compare vectors

In [None]:
from scipy.spatial.distance import cosine

In [None]:
import matplotlib.pyplot as plt

In [None]:
def top_vecs(qi, top_k=5):
    """Rank the entries of `img_vecs` by cosine distance to the query image `qi`.

    Args:
        qi: query image, embedded with `img_vec`.
        top_k: how many of the closest entries to keep.

    Returns:
        list of ``(distance, item)`` tuples in ascending distance order, where `item`
        is the first element of the matching `img_vecs` pair.
    """
    query_vec = img_vec(qi)[0]

    scored = []
    for stored_item, stored_vec in img_vecs:
        scored.append((cosine(query_vec, stored_vec), stored_item))

    # Order by distance only; the second tuple element is never compared.
    scored.sort(key=lambda pair: pair[0])
    return scored[:top_k]

#### Query images

In [None]:
query_path = path / 'queries'
query_path.mkdir(exist_ok=True)

In [None]:
qim = read_img(query_path / 'st_1.jpeg')

In [None]:
res = top_vecs(qim)

In [None]:
res

In [None]:
plt.imshow(qim)
plt.show()
plt.close()

In [None]:
for dist, res_img in res:
    plt.imshow(res_img)
    plt.show()
    plt.close()

## Fine tuning FastAI

In [1]:
from fastai.vision import *
from fastai.metrics import error_rate

In [2]:
bs = 64

In [None]:
path = untar_data(URLs.PETS); path

Downloading https://s3.amazonaws.com/fast-ai-imageclas/oxford-iiit-pet


In [None]:
path.ls()

In [None]:
# `path_img` and `pat` are used by this and the following cells but were never defined
# in the notebook, so a fresh kernel stopped here with NameError.
# NOTE(review): presumably the extracted PETS archive keeps its pictures in `images/`
# and encodes the class in the file name as `<class>_<number>.jpg` — confirm via `path.ls()`.
path_img = path / 'images'
pat = r'/([^/]+)_\d+\.jpg$'  # captures the class name in front of the trailing _<number>.jpg
fnames = get_image_files(path_img)
fnames[:5]

In [None]:
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), 
                                   size=224, bs=bs).normalize(imagenet_stats)

In [None]:
data.show_batch(rows=3, figsize=(7,6))

In [None]:
print(data.classes)
len(data.classes),data.c

In [None]:
learn = cnn_learner(data, models.resnet34, metrics=error_rate)

In [None]:
learn.model

In [None]:
learn.fit_one_cycle(4)

In [None]:
learn.save('stage-1')

In [None]:
interp = ClassificationInterpretation.from_learner(learn)

losses,idxs = interp.top_losses()

len(data.valid_ds)==len(losses)==len(idxs)

In [None]:
interp.plot_top_losses(9, figsize=(15,11))

In [None]:
doc(interp.plot_top_losses)

In [None]:
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)

In [None]:
interp.most_confused(min_val=2)

#### Unfreeze and fine-tune

In [None]:
learn.unfreeze()

In [None]:
learn.fit_one_cycle(1)

In [None]:
learn.load('stage-1');

In [None]:
learn.lr_find()
learn.recorder.plot()

In [None]:
learn.unfreeze()
learn.fit_one_cycle(2, max_lr=slice(1e-6,1e-4))

#### Train bigger model

In [None]:
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(),
                                   size=299, bs=bs//2).normalize(imagenet_stats)

In [None]:
learn = cnn_learner(data, models.resnet50, metrics=error_rate)

In [None]:
learn.lr_find()
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(8)

In [None]:
learn.save('stage-1-50')

In [None]:
learn.unfreeze()
learn.fit_one_cycle(3, max_lr=slice(1e-6,1e-4))

In [None]:
learn.load('stage-1-50');

In [None]:
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
interp.most_confused(min_val=2)