In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [None]:
! pip install autopep8

In [None]:
! pip install opencv-python

## Prerequisites

For this tutorial I recommend that you create a new conda environment:
```bash
conda create -n fast2 python=3.8.2 jupyter
```

## Install FastAI V2

In [None]:
! pip install -U fastai2

## Import fastAI v2 vision modules

In [None]:
from fastai2.vision.all import *

## Semantic segmentation with UNet model

Recall the UNet model:
<img src="images/od/unet.png" height="800" width="800">

Download the dataset.
#### Note: Here I'll download the "tiny" version, but for the full dataset you should replace it with
```python
untar_data(URLs.CAMVID)
```

In [None]:
path = untar_data(URLs.CAMVID_TINY)

Let's look into the data

In [None]:
path.ls()

In [None]:
(path / 'images').ls()

In [None]:
(path / 'labels').ls()

Load the class codes:

In [None]:
codes = np.loadtxt(path/'codes.txt', dtype=str)
codes

In [None]:
fnames = get_image_files(path/'images')
fnames[0]

In [None]:
?? get_image_files

Initialize labels

In [None]:
def label_func(fn):
    """Map an image file to its mask file: `<stem>_P<suffix>` inside the `labels` folder of `path`."""
    mask_name = fn.stem + '_P' + fn.suffix
    return path / 'labels' / mask_name

Initialize the data loader

In [None]:
dls = SegmentationDataLoaders.from_label_func(path, bs=8, fnames=fnames, label_func=label_func, codes=codes, 
                                              seed=2020)

Look into the actual images

In [None]:
dls.show_batch(max_n=8)

## Initialize the model

In [None]:
learn = unet_learner(dls, resnet34)

Look in the model

In [None]:
learn.model

Recall the pixel-shuffle up-sampling layer:
<img src="images/od/pixel_shuffle_1.jpg" height="1000" width="1000">

Where are the concatenations?
<br/>
For this, the fastai library uses so-called runtime hooks.

In [None]:
# A simple hook class that records the input and output of a layer during forward/backward pass
class Hook():
    """
    Register a hook on a module and keep the last values the hook received

    Args:
        module: object exposing register_forward_hook / register_backward_hook
        backward: register a backward hook when truthy, a forward hook otherwise

    Attributes:
        input: second argument of the most recent hook call (None until the hook fires)
        output: third argument of the most recent hook call (None until the hook fires)
        hook: handle returned by the registration call, used to remove the hook
    """
    def __init__(self, module, backward=False):
        # Defined up-front so that reading them before the first pass gives None
        # instead of raising AttributeError
        self.input = None
        self.output = None
        if backward:
            self.hook = module.register_backward_hook(self.hook_fn)
        else:
            self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        """Store the values passed to the hook by the module."""
        self.input = input
        self.output = output

    def close(self):
        """Remove the hook from the module."""
        self.hook.remove()

    def __enter__(self):
        """Allow `with Hook(layer) as h:` so that the hook is always removed."""
        return self

    def __exit__(self, *exc_info):
        self.close()

In [None]:
print(learn.__dict__)

Check the optimizer

In [None]:
learn.opt_func

Check the loss function:

In [None]:
learn.loss_func

In [None]:
?? FlattenedLoss

Check the type

In [None]:
type(learn.loss_func)

In [None]:
?? CrossEntropyLossFlat

In [None]:
?? BaseLoss

Check the model performance:

In [None]:
learn.show_results(max_n=6, figsize=(7, 8))

#### Fine-tune the model

In [None]:
learn.fine_tune(8)

Check the results

In [None]:
learn.show_results(max_n=6, figsize=(7, 8))

#### Save the model

In [None]:
learn.save('unet_cv_ft', with_opt=True)

#### Copy and download the model weights

In [None]:
(learn.path / learn.model_dir).ls()

In [None]:
! cp  {learn.path / learn.model_dir / 'unet_cv_ft.pth'} /

In [None]:
?? learn.save

#### Use data block API for image loading

In [None]:
camvid = DataBlock(blocks=(ImageBlock, MaskBlock(codes)),
                   get_items = get_image_files,
                   get_y = label_func,
                   splitter=RandomSplitter(),
                   batch_tfms=aug_transforms(size=(120,160)))

In [None]:
dls = camvid.dataloaders(path/"images", path=path, bs=8)

In [None]:
dls.show_batch(max_n=8)

In [None]:
?? get_image_files

In [None]:
?? get_files

#### Get meta-data for model initialization

In [None]:
dls.one_batch()[0].shape[-2:]

In [None]:
get_c(dls)

## Find learning rate

In [None]:
learn.lr_find()

Fine-tuning means that the weights and biases of all layers are updated, using discriminative learning rates.

In [None]:
?? learn.fine_tune

#### Task: Try training with different image sizes (progressive growing) and different augmentations

## Object detection with Detectron2 library

Detectron2 is Facebook's library, built on top of PyTorch, for fast training of object detection and semantic segmentation models.

#### Install library

In [None]:
! pip install -U cython pyyaml==5.1

In [None]:
! pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

In [None]:
! git clone https://github.com/facebookresearch/detectron2 detectron2_repo 
! pip install -e detectron2_repo

In [None]:
! pip install -U watermark

In [None]:
%watermark

In [None]:
%reload_ext watermark %watermark -v -p numpy, pandas, pycocotools, torch, torchvision, detectron2

In [None]:
! pip install seaborn

#### Prepare dataset

In [None]:
! pip install kaggle

In [None]:
from pathlib import Path

In [None]:
Path.home()

In [None]:
path = Path.home() /'.kaggle'
path.mkdir(exist_ok=True)

In [None]:
import json
import zipfile
import os

In [None]:
# Never commit a real API key: the credentials are read from the environment and
# the placeholder values are used only when the variables are not set.
api_token = {"username": os.environ.get("KAGGLE_USERNAME", "levants"),
             "key": os.environ.get("KAGGLE_KEY", "your-key")}

json_path = path / 'kaggle.json'
with json_path.open(mode='w') as file:
    json.dump(api_token, file)

In [None]:
json_path

In [None]:
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets list

In [None]:
! kaggle competitions download -c global-wheat-detection

In [None]:
datasets = Path('/content') / 'drive' / 'My\ Drive' / 'datasets'
! ls {datasets}

In [None]:
wheet_path = datasets / 'global-wheat-detection.zip'
wheet_path

In [None]:
! ls {data_path}

In [None]:
content = Path('/content')
content.mkdir(exist_ok=True)
data_path = content / 'wheet'
data_path.mkdir(exist_ok=True)

In [None]:
! cp {wheet_path} {data_path}

In [None]:
# The context manager closes the archive even when extraction fails
with zipfile.ZipFile(data_path / 'global-wheat-detection.zip', 'r') as zip_ref:
    zip_ref.extractall(path=data_path)

## Organize imports for Detectron2 library

In [None]:
import json
import logging
import os
import random

from pathlib import Path

import pandas as pd
import torch
from detectron2.config import get_cfg
from detectron2.data import (MetadataCatalog, DatasetCatalog)
from detectron2.engine import DefaultTrainer
from detectron2.structures import BoxMode
from detectron2.utils.logger import setup_logger
from sklearn.model_selection import train_test_split

from path_utils import root_path

## Convert dataset

In [None]:
from detectron2.structures import BoxMode

In [None]:
import pandas as pd

In [None]:
images_path = data_path / 'train' 
csv_path = data_path / 'train.csv'

In [None]:
df = pd.read_csv(csv_path)

In [None]:
df

In [None]:
class_names = df.source.unique().tolist()
classes = {class_name: idx for idx, class_name in enumerate(class_names)}
LABEL_NAMES = classes
LABEL_NAMES

In [None]:
df.image_id.unique().shape

In [None]:
import json

In [None]:
def create_dataset(df):
    """
    Convert the annotation table into a list of per-image records
    Args:
        df: annotation data frame; the columns image_id, height, width, bbox and source are read

    Returns:
        dataset_dicts: one record per distinct image_id with the keys
                       file_name, image_id, height, width and annotations
    """
    dataset_dicts = []
    # A single group-by pass replaces re-filtering the whole frame for every image;
    # sort=False keeps the images in order of first appearance, like df.image_id.unique()
    for image_id, (img_name, image_df) in enumerate(df.groupby('image_id', sort=False)):
        first_row = image_df.iloc[0]
        objs = []
        for bbox_json, source in zip(image_df['bbox'], image_df['source']):
            # bbox is a JSON list that is read as [x_min, y_min, width, height]
            # and converted to absolute corner coordinates
            bbox_raw = json.loads(bbox_json)
            bbox = [int(bbox_raw[0]), int(bbox_raw[1]),
                    int(bbox_raw[0] + bbox_raw[2]), int(bbox_raw[1] + bbox_raw[3])]
            objs.append(dict(bbox=bbox,
                             bbox_mode=BoxMode.XYXY_ABS,
                             segmentation=[],
                             # unknown sources fall back to category 0
                             category_id=classes.get(source, 0),
                             iscrowd=0))
        dataset_dicts.append({'file_name': f'{images_path}/{img_name}.jpg',
                              'image_id': image_id,
                              'height': int(first_row['height']),
                              'width': int(first_row['width']),
                              'annotations': objs})

    return dataset_dicts

In [None]:
dataset = create_dataset(df)

In [None]:
import pickle

In [None]:
pickle_path = data_path / 'dataset.pkl'

In [None]:
with pickle_path.open(mode='wb') as fp:
    pickle.dump(dataset, fp)

In [None]:
with pickle_path.open(mode='rb') as fp:
    dataset = pickle.load(fp)

Data is ordered

In [None]:
dataset[1000: 1200]

Let's shuffle the dataset and then split it into training and test / validation parts

In [None]:
import random

In [None]:
# Seeded shuffle: re-running the notebook on a freshly loaded dataset gives the same order
random.Random(2020).shuffle(dataset)

In [None]:
dataset[1000: 1200]

In [None]:
len(dataset)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
train_dt, test_dt = train_test_split(dataset, test_size=0.2, random_state=2020, stratify=None)

In [None]:
len(train_dt), len(test_dt)

#### Register datasets in detectron format

In [None]:
from detectron2.data import (MetadataCatalog, DatasetCatalog)

In [None]:
TRAIN_VAL = ['train', 'val']

In [None]:
def _register_if_not(dataset_name: str, data_func: callable):
    """
    Register a data-set under the given name unless that name is already taken
    Args:
        dataset_name: name to register the data-set under
        data_func: function passed to the catalog for this data-set

    Returns:
        the DatasetCatalog, whether or not a new registration took place
    """
    already_registered = dataset_name in DatasetCatalog.list()
    if not already_registered:
        DatasetCatalog.register(dataset_name, data_func)
        # The label names are attached to the data-set metadata at registration time
        MetadataCatalog.get(dataset_name).set(thing_classes=class_names)
        print(f'Registration of the data-set {dataset_name} is done with labels {class_names}')
        return DatasetCatalog

    print(f'Data-set {dataset_name} is already registered')
    return DatasetCatalog

In [None]:
def register_data_types(name: str, train_dicts: list, test_dicts: list) -> dict:
    """
    Register the training and validation parts of a data-set
    Args:
        name: name of data; every part is registered as '<name>_<part>'
        train_dicts: training data
        test_dicts: validation / test data

    Returns:
        data_catalogs: registered data catalogs keyed by part name
    """
    data_catalogs = dict()
    for part in TRAIN_VAL:
        part_dicts = train_dicts if part == 'train' else test_dicts
        # Bind the records through a default argument: a plain closure over the loop
        # variable is evaluated lazily, after the loop has finished, and would make
        # every registered part return the validation records
        data_catalogs[part] = _register_if_not(f'{name}_{part}', lambda dicts=part_dicts: dicts)

    return data_catalogs

In [None]:
register_data_types('wheets', train_dt, test_dt)

#### Train the Faster-RCNN on our custom dataset

In [None]:
cfg_path = Path('configs') / 'faster_rcnn_X_101_32x8d_FPN_3x.yaml'

In [None]:
outputs_path = Path('output')
outputs_path.mkdir(exist_ok=True)

#### Set device

In [None]:
import torch

from detectron2.config import CfgNode
from detectron2.engine import DefaultTrainer
from detectron2.engine import launch
from detectron2.utils.logger import setup_logger

In [None]:
import logging
logging.basicConfig(level='DEBUG')

In [None]:
if torch.cuda.is_available():
    torch.cuda.set_device(0)

In [None]:
setup_logger()

#### Download pre-trained weights

In [None]:
R_RX_101: str = 'https://dl.fbaipublicfiles.com/detectron2/' \
                'COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl'

In [None]:
weighs_dir = Path('weights')
weighs_dir.mkdir(exist_ok=True)

In [None]:
! wget {R_RX_101} -P {weighs_dir}

In [None]:
weights_path = weighs_dir / 'model_final_68b088.pkl'

#### Configure model and dataset

In [None]:
from detectron2.config import get_cfg

In [None]:
cfg = get_cfg()
# Initialize model architecture
cfg.merge_from_file(str(cfg_path))
# Thresholds
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
# Configure model device bindings
cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Number of prediction
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(LABEL_NAMES)
# Set weights
cfg.MODEL.WEIGHTS = str(weights_path)

In [None]:
# Data: train on the split registered as 'wheets_train'; no test split is configured
cfg.DATASETS.TRAIN = ('wheets_train',)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 4
cfg.SOLVER.IMS_PER_BATCH = 4
# NOTE(review): NUM_WORKERS_PB does not look like a standard detectron2 key - confirm it is read anywhere
cfg.DATALOADER.NUM_WORKERS_PB = 2
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
# NOTE(review): MODEL.NUM_GPUS does not look like a standard detectron2 key either - confirm
cfg.MODEL.NUM_GPUS = 1
# Learning rate configuration
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.WARMUP_ITERS = 1000
cfg.SOLVER.MAX_ITER = 5000
cfg.SOLVER.STEPS = (1000, 1500)
cfg.SOLVER.GAMMA = 0.05
cfg.SOLVER.CHECKPOINT_PERIOD = 500
# Test configuration
# NOTE(review): DATASETS.TEST is empty above, so this period presumably evaluates nothing - confirm
cfg.TEST.EVAL_PERIOD = 500
# Output directory
cfg.OUTPUT_DIR = str(outputs_path)

In [None]:
import os

In [None]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

In [None]:
trainer = DefaultTrainer(cfg)

In [None]:
trainer.resume_or_load(resume=False)

In [None]:
trainer.train()

#### Evaluate detection model

In [None]:
from detectron2.config import (get_cfg, CfgNode)
from detectron2.data.catalog import Metadata
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer, ColorMode

In [None]:
cfg_eval = get_cfg()
# Initialize model architecture
cfg_eval.merge_from_file(str(cfg_path))
# Thresholds
cfg_eval.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
# Configure model device bindings
cfg_eval.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Number of prediction
cfg_eval.MODEL.ROI_HEADS.NUM_CLASSES = len(LABEL_NAMES)
# Set weights: evaluate the model trained above (the trainer writes model_final.pth
# into the output directory), not the downloaded COCO checkpoint used for initialization
cfg_eval.MODEL.WEIGHTS = str(outputs_path / 'model_final.pth')

In [None]:
predictor = DefaultPredictor(cfg_eval)

In [None]:
class CustomPredictor(object):
    """Customized predictor: runs the wrapped predictor and draws its detections on the image"""

    def __init__(self, meta_data: Metadata, predictor: DefaultPredictor, scale: float = 1.0):
        """
        Args:
            meta_data: data-set metadata handed to the visualizer
            predictor: callable that maps an image to its predictions
            scale: scale factor handed to the visualizer
        """
        self.meta_data = meta_data
        self.predictor = predictor
        self.scale = scale

    def forward(self, img: np.ndarray) -> tuple:
        """
        Run model and draw predicted bounding boxes
        Args:
            img: input image

        Returns:
            preds: list of prediction dictionaries (each one holds an 'instances' entry)
            pred_imgs: list of images with the predicted bounding boxes drawn,
                       one per entry of preds
        """
        outputs = self.predictor(img)
        # The predictor returns one dictionary for one image; iterating over it
        # directly would walk its string keys, so it is wrapped into a list first
        preds = [outputs] if isinstance(outputs, dict) else list(outputs)
        pred_imgs = list()
        for pred in preds:
            # The channel order is reversed for the visualizer and reversed back afterwards
            vis_cl = Visualizer(img[:, :, ::-1], metadata=self.meta_data, scale=self.scale,
                                instance_mode=ColorMode.IMAGE)
            vis = vis_cl.draw_instance_predictions(pred['instances'].to('cpu'))
            pred_imgs.append(vis.get_image()[:, :, ::-1])

        return preds, pred_imgs

    def __call__(self, *args, **kwargs):
        """Make the object callable like a model; delegates to forward."""
        return self.forward(*args, **kwargs)

In [None]:
# Only 'wheets_train' and 'wheets_val' are registered (with their class names);
# there is no 'wheets_test', so the validation metadata is used for drawing labels
test_metadata = MetadataCatalog.get('wheets_val')
test_metadata

In [None]:
model = CustomPredictor(test_metadata, predictor)

In [None]:
test_dir = Path('test_wheets')
test_dir.mkdir(exist_ok=True)

In [None]:
# Keep Path objects (their .name is needed when the results are saved), accept
# upper-case extensions as well, and sort for a stable processing order
test_files = sorted(test_path for test_path in test_dir.iterdir()
                    if test_path.suffix.lower() in {'.jpg', '.jpeg', '.png'})
test_files

In [None]:
import cv2

In [None]:
imgs = [(img_path, cv2.imread(str(img_path), cv2.IMREAD_ANYCOLOR)) for img_path in test_files]

In [None]:
result_dir = Path('wheet_results')
result_dir.mkdir(exist_ok=True)

In [None]:
for img_pt, cv_img in imgs:
    if cv_img is None:
        # cv2.imread returns None instead of raising when a file cannot be read
        print(f'Skipping unreadable image {img_pt}')
        continue
    _, res_imgs = model(cv_img)
    # img_pt may be a plain string path, which has no .name attribute
    cv2.imwrite(str(result_dir / Path(img_pt).name), res_imgs[0])
    

## Questions?