In [None]:
# Turn off Jedi for the IPython tab completer (Completer.use_jedi = False).
%config Completer.use_jedi = False

## Train

#### First we train the default RetinaNet model on Detection data

In [None]:
# Stage 1: run scripts/retinanet_train.py on the detection data in ../dataset,
# writing under ./experiments (--log_dir).
# Presumably the best checkpoint ends up at
# experiments/checkpoints/best_chpt_<tag>.pth, since the classification stage
# later passes that file via --pretrain — confirm in the script.
# NOTE(review): --train_percent / --use_p_of_data look like the train split
# fraction and the fraction of data used — confirm against the script's argparse.
!PYTHONPATH=$(pwd) python ./scripts/retinanet_train.py --lr 0.00001 --max_epoch 100 --batch_size 4 --tag retinanet_with_img_cls_bboxes --log_dir $(pwd)/experiments --train_percent .7 --use_p_of_data 1 --data_dir $(pwd)/../dataset

#### Then we train the feature-extraction pyramid network on an image-level classification task

In [None]:
# Install py3nvml with the shell's pip, i.e. for the same interpreter that the
# `!python ./scripts/...` cells use.
# Presumably required by the GPU memory trace (GPU_DEBUG) variant of
# image_cls_train.py — TODO confirm and pin a version.
!pip install py3nvml

In [None]:
# For a GPU memory trace, use this cell instead of the cell below and uncomment
# the sys.trace... line in the script.
# NOTE(review): this variant uses --tag retinanet_with_img_cls and --batch_size 4,
# while the cell below uses --tag retinanet_with_img_cls_images and --batch_size 8.
# The fine-tuning stage loads best_chpt_retinanet_with_img_cls_images.pth, so the
# tag presumably has to be adjusted if this cell replaces the one below — confirm.
!PYTHONPATH=$(pwd) CUDA_VISIBLE_DEVICES=0 GPU_DEBUG=0 python ./scripts/image_cls_train.py --lr 0.0001 --max_epoch 100 --batch_size 4 --tag retinanet_with_img_cls --pretrain $(pwd)/experiments/checkpoints/best_chpt_retinanet_with_img_cls_bboxes.pth --train_percent .85 --use_p_of_data 1 --data_dir $(pwd)/../data/train --log_dir $(pwd)/experiments

In [None]:
# Stage 2: run scripts/image_cls_train.py (image-level classification) on
# ../data/train, starting from the stage-1 detection checkpoint passed via
# --pretrain. The tag used here (retinanet_with_img_cls_images) is the one whose
# checkpoint the fine-tuning stage loads afterwards.
!PYTHONPATH=$(pwd) python ./scripts/image_cls_train.py --lr 0.0001 --max_epoch 100 --batch_size 8 --tag retinanet_with_img_cls_images --pretrain $(pwd)/experiments/checkpoints/best_chpt_retinanet_with_img_cls_bboxes.pth --train_percent .85 --use_p_of_data 1 --data_dir $(pwd)/../data/train --log_dir $(pwd)/experiments

#### Now, finetuning on detection should give better results

In [None]:
# Stage 3: fine-tune the detector (scripts/retinanet_train.py) on ../dataset,
# starting from the stage-2 classification checkpoint passed via --pretrain.
# NOTE: the tag is spelled "fineyune"; the Evaluation section loads a checkpoint
# file with exactly this spelling, so change both together or not at all.
!PYTHONPATH=$(pwd) python ./scripts/retinanet_train.py --lr 0.00001 --max_epoch 100 --batch_size 4 --tag retinanet_with_img_cls_bboxes_fineyune_after_cls --pretrain $(pwd)/experiments/checkpoints/best_chpt_retinanet_with_img_cls_images.pth --log_dir $(pwd)/experiments --train_percent .7 --use_p_of_data 1 --data_dir $(pwd)/../dataset

In [None]:
# Print the working directory; the relative paths used in this notebook
# (./scripts, ../dataset, ../data) are resolved against it.
!pwd

In [None]:
# Show the GPU status reported by nvidia-smi.
!nvidia-smi

In [None]:
import numpy as np
import torch
import torchvision

## Dataset

In [None]:
from torch.utils.data import Dataset, DataLoader
import os
import cv2
from retinanet.datasets.bird import BirdDetection

In [None]:
# Dataset without any transform applied ("notf"); the image and annotation
# folders are siblings under ../dataset.
ds_notf = BirdDetection(
    annotations_dir="../dataset/ann",
    image_dir="../dataset/data",
)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Sanity check: draw the annotated boxes of sample 1 on its image.
item = ds_notf[1]
for box in item[1]["boxes"]:
    # cv2.rectangle expects integer pixel coordinates, so cast explicitly — the
    # prediction-drawing cell at the end of the notebook does the same.
    pt1 = (int(box[0]), int(box[1]))
    pt2 = (int(box[2]), int(box[3]))
    cv2.rectangle(item[0], pt1, pt2, (0, 0, 255), 2)
plt.imshow(item[0])
plt.show()

In [None]:
from retinanet.datasets.transforms import *
from retinanet.datasets.utils import TransformDatasetWrapper

In [None]:
# Preprocessing applied to every sample: tensor conversion followed by
# per-channel normalisation (the constants match the commonly used ImageNet
# mean / std values).
preprocessing_steps = [
    ToTensor(),
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
transform = Compose(preprocessing_steps)

In [None]:
# Model-ready dataset: wraps the raw dataset together with `transform`
# (presumably applied per sample on access — see TransformDatasetWrapper).
ds = TransformDatasetWrapper(ds_notf, transform)

## Evaluation

In [None]:
from retinanet.model.detection.retinanet import retinanet_resnet50_fpn

In [None]:
# Build the detector with 2 classes. No pretrained weights are requested here;
# the trained checkpoint is loaded in the following cell.
model = retinanet_resnet50_fpn(
    pretrained=False,
    pretrained_backbone=False,
    num_classes=2,
)

In [None]:
# Load the weights produced by the fine-tuning stage.
# The path is relative to the working directory (the training cells write to
# $(pwd)/experiments) rather than a machine-specific absolute path.
# map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only host;
# the cells below run the model on the CPU and call .numpy() on its outputs.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
checkpoint_path = os.path.join(
    "experiments",
    "checkpoints",
    "best_chpt_retinanet_with_img_cls_bboxes_fineyune_after_cls.pth",
)
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))

In [None]:
# Index of the dataset sample used by the prediction and visualisation cells below.
idx = 10

In [None]:
# Put the model into evaluation mode before predicting.
model.eval()

# Prediction needs no gradients: no_grad() skips building the autograd graph,
# which saves memory. The model takes a list of image tensors; `predicted`
# holds one result per input image.
with torch.no_grad():
    predicted = model([ds[idx][0]])

In [None]:
# Non-maximum suppression on the detections of the first (only) image.
# With iou_threshold=0.0 every box that overlaps a higher-scoring box at all is
# discarded. NOTE(review): confirm this aggressive setting is intended.
keep = torchvision.ops.nms(
    boxes=predicted[0]["boxes"],
    scores=predicted[0]["scores"],
    iou_threshold=0.0,
)

In [None]:
# Draw the NMS-filtered predictions for sample `idx` on the untransformed image.
SCORE_THRESHOLD = 0.2  # only detections scoring above this value are drawn

item = ds_notf[idx]
img = item[0]
oboxes = item[1]["boxes"]

# Use a separate name for the NumPy indices so `keep` stays a tensor; rebinding
# `keep` to an ndarray made this cell fail (no `.numpy()`) when run a second time.
keep_idx = keep.numpy()
boxes = list(np.floor(predicted[0]["boxes"].detach().numpy()[keep_idx]))
scores = list(predicted[0]["scores"].detach().numpy()[keep_idx])

# Number of annotated boxes for this sample, for comparison with the drawing.
print(len(oboxes))
for box, score in zip(boxes, scores):
    if score > SCORE_THRESHOLD:
        # cv2.rectangle expects integer pixel coordinates.
        pt1 = (int(box[0]), int(box[1]))
        pt2 = (int(box[2]), int(box[3]))
        cv2.rectangle(img, pt1, pt2, (0, 0, 255), 2)

plt.imshow(img, cmap='gray')
plt.show()