In [None]:
# Turn off the Jedi backend of IPython's tab completer.
%config Completer.use_jedi = False

In [None]:
import torch
from torch import nn
import torchvision
import torch.nn.functional as F
from torch.utils.data import DataLoader, SubsetRandomSampler, Subset


from retinanet.model.detection.retinanet import retinanet_resnet50_fpn
from retinanet.model.detection.transform import GeneralizedRCNNTransform
from retinanet.model.utils import outputs_to_logits, logits_to_preds

from retinanet.datasets.bird import BirdDetection, BirdClassification
from retinanet.datasets.transforms import *
from retinanet.datasets.utils import TransformDatasetWrapper, train_val_split

# Kept after the star import above so these explicit names win over anything it exports.
import os
import random
from collections import OrderedDict

import cv2
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Select the compute device: the GPU when CUDA is available, the CPU otherwise.
if torch.cuda.is_available():
    device_str = "cuda"
else:
    device_str = "cpu"
device = torch.device(device_str)
print("Torch Using device:", device)

In [None]:
# Directory the dataset objects below are loaded from (absolute, machine-specific path).
data_log_dir = "/workspace8/RetinaNet/experiments/dataset"
# Preprocessing applied to every sample: conversion to a tensor (constructed with the
# selected device), then normalisation. The two lists are presumably per-channel mean and
# std -- the plotting cells undo it with x * [0.229, ...] + [0.485, ...].
train_transform = Compose(
    [
        ToTensor(device),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

In [None]:
# Image-level classification data, handled one split at a time:
# create the dataset, load it from data_log_dir, then wrap it with the preprocessing.
train_dataset_cls = BirdClassification()
train_dataset_cls.load(data_log_dir, file_name="train_cls")
train_dataset_cls = TransformDatasetWrapper(train_dataset_cls, train_transform)

# The validation split goes through the same transform as the training split.
val_dataset_cls = BirdClassification()
val_dataset_cls.load(data_log_dir, file_name="validation_cls")
val_dataset_cls = TransformDatasetWrapper(val_dataset_cls, train_transform)

In [None]:
# Detection data, handled one split at a time:
# create the dataset, load it from data_log_dir, then wrap it with the preprocessing.
train_dataset_det = BirdDetection()
train_dataset_det.load(data_log_dir, file_name="train_detection")
train_dataset_det = TransformDatasetWrapper(train_dataset_det, train_transform)

# The validation split goes through the same transform as the training split.
val_dataset_det = BirdDetection()
val_dataset_det.load(data_log_dir, file_name="validation_detection")
val_dataset_det = TransformDatasetWrapper(val_dataset_det, train_transform)

In [None]:
import sys

# Make the local video_toolkit checkout importable. The membership check keeps the cell
# idempotent: re-running it no longer stacks duplicate entries at the front of sys.path.
video_toolkit_dir = "/workspace8/video_toolkit/"
if video_toolkit_dir not in sys.path:
    sys.path.insert(0, video_toolkit_dir)
from VideoToolkit.tools import rescal_to_image, get_cv_resize_function

# Resize helper used by the plotting cells as resize_func(array, (height, width)).
resize_func = get_cv_resize_function()

In [None]:
def get_features(model, images, device=None):
    """Return channel-averaged backbone feature maps for a list of images.

    The images are batched with a GeneralizedRCNNTransform (min size 800, max size 1333,
    normalisation lists [0.485, 0.456, 0.406] / [0.229, 0.224, 0.225]), passed through
    ``model.backbone``, and every resulting feature map is averaged over dim 1.

    Args:
        model: network exposing a ``backbone`` sub-module.
        images: list of image tensors accepted by GeneralizedRCNNTransform.
        device: device the batched tensor is moved to before the backbone runs.
            When omitted, the device of the model's first parameter is used.

    Returns:
        list of tensors, one per backbone output, each reduced with ``mean(1)``.

    Note:
        No ``torch.no_grad()`` is applied here; wrap the call when only inference is needed.
    """
    if device is None:
        # Default to wherever the model lives so the input and the weights share a device.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    transform = GeneralizedRCNNTransform(800, 1333, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    batch, _ = transform(images, None)

    # get the features from the backbone
    feature_maps = model.backbone(batch.tensors.to(device))

    # A backbone may return one bare tensor; wrap it so both cases are handled as a mapping.
    if isinstance(feature_maps, torch.Tensor):
        feature_maps = OrderedDict([("0", feature_maps)])

    return [feat.mean(1) for feat in feature_maps.values()]

In [None]:
# Build a two-class RetinaNet (ResNet-50 FPN) without any pretrained weights, place it on
# the selected device and switch it to evaluation mode. Checkpoints are loaded further down.
model = retinanet_resnet50_fpn(
    num_classes=2,
    pretrained=False,
    pretrained_backbone=False,
).to(device)
model.eval()

In [None]:
def get_res(model, inp):
    """Run the model on one dataset sample and return its image-level classification result.

    Args:
        model: network that, in eval mode and called as ``model([image], [label])``,
            returns a ``(losses, box_predictions, class_predictions)`` triple.
        inp: ``(image, label)`` pair; ``label`` is a mapping containing "img_cls_labels".

    Returns:
        ``(logit, pred, label)``: the output of ``outputs_to_logits``, the thresholded
        predictions from ``logits_to_preds``, and ``label["img_cls_labels"]``.
    """
    model.eval()
    image, label = inp

    # Evaluation only: no_grad avoids recording an autograd graph for every sample,
    # which matters when this is called in a loop over a whole dataset.
    with torch.no_grad():
        _, _, cls_pred = model([image], [label])
        logit = outputs_to_logits(cls_pred)
        pred = logits_to_preds(logit)

    label = label["img_cls_labels"]

    return logit, pred, label

# def logits_to_preds(logits):
#     return (logits > torch.min(logits)).float()

def logits_to_preds(logits, threshold=0.5):
    """Binarise scores: 1.0 where ``logits`` is strictly greater than ``threshold``, else 0.0.

    This definition replaces the ``logits_to_preds`` imported from ``retinanet.model.utils``
    for the rest of the notebook.

    Args:
        logits: tensor of scores.
        threshold: decision boundary; the default 0.5 matches the previously fixed value.

    Returns:
        float tensor with the same shape as ``logits``.
    """
    return (logits > threshold).float()

def get_errors(model, dataset):
    """Return the indices of the samples whose predictions do not all equal the label.

    Every sample is evaluated with ``get_res``; for each mismatch the index, logits,
    predictions and label are printed before the index is recorded.
    """
    mismatched = []
    for sample_idx in range(len(dataset)):
        logits, preds, label = get_res(model, dataset[sample_idx])

        # Nothing to report when every prediction agrees with the label.
        if torch.eq(preds, label).all():
            continue

        print(f"\n index: {sample_idx}")
        print(f"logits: {logits}")
        print(f"preds : {preds}")
        print(f"label : {label}")
        mismatched.append(sample_idx)
    return mismatched

## Train Detection from scratch

In [None]:
# Restore the detection checkpoint trained from scratch. map_location=device lets a
# checkpoint saved on a GPU load on a CPU-only machine as well.
model.load_state_dict(
    torch.load(
        "/workspace8/RetinaNet/experiments/checkpoints/best_chpt_0_1_det_scratch.pth",
        map_location=device,
    )
)

In [None]:
# Sample one training image. random.randint includes both endpoints, hence the -1.
idx = random.randint(0, len(train_dataset_cls)-1)
img = train_dataset_cls[idx][0]

model.eval()
# Pure inference: no_grad keeps both forward passes from recording an autograd graph.
with torch.no_grad():
    # Channel-averaged backbone feature maps, one entry per backbone output.
    features = get_features(model, [img], device)
    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"backbone map {j-1} (channel mean)")

In [None]:
model.eval()

# Class-weighted activation maps: every backbone feature map is scaled channel-wise by the
# image-level classifier's weights for class 1 and then averaged over channels.
# Pure inference, so everything runs under no_grad (no autograd graph is recorded).
with torch.no_grad():
    images = [img]
    transform = GeneralizedRCNNTransform(800, 1333, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    images, _ = transform(images, None)

    # Backbone features; a bare tensor is wrapped so both cases are handled as a mapping.
    features = model.backbone(images.tensors.to(device))
    if isinstance(features, torch.Tensor):
        features = OrderedDict([("0", features)])
    features = list(features.values())

    # Row target_cls of fc.weight is cut into one chunk per feature map, each as long as the
    # feature channel count (5 x 256 for a 1280-input head on 256-channel maps).
    # Assumes the head concatenates per-level features in level order -- TODO confirm.
    target_cls = 1
    fc_weight = model.head.image_classification_head.fc.weight
    level_weights = fc_weight[target_cls].reshape(-1, features[0].shape[1])
    features = [
        (feat * level_weights[i][None, :, None, None]).mean(1)
        for i, feat in enumerate(features)
    ]

    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"class-{target_cls} weighted map {j-1}")

## Train Detection transfer learning

In [None]:
# Restore the detection checkpoint trained with transfer learning. map_location=device lets
# a checkpoint saved on a GPU load on a CPU-only machine as well.
model.load_state_dict(
    torch.load(
        "/workspace8/RetinaNet/experiments/checkpoints/best_chpt_0_2_det_transferlr.pth",
        map_location=device,
    )
)

In [None]:
# Sample one training image. random.randint includes both endpoints, hence the -1.
idx = random.randint(0, len(train_dataset_cls)-1)
img = train_dataset_cls[idx][0]

model.eval()
# Pure inference: no_grad keeps both forward passes from recording an autograd graph.
with torch.no_grad():
    # Channel-averaged backbone feature maps, one entry per backbone output.
    features = get_features(model, [img], device)
    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"backbone map {j-1} (channel mean)")

In [None]:
model.eval()

# Class-weighted activation maps: every backbone feature map is scaled channel-wise by the
# image-level classifier's weights for class 1 and then averaged over channels.
# Pure inference, so everything runs under no_grad (no autograd graph is recorded).
with torch.no_grad():
    images = [img]
    transform = GeneralizedRCNNTransform(800, 1333, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    images, _ = transform(images, None)

    # Backbone features; a bare tensor is wrapped so both cases are handled as a mapping.
    features = model.backbone(images.tensors.to(device))
    if isinstance(features, torch.Tensor):
        features = OrderedDict([("0", features)])
    features = list(features.values())

    # Row target_cls of fc.weight is cut into one chunk per feature map, each as long as the
    # feature channel count (5 x 256 for a 1280-input head on 256-channel maps).
    # Assumes the head concatenates per-level features in level order -- TODO confirm.
    target_cls = 1
    fc_weight = model.head.image_classification_head.fc.weight
    level_weights = fc_weight[target_cls].reshape(-1, features[0].shape[1])
    features = [
        (feat * level_weights[i][None, :, None, None]).mean(1)
        for i, feat in enumerate(features)
    ]

    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"class-{target_cls} weighted map {j-1}")

## Train Image Level Classifier from scratch

In [None]:
# Restore the image-level classifier checkpoint trained from scratch. map_location=device
# lets a checkpoint saved on a GPU load on a CPU-only machine as well.
model.load_state_dict(
    torch.load(
        "/workspace8/RetinaNet/experiments/checkpoints/best_chpt_1_1_img_cls_scratch.pth",
        map_location=device,
    )
)

In [None]:
# Indices of the training samples whose image-level prediction differs from the label;
# get_errors also prints the logits, predictions and label of every mismatch.
err_indices = get_errors(model, train_dataset_cls)

In [None]:
# Number of misclassified training samples collected in err_indices.
len(err_indices)

In [None]:
# Pick a random training image. randrange excludes its upper bound, so idx is always a
# valid index; randint(0, len(...)) could return len(...) itself and raise IndexError.
idx = random.randrange(len(train_dataset_cls))
img = train_dataset_cls[idx][0]

In [None]:
# Pick a random misclassified training image. randrange excludes its upper bound, so idx is
# always a valid position in err_indices; randint(0, len(...)) could overshoot by one.
# With no misclassified samples, randrange raises ValueError (empty range).
idx = random.randrange(len(err_indices))
img = train_dataset_cls[err_indices[idx]][0]

In [None]:
model.eval()
# Pure inference: no_grad keeps both forward passes from recording an autograd graph.
with torch.no_grad():
    # Channel-averaged backbone feature maps, one entry per backbone output.
    features = get_features(model, [img], device)
    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"backbone map {j-1} (channel mean)")

In [None]:
model.eval()

# Class-weighted activation maps: every backbone feature map is scaled channel-wise by the
# image-level classifier's weights for class 1 and then averaged over channels.
# Pure inference, so everything runs under no_grad (no autograd graph is recorded).
with torch.no_grad():
    images = [img]
    transform = GeneralizedRCNNTransform(800, 1333, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    images, _ = transform(images, None)

    # Backbone features; a bare tensor is wrapped so both cases are handled as a mapping.
    features = model.backbone(images.tensors.to(device))
    if isinstance(features, torch.Tensor):
        features = OrderedDict([("0", features)])
    features = list(features.values())

    # Row target_cls of fc.weight is cut into one chunk per feature map, each as long as the
    # feature channel count (5 x 256 for a 1280-input head on 256-channel maps).
    # Assumes the head concatenates per-level features in level order -- TODO confirm.
    target_cls = 1
    fc_weight = model.head.image_classification_head.fc.weight
    level_weights = fc_weight[target_cls].reshape(-1, features[0].shape[1])
    features = [
        (feat * level_weights[i][None, :, None, None]).mean(1)
        for i, feat in enumerate(features)
    ]

    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"class-{target_cls} weighted map {j-1}")

### Finetune on Detection Task
#### (from scratch)

In [None]:
# Restore the checkpoint fine-tuned on detection (from-scratch branch). map_location=device
# lets a checkpoint saved on a GPU load on a CPU-only machine as well.
model.load_state_dict(
    torch.load(
        "/workspace8/RetinaNet/experiments/checkpoints/best_chpt_1_2_ft_det_scratch.pth",
        map_location=device,
    )
)

In [None]:
# Sample one training image. random.randint includes both endpoints, hence the -1.
idx = random.randint(0, len(train_dataset_cls)-1)
img = train_dataset_cls[idx][0]

model.eval()
# Pure inference: no_grad keeps both forward passes from recording an autograd graph.
with torch.no_grad():
    # Channel-averaged backbone feature maps, one entry per backbone output.
    features = get_features(model, [img], device)
    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"backbone map {j-1} (channel mean)")

In [None]:
model.eval()

# Class-weighted activation maps: every backbone feature map is scaled channel-wise by the
# image-level classifier's weights for class 1 and then averaged over channels.
# Pure inference, so everything runs under no_grad (no autograd graph is recorded).
with torch.no_grad():
    images = [img]
    transform = GeneralizedRCNNTransform(800, 1333, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    images, _ = transform(images, None)

    # Backbone features; a bare tensor is wrapped so both cases are handled as a mapping.
    features = model.backbone(images.tensors.to(device))
    if isinstance(features, torch.Tensor):
        features = OrderedDict([("0", features)])
    features = list(features.values())

    # Row target_cls of fc.weight is cut into one chunk per feature map, each as long as the
    # feature channel count (5 x 256 for a 1280-input head on 256-channel maps).
    # Assumes the head concatenates per-level features in level order -- TODO confirm.
    target_cls = 1
    fc_weight = model.head.image_classification_head.fc.weight
    level_weights = fc_weight[target_cls].reshape(-1, features[0].shape[1])
    features = [
        (feat * level_weights[i][None, :, None, None]).mean(1)
        for i, feat in enumerate(features)
    ]

    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"class-{target_cls} weighted map {j-1}")

## Train Image Level Classifier with transfer learning

In [None]:
# Restore the image-level classifier checkpoint trained with transfer learning.
# map_location=device lets a checkpoint saved on a GPU load on a CPU-only machine as well.
model.load_state_dict(
    torch.load(
        "/workspace8/RetinaNet/experiments/checkpoints/best_chpt_avg_2_1_img_cls_transferlr.pth",
        map_location=device,
    )
)

In [None]:
# Indices of the training samples whose image-level prediction differs from the label;
# get_errors also prints the logits, predictions and label of every mismatch.
err_indices = get_errors(model, train_dataset_cls)

In [None]:
# Pick a random training image. randrange excludes its upper bound, so idx is always a
# valid index; randint(0, len(...)) could return len(...) itself and raise IndexError.
idx = random.randrange(len(train_dataset_cls))
img = train_dataset_cls[idx][0]

In [None]:
# randrange excludes its upper bound, so idx is a valid position in err_indices
# (randint(0, len(...)) could overshoot by one). max(..., 1) keeps the draw valid, always 0,
# when err_indices is empty.
idx = random.randrange(max(len(err_indices), 1))
# NOTE(review): the image is pinned to sample 12 and idx is not used here; the matching cell
# for the from-scratch classifier uses err_indices[idx] -- confirm which one is intended.
img = train_dataset_cls[12][0]

In [None]:
model.eval()
# Pure inference: no_grad keeps both forward passes from recording an autograd graph.
with torch.no_grad():
    # Channel-averaged backbone feature maps, one entry per backbone output.
    features = get_features(model, [img], device)
    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Box drawing is disabled in this cell; only the number of kept detections is printed.
print(len(boxes))

# Top-left panel: the image. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input image")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"backbone map {j-1} (channel mean)")

In [None]:
model.eval()

# Class-weighted activation maps: every backbone feature map is scaled channel-wise by the
# image-level classifier's weights for class 1 and then averaged over channels.
# Pure inference, so everything runs under no_grad (no autograd graph is recorded).
with torch.no_grad():
    images = [img]
    transform = GeneralizedRCNNTransform(800, 1333, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    images, _ = transform(images, None)

    # Backbone features; a bare tensor is wrapped so both cases are handled as a mapping.
    features = model.backbone(images.tensors.to(device))
    if isinstance(features, torch.Tensor):
        features = OrderedDict([("0", features)])
    features = list(features.values())

    # Row target_cls of fc.weight is cut into one chunk per feature map, each as long as the
    # feature channel count (5 x 256 for a 1280-input head on 256-channel maps).
    # Assumes the head concatenates per-level features in level order -- TODO confirm.
    target_cls = 1
    fc_weight = model.head.image_classification_head.fc.weight
    level_weights = fc_weight[target_cls].reshape(-1, features[0].shape[1])
    features = [
        (feat * level_weights[i][None, :, None, None]).mean(1)
        for i, feat in enumerate(features)
    ]

    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Box drawing is disabled in this cell; only the number of kept detections is printed.
print(len(boxes))

# Top-left panel: the image. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input image")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"class-{target_cls} weighted map {j-1}")

### Finetune on Detection Task
#### (transfer learning)

In [None]:
# Restore the checkpoint fine-tuned on detection (transfer-learning branch).
# map_location=device lets a checkpoint saved on a GPU load on a CPU-only machine as well.
model.load_state_dict(
    torch.load(
        "/workspace8/RetinaNet/experiments/checkpoints/best_chpt_2_2_ft_det_transferlr.pth",
        map_location=device,
    )
)

In [None]:
# Sample one training image. random.randint includes both endpoints, hence the -1.
idx = random.randint(0, len(train_dataset_cls)-1)
img = train_dataset_cls[idx][0]

model.eval()
# Pure inference: no_grad keeps both forward passes from recording an autograd graph.
with torch.no_grad():
    # Channel-averaged backbone feature maps, one entry per backbone output.
    features = get_features(model, [img], device)
    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"backbone map {j-1} (channel mean)")

In [None]:
model.eval()

# Class-weighted activation maps: every backbone feature map is scaled channel-wise by the
# image-level classifier's weights for class 1 and then averaged over channels.
# Pure inference, so everything runs under no_grad (no autograd graph is recorded).
with torch.no_grad():
    images = [img]
    transform = GeneralizedRCNNTransform(800, 1333, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    images, _ = transform(images, None)

    # Backbone features; a bare tensor is wrapped so both cases are handled as a mapping.
    features = model.backbone(images.tensors.to(device))
    if isinstance(features, torch.Tensor):
        features = OrderedDict([("0", features)])
    features = list(features.values())

    # Row target_cls of fc.weight is cut into one chunk per feature map, each as long as the
    # feature channel count (5 x 256 for a 1280-input head on 256-channel maps).
    # Assumes the head concatenates per-level features in level order -- TODO confirm.
    target_cls = 1
    fc_weight = model.head.image_classification_head.fc.weight
    level_weights = fc_weight[target_cls].reshape(-1, features[0].shape[1])
    features = [
        (feat * level_weights[i][None, :, None, None]).mean(1)
        for i, feat in enumerate(features)
    ]

    # Detections for the same image.
    predicted = model([img])

imact = [feat.squeeze().cpu().detach().numpy() for feat in features]

# Suppress overlapping detections (IoU threshold 0.1) and move the survivors to NumPy.
keep = torchvision.ops.nms(predicted[0]["boxes"], predicted[0]["scores"], 0.1)
keep = keep.cpu().numpy()
boxes = list(np.floor(predicted[0]["boxes"].cpu().detach().numpy()[keep]))
scores = list(predicted[0]["scores"].cpu().detach().numpy()[keep])

# Undo the dataset normalisation (x * std + mean) to get an HWC image, then clip to [0, 1]
# so imshow receives valid float RGB instead of clipping it itself with a warning.
img_n = img.cpu().permute((1, 2, 0)).numpy().copy() * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_n = np.clip(img_n, 0.0, 1.0)

# Draw detections scoring above 0.2. The image is float RGB in [0, 1], so blue is 1.0.
print(len(boxes))
for box, score in zip(boxes, scores):
    if score > 0.2:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img_n, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

# Top-left panel: image with boxes. Other panels: up to five maps resized to the image.
fig, axarr = plt.subplots(2, 3, figsize=(15,10))
axarr[0, 0].imshow(img_n)
axarr[0, 0].set_title("input + detections (score > 0.2)")
for j, act in enumerate(imact[:5], start=1):
    axarr[j//3, j%3].imshow(resize_func(act, img_n.shape[:2]))
    axarr[j//3, j%3].set_title(f"class-{target_cls} weighted map {j-1}")