# Notebook to save predictions to 51

## Imports

In [1]:
import fiftyone as fo
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.models as models
import cv2
from PIL import Image 
from utils.general import check_img_size
from torchvision import transforms
from torchvision.transforms import (
    ToTensor,
    ToPILImage,
    Resize
)
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from models.common import DetectMultiBackend
from utils.torch_utils import select_device, smart_inference_mode
from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml,
                           coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args,
                           scale_boxes, xywh2xyxy, xyxy2xywh)
%matplotlib inline

## Get the dataset and filter it

In [90]:
dataset = fo.load_dataset("esmart_context")

In [87]:
fo.launch_app(dataset, port=5151)

Dataset:          esmart_context
Media type:       image
Num samples:      41730
Selected samples: 0
Selected labels:  0
Session URL:      http://localhost:5151/

## Define the class names ( ⚠️ to the order)

In [5]:
class_names = ['dry', 'snowy', 'wet']

In [6]:
names = ['speed_limit', "rw_tcd", "highway",  "interchange", "urban", "rural-paved", "rural-dirt", "parking", 
         "res1", "res2"]

## Load the model 

In [7]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

Using cuda device


In [8]:
IMAGE_SIZE = (512, 512) 

In [9]:
WEIGHTS_PATH = "runs/train-mlt/exp226/weights/last.pt"

model = DetectMultiBackend(WEIGHTS_PATH, dnn=False, fp16=False)
stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
imgsz = check_img_size(IMAGE_SIZE, s=stride)  # check image size
half = model.fp16  # FP16 supported on limited backends with CUDA
model = model.to(device)
model.eval()

Fusing layers... 
YOLOv5s_mlt summary: 163 layers, 7697578 parameters, 0 gradients, 16.4 GFLOPs


DetectMultiBackend(
  (model): HybridModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 32, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (act): SiLU(inplace=True)
      )
      (2): C3(
        (cv1): Conv(
          (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
          (act): SiLU(inplace=True)
        )
        (cv3): Conv(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
          (act): SiLU(inplace=True)
        )
        (m): Sequential(
          (0): Bottleneck(
            (cv1): Conv(
              (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
              (act): SiLU(inplace=True)
            )
            (cv2): Conv(
  

In [10]:
imgsz

[512, 512]

## Save predictions to 51

In [11]:
INFER_FIFTY_ONE = True # for "security"

In [24]:
model.fp16
#val_transform = transforms.Compose([Resize(IMAGE_SIZE), ToTensor()])

False

In [None]:
# Delete old predictions
dataset.delete_sample_field("detection_yolo-mlt")

In [88]:
temperature = 1

if INFER_FIFTY_ONE:
    with fo.ProgressBar() as pb:
        for sample in pb(dataset):
            if 'VAL_RC_SEQ_2' in sample.tags:

                # image preprocessing
                bs = 1  # batch_size
                det = None
                dataset = LoadImages(sample["filepath"], img_size=imgsz, stride=32,
                                        vid_stride=1)

                for path, im, im0s, vid_cap, s in dataset:
                    #print(im.shape)
                    #print(im0s.shape)

                    im = torch.from_numpy(im).to(device)
                    im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
                    im /= 255  # 0 - 255 to 0.0 - 1.0
                    if len(im.shape) == 3:
                        im = im[None]  # expand for batch dim\
                    #print(path)
                    #print("im shape", im.shape)
                    # inference
                    #model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *IMAGE_SIZE))  # warmup
                    preds = model(im, augment=False, visualize=False)
                    (pred, cls_pred) = preds
                    pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45,
                                               agnostic=False, max_det=1000)
                    #print(pred)

                    # classification 
                    cls_pred = cls_pred / temperature  # temperature scaling
                    #print("cls_pred", cls_pred)
                    y_softmaxed = F.softmax(cls_pred, dim=1)
                    #print(y_softmaxed))
                    conf, class_num = torch.max(y_softmaxed, 1)
                    category = class_names[class_num]
                    confidence = round(float(conf[0]),3)
                    
                    # detection
                    detections = []
                    for si, det in enumerate(pred):
       
                        gn = torch.tensor(im0s.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                        if len(det):
                            # Rescale boxes from img_size to im0 size
                            det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0s.shape).round()

                        for *xyxy, conf, cls in reversed(det):
                            #print("xyxy", xyxy)
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                            line = (cls, *xywh, conf)
                            #print("line", line)
                            x, y, w, h = xywh
                            x = x -w/2
                            y = y -h/2
                            category_det = names[int(cls.item())]
                            detections.append(
                                fo.Detection(
                                    label=category_det,
                                    bounding_box=[x, y, w, h],
                                    confidence=conf
                                )
                            )
                sample["detections_yolo-mlt"] = fo.Detections(detections=detections)
                sample["classification_yolo-mlt"] = fo.Classifications(
                    classifications=[fo.Classification(label=category,confidence=confidence)])
                sample.save()

 100% |█████████████| 41730/41730 [6.3m elapsed, 0s remaining, 26.9 samples/s]       


 100% |█████████████| 41730/41730 [6.3m elapsed, 0s remaining, 26.9 samples/s]       


# mAP evaluation between the ground truth and the detection.

In [91]:
validation = dataset.match_tags("VAL_RC_SEQ_2")

In [82]:
results = validation.evaluate_detections(
    "detections_yolo-mlt",
    gt_field="ground_truth",
    eval_key="eval",
    compute_mAP=True,
)
# Get the 10 most common classes in the dataset
counts = dataset.count_values("ground_truth.detections.label")
classes_top10 = sorted(counts, key=counts.get, reverse=True)[:10]

# Print a classification report for the top-10 classes
results.print_report(classes=classes_top10)

Evaluating detections...


Evaluating detections...


 100% |█████████████████| 518/518 [16.3s elapsed, 0s remaining, 31.5 samples/s]      


 100% |█████████████████| 518/518 [16.3s elapsed, 0s remaining, 31.5 samples/s]      


Performing IoU sweep...


Performing IoU sweep...


 100% |█████████████████| 518/518 [2.1s elapsed, 0s remaining, 280.1 samples/s]      


 100% |█████████████████| 518/518 [2.1s elapsed, 0s remaining, 280.1 samples/s]      


              precision    recall  f1-score   support

roadwork_tcd       0.00      0.00      0.00       212
       lanes       0.00      0.00      0.00       481
 speed_limit       0.92      0.89      0.91       630

   micro avg       0.92      0.43      0.58      1323
   macro avg       0.31      0.30      0.30      1323
weighted avg       0.44      0.43      0.43      1323



TODO: investigate why it's returning 0 for some classes

In [83]:
print(results.mAP())

0.20475195405620036
