# MEng-Team-Project-ML

This notebook contains the validation of our yolov8 model for our dataset.

## Get Ground Truth and Predicted Datasets

### Load COCO Classes

In [34]:
COCO_CLASSES = [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
         'hair drier', 'toothbrush' ]

### Load Pre-Saved Predicted Analytical Dataset (SQLite)

In [35]:
import os
from traffic_ml.lib.gdrive import get_gdrive_id, download_file_from_google_drive

TEST_FILE_FNAME = "test.db"
TEST_FILE_LINK  = "https://drive.google.com/file/d/1EEiUSRbU8ehg_UaOJ5x9ci8DTOD-TqrA/view?usp=sharing"

gdrive_id = get_gdrive_id(TEST_FILE_LINK)
out = download_file_from_google_drive(gdrive_id, f"./{TEST_FILE_FNAME}")

if os.path.exists(TEST_FILE_FNAME):
    print("Downloaded successfully!")
else:
    print(f"Error downloading: {TEST_FILE_FNAME}")

Downloaded successfully!


### Load Data into Dataframes

In [36]:
import sqlite3
import pandas as pd

# NOTE: Overwrite this for a local analysis
# TEST_FILE_FNAME = "00001.01350_2022-12-07T15-35-24.000Z.db"
con = sqlite3.connect("test.db")

# Detected Objects per Frame
detections_df = pd.read_sql_query("SELECT * FROM detection;", con)

con.close()

In [37]:
detections_df

Unnamed: 0,id,frame,bbox_x,bbox_y,bbox_w,bbox_h,cls,label,conf,det_id
0,1,3,1640.0,184.0,229.0,183.0,5.0,bus,0.913574,1.0
1,2,3,1508.0,334.0,216.0,128.0,2.0,car,0.906738,2.0
2,3,3,757.0,305.0,86.0,52.0,2.0,car,0.830078,3.0
3,4,3,1068.0,302.0,26.0,74.0,0.0,person,0.706543,4.0
4,5,3,167.0,277.0,236.0,98.0,7.0,truck,0.592773,5.0
...,...,...,...,...,...,...,...,...,...,...
6477,6478,900,164.0,272.0,238.0,99.0,7.0,truck,0.550781,49.0
6478,6479,900,1628.0,281.0,127.0,82.0,2.0,car,0.869629,94.0
6479,6480,900,1502.0,282.0,25.0,58.0,0.0,person,0.518555,138.0
6480,6481,900,975.0,309.0,33.0,73.0,0.0,person,0.736328,148.0


In [112]:
detections_df[(detections_df["det_id"] == 1.0)].head()

Unnamed: 0,id,frame,bbox_x,bbox_y,bbox_w,bbox_h,cls,label,conf,det_id
0,1,3,1640.0,184.0,229.0,183.0,5.0,bus,0.913574,1.0
6,7,4,1635.0,183.0,233.0,185.0,5.0,bus,0.901367,1.0
12,13,5,1629.0,183.0,239.0,186.0,5.0,bus,0.891113,1.0
18,19,6,1629.0,183.0,239.0,188.0,5.0,bus,0.898438,1.0
24,25,7,1623.0,182.0,245.0,191.0,5.0,bus,0.908203,1.0


### Load Ground Truth Analytical Dataset (SQLite)

In [39]:
import os
from traffic_ml.lib.gdrive import get_gdrive_id, download_file_from_google_drive

TEST_FILE_FNAME = "test_30.json"
TEST_FILE_LINK  = "https://drive.google.com/file/d/1-AistYuoir023crLFhhAYMAIf52ucOEr/view?usp=sharing"

gdrive_id = get_gdrive_id(TEST_FILE_LINK)
out = download_file_from_google_drive(gdrive_id, f"./{TEST_FILE_FNAME}")

if os.path.exists(TEST_FILE_FNAME):
    print("Downloaded successfully!")
else:
    print(f"Error downloading: {TEST_FILE_FNAME}")

Downloaded successfully!


### Load Ground Truth

In [40]:
from traffic_ml.lib.convertor import Annotations

In [41]:
ground_truth_df = Annotations.from_darwin("test_30.json")

In [110]:
ground_truth_df[ground_truth_df["frame"] >= 3]

Unnamed: 0,frame,bbox_x,bbox_y,bbox_w,bbox_h,label,conf,det_id
64,3,1067.0,298.0,29.0,80.0,person,1.0,1.0
1864,3,756.0,302.0,90.0,54.0,car,1.0,4.0
964,3,918.0,304.0,25.0,60.0,person,1.0,2.0
3,3,1490.0,328.0,229.0,143.0,car,1.0,0.0
1731,3,1633.0,183.0,242.0,180.0,bus,1.0,3.0
...,...,...,...,...,...,...,...,...
7709,899,1471.0,278.0,19.0,51.0,person,1.0,16.0
8361,899,859.0,300.0,30.0,96.0,person,1.0,17.0
3914,899,559.0,281.0,18.0,14.0,car,1.0,7.0
6965,899,829.0,305.0,43.0,110.0,person,1.0,13.0


## Data Validation

This section performs validation of our YOLOv8 detection model and StrongSORT tracker algorithm together. We use the ground truth annotated JSON file along with the output SQL from the det/tracking module and convert both to MOT16 format. We then use `motmetrics`, which is a library that automatically calculates MOT (Multi-Object Tracking) statistics

### MOT Metrics Method

In [103]:
def motMetricsEnhancedCalculator(gtSource, tSource):
  # import required packages
  import motmetrics as mm
  import numpy as np
  
  # load ground truth
  gt = np.loadtxt(gtSource, delimiter=',')

  # load tracking output
  t = np.loadtxt(tSource, delimiter=',')

  # Create an accumulator that will be updated during each frame
  acc = mm.MOTAccumulator(auto_id=True)

  # Max frame number maybe different for gt and t files
  for frame in range(int(gt[:,0].max())):
    frame += 1 # detection and frame numbers begin at 1

    # select id, x, y, width, height for current frame
    # required format for distance calculation is X, Y, Width, Height \
    # We already have this format
    gt_dets = gt[gt[:,0]==frame,1:6] # select all detections in gt
    t_dets = t[t[:,0]==frame,1:6] # select all detections in t

    C = mm.distances.iou_matrix(gt_dets[:,1:], t_dets[:,1:], \
                                max_iou=0.5) # format: gt, t

    # Call update once for per frame.
    # format: gt object ids, t object ids, distance
    acc.update(gt_dets[:,0].astype('int').tolist(), \
              t_dets[:,0].astype('int').tolist(), C)

  mh = mm.metrics.create()

  summary = mh.compute(acc, metrics=['num_frames', 'idf1', 'idp', 'idr', \
                                     'recall', 'precision', 'num_objects', \
                                     'mostly_tracked', 'partially_tracked', \
                                     'mostly_lost', 'num_false_positives', \
                                     'num_misses', 'num_switches', \
                                     'num_fragmentations', 'mota', 'motp' \
                                    ], \
                      name='acc')

  strsummary = mm.io.render_summary(
      summary,
      #formatters={'mota' : '{:.2%}'.format},
      namemap={'idf1': 'IDF1', 'idp': 'IDP', 'idr': 'IDR', 'recall': 'Rcll', \
               'precision': 'Prcn', 'num_objects': 'GT', \
               'mostly_tracked' : 'MT', 'partially_tracked': 'PT', \
               'mostly_lost' : 'ML', 'num_false_positives': 'FP', \
               'num_misses': 'FN', 'num_switches' : 'IDsw', \
               'num_fragmentations' : 'FM', 'mota': 'MOTA', 'motp' : 'MOTP',  \
              }
  )
  print(strsummary)

### Convert DataFrames to MOT16 Format

`<frame number>, <object id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <confidence>, <x>, <y>, <z>`\
GT: `<frame number>, <class id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, 1, -1, -1, -1`\
PRED: `<frame number>, <class id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, conf, -1, -1, -1`

In [104]:
ground_truth_df.columns

Index(['frame', 'bbox_x', 'bbox_y', 'bbox_w', 'bbox_h', 'label', 'conf',
       'det_id'],
      dtype='object')

In [105]:
def convert_row_to_mot16(row):
    frame_idx = str(row["frame"] + 1)
    x         = str(row["bbox_x"])
    y         = str(row["bbox_y"])
    w         = str(row["bbox_w"])
    h         = str(row["bbox_h"])
    l_id      = str(COCO_CLASSES.index(row["label"]))
    c         = str(row["conf"])
    out = ",".join([
        frame_idx,
        l_id,
        x,
        y,
        w,
        h,
        c,
        str(-1),
        str(-1),
        str(-1)])
    return out

def convert_to_mot16(analytics_df):
    lines = analytics_df.apply(lambda row: convert_row_to_mot16(row), axis=1)
    out   = "\n".join(lines)
    return out

### Entire DataFrames

In [106]:
# Convert
ground_truth_mot16 = convert_to_mot16(ground_truth_df)

# Save
with open("ground_truth_mot16.txt", "w") as f:
    f.write(ground_truth_mot16)

In [107]:
# Convert
pred_mot16 = convert_to_mot16(detections_df)

# Save
with open("pred_mot16.txt", "w") as f:
    f.write(pred_mot16)

### Sanity Test - Validation (GT vs GT)

In [108]:
motMetricsEnhancedCalculator(
    "ground_truth_mot16.txt",
    "ground_truth_mot16.txt")

     num_frames      IDF1       IDP       IDR  Rcll  Prcn    GT  MT  PT  ML  FP  FN  IDsw  FM  MOTA  MOTP
acc         900  1.820812  1.820812  1.820812   1.0   1.0  8985   4   0   0   0   0     0   0   1.0   0.0


### Real - Validation (GT vs Pred)

In [109]:
motMetricsEnhancedCalculator(
    "ground_truth_mot16.txt",
    "pred_mot16.txt")

     num_frames      IDF1       IDP     IDR      Rcll      Prcn    GT  MT  PT  ML    FP    FN  IDsw    FM      MOTA      MOTP
acc         900  1.588254  1.390015  1.3665  0.606121  0.840951  8985   1   2   1  1030  3539    34  1151  0.487702  0.191918
