In [1]:
import json 
import pandas as pd

# Evaluate YOLO Model on Test Datasets

In [2]:
ground_truth = json.load(open('/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/frames/full_dataset_annotated_fpp/test.json'))
predictions_nano = json.load(open('/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/code/evalutation/results/results_nano.json'))

In [13]:
import json
from typing import List, Dict, Tuple
import pandas as pd

def load_json(file_path: str) -> Dict:
    with open(file_path, "r") as file:
        return json.load(file)

def is_valid_bbox(bbox: List[float]) -> bool:
    return isinstance(bbox, list) and len(bbox) == 4 and all(isinstance(x, (int, float)) for x in bbox)

def bbox_iou(bbox1: List[float], bbox2: List[float], img_width: int, img_height: int) -> float:
    if not is_valid_bbox(bbox1) or not is_valid_bbox(bbox2):
        return 0.0

    x1, y1, w1, h1 = bbox1
    x2, y2, w2, h2 = bbox2

    # Denormalize the bounding boxes
    x1 *= img_width
    y1 *= img_height
    w1 *= img_width
    h1 *= img_height
    x2 *= img_width
    y2 *= img_height
    w2 *= img_width
    h2 *= img_height

    x1_min = x1 - w1 / 2
    y1_min = y1 - h1 / 2
    x1_max = x1 + w1 / 2
    y1_max = y1 + h1 / 2

    x2_min = x2 - w2 / 2
    y2_min = y2 - h2 / 2
    x2_max = x2 + w2 / 2
    y2_max = y2 + h2 / 2

    inter_x_min = max(x1_min, x2_min)
    inter_y_min = max(y1_min, y2_min)
    inter_x_max = min(x1_max, x2_max)
    inter_y_max = min(y1_max, y2_max)

    inter_area = max(0, inter_x_max - inter_x_min) * max(0, inter_y_max - inter_y_min)
    bbox1_area = w1 * h1
    bbox2_area = w2 * h2

    iou = inter_area / (bbox1_area + bbox2_area - inter_area)
    return iou

def group_annotations_by_video(annotations: List[Dict]) -> Dict[str, List[Dict]]:
    grouped_annotations = {}
    for video in annotations:
        video_id = video["video_id"]
        if video_id not in grouped_annotations:
            grouped_annotations[video_id] = []
        grouped_annotations[video_id].append(video)
    return grouped_annotations

def compare_annotations(
    gt_annotations: List[Dict], pred_annotations: List[Dict], img_width: int, img_height: int
) -> List[Dict]:
    gt_videos = group_annotations_by_video(gt_annotations)
    pred_videos = group_annotations_by_video(pred_annotations)

    video_results = []

    for video_id, gt_videos_list in gt_videos.items():
        pred_videos_list = pred_videos.get(video_id, [])
        
        for gt_video, pred_video in zip(gt_videos_list, pred_videos_list):
            gt_frames = {frame["frame_id"]: frame for frame in gt_video["frames"]}
            pred_frames = {frame["frame_id"]: frame for frame in pred_video["frames"]}

            class_iou_scores = [[] for _ in range(3)]
            
            for frame_id in gt_frames:
                if frame_id in pred_frames:
                    gt_frame = gt_frames[frame_id]
                    pred_frame = pred_frames[frame_id]
                    
                    gt_class_id = gt_frame["class_id"]
                    pred_class_ids = [d["class_id"] for d in pred_frame["detections"]]

                    for class_id in set([gt_class_id] + pred_class_ids):
                        gt_bbox = gt_frame["bbox"] if gt_frame["class_id"] == class_id else None
                        pred_bbox = next((d["bbox"] for d in pred_frame["detections"] if pred_frame["detections"]), None)
                        
                        if gt_bbox and pred_bbox:
                            iou = bbox_iou(gt_bbox, pred_bbox, img_width, img_height)
                            class_iou_scores[class_id].append(iou)
                        elif gt_bbox is None and pred_bbox is None:
                            class_iou_scores[class_id].append(1.0)
                        else:
                            class_iou_scores[class_id].append(0.0)

            class_avg_iou = [average_iou(scores) for scores in class_iou_scores]
            class_map, class_recall, class_precision = [], [], []

            for class_id in range(3):
                if class_iou_scores[class_id]:
                    class_map_value, class_recall_value, class_precision_value = calculate_map(
                        gt_video["frames"], pred_video["frames"], img_width, img_height, class_id
                    )
                    class_map.append(class_map_value)
                    class_recall.append(class_recall_value)
                    class_precision.append(class_precision_value)
                else:
                    class_map.append(0.0)
                    class_recall.append(0.0)
                    class_precision.append(0.0)

            video_results.append({
                "video_id": video_id,
                "average_iou_class_0": class_avg_iou[0],
                "average_iou_class_1": class_avg_iou[1],
                "average_iou_class_2": class_avg_iou[2],
                "map_class_0": class_map[0],
                "map_class_1": class_map[1],
                "map_class_2": class_map[2],
                "average_recall_class_0": class_recall[0],
                "average_recall_class_1": class_recall[1],
                "average_recall_class_2": class_recall[2],
                "average_precision_class_0": class_precision[0],
                "average_precision_class_1": class_precision[1],
                "average_precision_class_2": class_precision[2],
            })

    return video_results

def average_iou(iou_scores: List[float]) -> float:
    return sum(iou_scores) / len(iou_scores) if iou_scores else 0.0

def compute_ap(recalls: List[float], precisions: List[float]) -> float:
    recalls = [0.0] + recalls + [1.0]
    precisions = [0.0] + precisions + [0.0]
    for i in range(len(precisions) - 1, 0, -1):
        precisions[i - 1] = max(precisions[i - 1], precisions[i])
    ap = 0.0
    for i in range(1, len(recalls)):
        ap += (recalls[i] - recalls[i - 1]) * precisions[i]
    return ap

def calculate_map(
    gt_frames: List[Dict], pred_frames: List[Dict], img_width: int, img_height: int, class_id: int
) -> Tuple[float, float, float]:
    gt_bboxes = [frame["bbox"] for frame in gt_frames if frame["class_id"] == class_id]
    pred_bboxes = [
        detection["bbox"]
        for frame in pred_frames
        for detection in frame["detections"]
        if detection["class_id"] == class_id
    ]

    tp, fp, fn = 0, 0, len(gt_bboxes)

    for pred_bbox in pred_bboxes:
        matched = False
        for gt_bbox in gt_bboxes:
            if bbox_iou(pred_bbox, gt_bbox, img_width, img_height) > 0.5:
                matched = True
                tp += 1
                fn -= 1
                break
        if not matched:
            fp += 1

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0

    ap = compute_ap([recall], [precision])
    return ap, recall, precision

# Load data
gt_annotations = load_json(
    "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/frames/full_dataset_annotated_fpp/test.json"
)
pred_annotations = load_json(
    "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/code/evalutation/results/results_nano.json"
)

# Get the image size
img_width = 640
img_height = 480

# Compare annotations and compute metrics for each video
video_results = compare_annotations(gt_annotations, pred_annotations, img_width, img_height)

# Display results
df = pd.DataFrame(video_results)
print(df)

TypeError: list indices must be integers or slices, not NoneType

In [6]:
df

Unnamed: 0,video_id,average_iou,map,average_recall,average_precision
0,video_lab_platform_6,0.98482,1.0,1.0,1.0
1,test_indoor1,0.911021,0.974359,0.974359,1.0
2,video_lab_semiopen_1______3,0.974404,1.0,1.0,1.0
