### Importing libraries

In [5]:
import os
import numpy as np
import fiftyone.zoo as foz
import fiftyone.types as fot
import json

import torch
import pandas as pd
from PIL import Image, ImageDraw
from transformers import AutoProcessor, AutoModelForCausalLM
import matplotlib.pyplot as plt
import seaborn as sns

Investigating a single file.

In [6]:
# Load the ground-truth boxes for a single image. The file's own header row is
# skipped and replaced by explicit column names, the first CSV column becomes
# the row index, and the 'id' column is discarded.
GT = (
    pd.read_csv(
        'ground_truth/000000000139.csv',
        names=["x", "y", "w", "h", "id", "object"],
        header=None,
        skiprows=1,
        index_col=0,
    )
    .drop(columns='id')
)

In [7]:
# Inspect the ground-truth frame loaded in the previous cell.
GT

Unnamed: 0,x,y,w,h,object
0,236,142,24,69,potted plant
1,7,167,149,94,tv
2,557,209,81,78,tv
3,358,218,56,102,chair
4,290,217,61,98,chair
5,413,223,30,81,chair
6,317,219,21,11,chair
7,412,157,53,138,person
8,384,172,15,35,person
9,512,205,14,15,microwave


In [8]:
# NOTE(review): repeats the display from the previous cell; GT is not modified in between.
GT

Unnamed: 0,x,y,w,h,object
0,236,142,24,69,potted plant
1,7,167,149,94,tv
2,557,209,81,78,tv
3,358,218,56,102,chair
4,290,217,61,98,chair
5,413,223,30,81,chair
6,317,219,21,11,chair
7,412,157,53,138,person
8,384,172,15,35,person
9,512,205,14,15,microwave


In [9]:
# NOTE(review): the loader already names the columns x, y, w, h, object, so
# there is no 'x.1' column and this rename matches nothing (DataFrame.rename
# ignores missing labels by default). Presumably a leftover from before
# `names=` was passed to read_csv -- confirm and delete this cell.
# Rebinding (rather than inplace=True) avoids mutating the frame that earlier
# cells already displayed.
GT = GT.rename(columns={'x.1': 'w'})

Investigating predictions from the model.

In [10]:
# Load the model's predicted boxes for the same image; the first CSV column is
# used as the row index and the file's own header supplies the column names.
PT = pd.read_csv(
    'model_pred/000000000139.csv',
    index_col=0,
)

In [11]:
# Inspect the prediction frame loaded in the previous cell.
PT

Unnamed: 0,x,y,w,h,object
0,361,220,55,98,chair
1,294,217,56,100,chair
2,410,220,30,83,chair
3,448,120,12,21,clock
4,320,230,125,88,dining table
5,409,157,54,138,person
6,384,172,15,34,person
7,550,249,53,151,potted plant
8,334,176,45,55,potted plant
9,232,173,33,39,potted plant


In [12]:
# Show the ground truth again, directly after the predictions
# (presumably for side-by-side comparison; GT itself is unchanged).
GT

Unnamed: 0,x,y,w,h,object
0,236,142,24,69,potted plant
1,7,167,149,94,tv
2,557,209,81,78,tv
3,358,218,56,102,chair
4,290,217,61,98,chair
5,413,223,30,81,chair
6,317,219,21,11,chair
7,412,157,53,138,person
8,384,172,15,35,person
9,512,205,14,15,microwave


Calculating IoU (intersection over union) between two boxes

In [13]:
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is a 4-item sequence ``(x, y, w, h)``: ``(x, y)`` is treated as
    the box's minimum corner and ``(x + w, y + h)`` as its maximum corner.

    Returns:
        ``intersection_area / union_area``, or ``0`` when the union area is
        zero (e.g. both boxes are degenerate).
    """
    left_a, top_a, width_a, height_a = box1
    left_b, top_b, width_b, height_b = box2

    # Edges of the overlap rectangle (which may turn out to be empty).
    overlap_left = max(left_a, left_b)
    overlap_top = max(top_a, top_b)
    overlap_right = min(left_a + width_a, left_b + width_b)
    overlap_bottom = min(top_a + height_a, top_b + height_b)

    # Clamp at zero so disjoint boxes contribute no overlap.
    overlap_w = max(0, overlap_right - overlap_left)
    overlap_h = max(0, overlap_bottom - overlap_top)
    overlap_area = overlap_w * overlap_h

    combined_area = width_a * height_a + width_b * height_b - overlap_area
    if combined_area == 0:
        return 0
    return overlap_area / combined_area


Finding matches from GT (ground truth) to PT (model predictions).

In [14]:
# Exploratory pass: pair each ground-truth box (in row order) with the
# still-unclaimed prediction that overlaps it most, ignoring class labels.
# A prediction can be claimed by at most one ground-truth box; a ground-truth
# box whose best overlap is below `threshold` is recorded as None.
threshold = 0.3
matches = {}
used_pt_indices = set()

# Initialised here but never updated in this cell.
TP = 0


for gt_idx, gt_box in GT.iterrows():
    box1 = [gt_box['x'], gt_box['y'], gt_box['w'], gt_box['h']]

    # np.argmax cannot take an empty list, so handle "no predictions" up front.
    if PT.empty:
        matches[gt_idx] = None
        continue

    # -1 is a sentinel that keeps already-claimed predictions below any real IoU.
    ious = [
        -1 if pt_idx in used_pt_indices
        else calculate_iou(
            box1=box1,
            box2=[pt_box['x'], pt_box['y'], pt_box['w'], pt_box['h']],
        )
        for pt_idx, pt_box in PT.iterrows()
    ]

    # argmax is positional, so translate it back to PT's index label.
    match_idx = np.argmax(ious)
    if ious[match_idx] >= threshold:
        matched_pt_idx = PT.index[match_idx]
        used_pt_indices.add(matched_pt_idx)
        matches[gt_idx] = PT.loc[matched_pt_idx]
    else:
        matches[gt_idx] = None


In [15]:
# Display GT once more; the matching loop above only reads from it.
GT

Unnamed: 0,x,y,w,h,object
0,236,142,24,69,potted plant
1,7,167,149,94,tv
2,557,209,81,78,tv
3,358,218,56,102,chair
4,290,217,61,98,chair
5,413,223,30,81,chair
6,317,219,21,11,chair
7,412,157,53,138,person
8,384,172,15,35,person
9,512,205,14,15,microwave


Calculating metrics for a single file.

In [16]:
# Detection metrics for the single image held in GT / PT.
#
# Each ground-truth box is matched greedily (in GT row order) to the unclaimed
# prediction of the SAME class with the highest IoU, and counts as a true
# positive when that IoU reaches `threshold`. Candidates are filtered by class
# before the best IoU is picked -- the same rule process_single_file applies --
# so a higher-IoU box of a different class can no longer hide a valid
# same-class match and turn it into a false negative.
matches = {}
threshold = 0.5
used_pt_indices = set()

TP = 0
FP = 0
FN = 0

for gt_idx, gt_box in GT.iterrows():

    box1 = [gt_box['x'], gt_box['y'], gt_box['w'], gt_box['h']]
    gt_class = gt_box['object']

    # np.argmax cannot take an empty list; with no predictions every GT box is missed.
    if PT.empty:
        matches[gt_idx] = None
        FN += 1
        continue

    ious = []
    for pt_idx, pt_box in PT.iterrows():
        # -1 sentinel: already-claimed or wrong-class predictions stay below
        # any real IoU and therefore below the (positive) threshold.
        if pt_idx in used_pt_indices or pt_box['object'] != gt_class:
            ious.append(-1)
            continue

        box2 = [pt_box['x'], pt_box['y'], pt_box['w'], pt_box['h']]
        ious.append(calculate_iou(box1=box1, box2=box2))

    # argmax is positional, so translate it back to PT's index label.
    match_idx = np.argmax(ious)
    if ious[match_idx] >= threshold:
        matched_pt_idx = PT.index[match_idx]
        matches[gt_idx] = PT.loc[matched_pt_idx]
        used_pt_indices.add(matched_pt_idx)
        TP += 1
    else:
        matches[gt_idx] = None
        FN += 1

# FP = predicted boxes that were not matched to any GT box
FP = len(PT) - len(used_pt_indices)

# Metrics
precision = TP / (TP + FP) if (TP + FP) != 0 else 0
recall = TP / (TP + FN) if (TP + FN) != 0 else 0
# Same value as recall here: every GT row is counted as exactly one TP or FN,
# so TP + FN == len(GT).
accuracy = TP / len(GT) if len(GT) != 0 else 0
f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

print(f"True Positives: {TP}")
print(f"False Positives: {FP}")
print(f"False Negatives: {FN}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")


True Positives: 12
False Positives: 7
False Negatives: 8
Precision: 0.6316
Recall: 0.6000
Accuracy: 0.6000
F1-score: 0.6154


Processing for a batch of 1k files

In [17]:
def process_single_file(gt_path, pt_path, threshold=0.5):
    """Score one image's predicted boxes against its ground-truth boxes.

    Both CSVs are read with their first row skipped, their first column as the
    row index, and column names assigned positionally (the ground-truth file
    additionally carries an 'id' column, which is dropped).

    Matching is greedy in ground-truth row order: each ground-truth box claims
    the not-yet-claimed prediction of the same class with the highest IoU, and
    the pair is a true positive when that IoU is at least ``threshold``.
    Predictions left unclaimed are false positives.

    Args:
        gt_path: Path to the ground-truth CSV.
        pt_path: Path to the prediction CSV.
        threshold: Minimum IoU for a same-class pair to count as a match.

    Returns:
        dict with keys 'file' (basename of ``gt_path``), 'TP', 'FP', 'FN',
        'precision', 'recall', 'accuracy' and 'f1_score'. Ratios with a zero
        denominator are reported as 0. 'accuracy' is TP / len(GT), which
        equals recall because every ground-truth row is exactly one TP or FN.
    """
    GT = pd.read_csv(
        gt_path,
        index_col=0,
        names=["x", "y", "w", "h", "id", "object"],
        header=None,
        skiprows=1,
    ).drop(columns='id')
    PT = pd.read_csv(
        pt_path,
        index_col=0,
        names=["x", "y", "w", "h", "object"],
        header=None,
        skiprows=1,
    )

    # Extract class and box for every prediction once, instead of rebuilding
    # them for each ground-truth row.
    predictions = [
        (pt['object'], [pt['x'], pt['y'], pt['w'], pt['h']])
        for _, pt in PT.iterrows()
    ]

    # Claimed predictions are tracked by row position, so duplicate index
    # labels in the CSV cannot make two predictions look like one.
    used_positions = set()
    TP = FN = 0

    for _, gt in GT.iterrows():
        gt_box = [gt['x'], gt['y'], gt['w'], gt['h']]
        gt_class = gt['object']
        best_iou, best_pos = -1, -1

        for pos, (pt_class, pt_box) in enumerate(predictions):
            if pos in used_positions or pt_class != gt_class:
                continue
            iou = calculate_iou(gt_box, pt_box)
            # Strict '>' keeps the first prediction on ties.
            if iou > best_iou:
                best_iou, best_pos = iou, pos

        # best_pos stays -1 when no same-class candidate was available.
        if best_pos >= 0 and best_iou >= threshold:
            used_positions.add(best_pos)
            TP += 1
        else:
            FN += 1

    FP = len(predictions) - len(used_positions)

    def safe_div(a, b): return a / b if b != 0 else 0
    precision = safe_div(TP, TP + FP)
    recall = safe_div(TP, TP + FN)
    accuracy = safe_div(TP, len(GT))
    f1 = safe_div(2 * precision * recall, precision + recall)

    return {
        'file': os.path.basename(gt_path),
        'TP': TP, 'FP': FP, 'FN': FN,
        'precision': precision,
        'recall': recall,
        'accuracy': accuracy,
        'f1_score': f1
    }


In [18]:
# Score every ground-truth CSV that has a prediction CSV of the matching name,
# collecting one metrics record per image.
results = []
# Ground-truth files without a prediction file are not scored; keep track of
# them so the omission is visible instead of silently shrinking the batch.
missing_predictions = []
gt_dir = 'ground_truth'
pt_dir = 'model_pred'

# os.listdir returns names in arbitrary order; sort for a reproducible pass.
for filename in sorted(os.listdir(gt_dir)):
    if not filename.endswith('.csv'):
        continue
    gt_path = os.path.join(gt_dir, filename)
    pt_path = os.path.join(pt_dir, filename.replace('gt_', 'pt_'))
    if not os.path.exists(pt_path):
        missing_predictions.append(filename)
        continue
    result = process_single_file(gt_path, pt_path)
    results.append(result)

if missing_predictions:
    print(
        f"Skipped {len(missing_predictions)} ground-truth file(s) with no "
        f"prediction file, e.g. {missing_predictions[:5]}"
    )

metrics_df = pd.DataFrame(results)


In [19]:
# Per-image metrics table: one row per scored file.
metrics_df

Unnamed: 0,file,TP,FP,FN,precision,recall,accuracy,f1_score
0,000000000139.csv,12,7,8,0.631579,0.600000,0.600000,0.615385
1,000000000285.csv,1,0,0,1.000000,1.000000,1.000000,1.000000
2,000000000632.csv,6,10,12,0.375000,0.333333,0.333333,0.352941
3,000000000724.csv,2,0,2,1.000000,0.500000,0.500000,0.666667
4,000000000776.csv,1,0,3,1.000000,0.250000,0.250000,0.400000
...,...,...,...,...,...,...,...,...
995,000000118594.csv,2,0,0,1.000000,1.000000,1.000000,1.000000
996,000000118921.csv,2,0,1,1.000000,0.666667,0.666667,0.800000
997,000000119038.csv,3,0,0,1.000000,1.000000,1.000000,1.000000
998,000000119088.csv,4,0,0,1.000000,1.000000,1.000000,1.000000


In [20]:
# Pool the per-image counts over the whole batch.
total_TP, total_FP, total_FN = (
    metrics_df[count].sum() for count in ("TP", "FP", "FN")
)

In [21]:
# Batch-level metrics computed from the pooled counts.
p = total_TP / (total_TP + total_FP)  # precision
r = total_TP / (total_TP + total_FN)  # recall
# Note: this is TP / (TP + FP + FN), which is not the same quantity as the
# per-file 'accuracy' column (TP / len(GT)).
acc = total_TP / (total_TP + total_FP + total_FN)

acc, p, r

(np.float64(0.5429718875502008),
 np.float64(0.7579689251962197),
 np.float64(0.6568573014991671))