In [1]:
from ultralytics import YOLO
import pandas as pd
import numpy as np
import os
import imagesize

In [2]:
class_names = {
    0:'Person',
    1:'Car',
    2:'Truck',
    3:'UAV',
    4:'Aircraft',
    5:'Ship'
}

In [3]:
def inspect_dataset(
        target_dataset_root,  # ../datasets/new_dataset
        target_dataset_slice,  # train,test,val
):
 info = []  # list of lists, each list corresponds to an instance [cls_id, x, y, w, h, img]

 target_labels_dir = os.path.join(target_dataset_root, 'labels', target_dataset_slice)

 # Iterate over all files in the original dataset labels folder
 for filename in os.listdir(target_labels_dir):
  if filename.endswith('.txt'):
   # Read file
   with open(os.path.join(target_labels_dir, filename), "r") as f:
    # Iterate over instances in image and get present class ids
    for line in f:
     line_data = []
     # label data
     line_data = line.split()
     # Image name
     line_data.append(os.path.splitext(filename)[0])
     # Image size: could be done at image level and not row level
     img_path = os.path.join(target_dataset_root, 'images', target_dataset_slice,
                             os.path.splitext(filename)[0] + '.jpg')
     img_w, img_h = imagesize.get(img_path)
     line_data.extend([img_w, img_h])
     line_data.extend([img_path, os.path.join(target_labels_dir, filename)])
     # Append line data to info
     info.append(line_data)

 df = pd.DataFrame(info, columns=['new_class_id', 'xcn', 'ycn', 'wn', 'hn', 'img', 'img_w', 'img_h', 'image_path', 'label_path'])
 df = df.astype(
  {'new_class_id': 'int32', 'xcn': 'float32', 'ycn': 'float32', 'wn': 'float32', 'hn': 'float32', 'img': 'int64',
   'img_w': 'float32', 'img_h': 'float32', 'image_path': 'string', 'label_path': 'string'})
 df['class_name'] = df['new_class_id'].map(class_names)
 return df

In [4]:
df_val = inspect_dataset('/Users/johnny/Projects/datasets/custom_dataset_v2/', 'val')

In [5]:
df_val

Unnamed: 0,new_class_id,xcn,ycn,wn,hn,img,img_w,img_h,image_path,label_path,class_name
0,0,0.104688,0.515258,0.112500,0.152582,11303,640.0,426.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person
1,0,0.741406,0.524648,0.098438,0.171362,11303,640.0,426.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person
2,0,0.459375,0.311033,0.106250,0.265258,11303,640.0,426.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person
3,1,0.113008,0.516769,0.038066,0.033807,3644,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car
4,3,0.178125,0.514583,0.142188,0.129167,5235,1280.0,720.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,UAV
...,...,...,...,...,...,...,...,...,...,...,...
95016,1,0.406055,0.520774,0.016188,0.020995,4132,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car
95017,1,0.440119,0.525339,0.023608,0.026472,4132,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car
95018,0,0.455295,0.534467,0.012141,0.048380,4132,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person
95019,1,0.558832,0.561395,0.087012,0.082156,4132,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car


In [6]:
print(f"The number of objects is {len(df_val)}")
print(f"The number of images is {len(df_val['img'].unique())}")

The number of objects is 95021
The number of images is 13205


In [7]:
bin_edges = [0, 16**2, 32**2, 96**2, float('inf')]
bin_labels = ['Tiny', 'Small', 'Medium', 'Large']
df_val['bbox_area'] = (df_val['wn']*df_val['img_w'])*(df_val['hn']*df_val['img_h'])
df_val['bbox_size_category'] = pd.cut(df_val['bbox_area'], bins=bin_edges, labels=bin_labels, right=False)

In [8]:
df_val

Unnamed: 0,new_class_id,xcn,ycn,wn,hn,img,img_w,img_h,image_path,label_path,class_name,bbox_area,bbox_size_category
0,0,0.104688,0.515258,0.112500,0.152582,11303,640.0,426.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,4679.995117,Medium
1,0,0.741406,0.524648,0.098438,0.171362,11303,640.0,426.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,4599.013672,Medium
2,0,0.459375,0.311033,0.106250,0.265258,11303,640.0,426.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,7683.994141,Medium
3,1,0.113008,0.516769,0.038066,0.033807,3644,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car,2507.913330,Medium
4,3,0.178125,0.514583,0.142188,0.129167,5235,1280.0,720.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,UAV,16926.000000,Large
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95016,1,0.406055,0.520774,0.016188,0.020995,4132,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car,662.350647,Small
95017,1,0.440119,0.525339,0.023608,0.026472,4132,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car,1217.913086,Medium
95018,0,0.455295,0.534467,0.012141,0.048380,4132,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,1144.717529,Medium
95019,1,0.558832,0.561395,0.087012,0.082156,4132,1624.0,1200.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car,13930.977539,Large


In [9]:
df_val['bbox_size_category'].value_counts()

bbox_size_category
Medium    35772
Small     21698
Large     20803
Tiny      16748
Name: count, dtype: int64

# 1) Filter by Image Size (≥ 1920x1080)

In [10]:
df_size = df_val[(df_val['img_w']>= 1920)&(df_val['img_h']>= 1080)]

In [11]:
df_size

Unnamed: 0,new_class_id,xcn,ycn,wn,hn,img,img_w,img_h,image_path,label_path,class_name,bbox_area,bbox_size_category
23,3,0.398438,0.616204,0.012500,0.008333,7422,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,UAV,216.000000,Tiny
82,3,0.360156,0.598148,0.011979,0.005556,7344,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,UAV,138.000000,Tiny
83,1,0.486807,0.891343,0.008396,0.014769,7344,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car,257.113983,Small
84,0,0.380125,0.944889,0.008495,0.026176,7344,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,461.083679,Small
85,0,0.916901,0.984667,0.007786,0.030657,7344,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,494.994507,Small
...,...,...,...,...,...,...,...,...,...,...,...,...,...
94989,0,0.687198,0.270741,0.008251,0.018334,10004,3840.0,2160.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,1254.673096,Medium
94990,0,0.699027,0.306811,0.025659,0.022881,10004,3840.0,2160.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,4869.652832,Medium
94991,0,0.737579,0.260000,0.008491,0.016296,10004,3840.0,2160.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,1147.660278,Medium
94992,0,0.324881,0.365857,0.023200,0.031715,10004,3840.0,2160.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,6102.750000,Medium


In [12]:
print(f"The number of objects is {len(df_size)}")
print(f"The number of images is {len(df_size['img'].unique())}")

The number of objects is 23156
The number of images is 3154


In [13]:
df_size['bbox_size_category'].value_counts()

bbox_size_category
Small     7458
Medium    7212
Tiny      6449
Large     2037
Name: count, dtype: int64

In [14]:
df_size['class_name'].value_counts()

class_name
Person      15897
Car          4584
UAV          2206
Ship          321
Truck         147
Aircraft        1
Name: count, dtype: int64

# 2) Filter by maximum number of objects present (≤ 100)

In [15]:
object_counts = df_size['img'].value_counts()
selected_images = object_counts[object_counts <= 100].index
df_filtered = df_size[df_size['img'].isin(selected_images)]

In [16]:
df_filtered

Unnamed: 0,new_class_id,xcn,ycn,wn,hn,img,img_w,img_h,image_path,label_path,class_name,bbox_area,bbox_size_category
23,3,0.398438,0.616204,0.012500,0.008333,7422,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,UAV,216.000000,Tiny
82,3,0.360156,0.598148,0.011979,0.005556,7344,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,UAV,138.000000,Tiny
83,1,0.486807,0.891343,0.008396,0.014769,7344,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Car,257.113983,Small
84,0,0.380125,0.944889,0.008495,0.026176,7344,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,461.083679,Small
85,0,0.916901,0.984667,0.007786,0.030657,7344,1920.0,1080.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,494.994507,Small
...,...,...,...,...,...,...,...,...,...,...,...,...,...
94989,0,0.687198,0.270741,0.008251,0.018334,10004,3840.0,2160.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,1254.673096,Medium
94990,0,0.699027,0.306811,0.025659,0.022881,10004,3840.0,2160.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,4869.652832,Medium
94991,0,0.737579,0.260000,0.008491,0.016296,10004,3840.0,2160.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,1147.660278,Medium
94992,0,0.324881,0.365857,0.023200,0.031715,10004,3840.0,2160.0,/Users/johnny/Projects/datasets/custom_dataset...,/Users/johnny/Projects/datasets/custom_dataset...,Person,6102.750000,Medium


In [17]:
print(f"The number of objects is {len(df_filtered)}")
print(f"The number of images is {len(df_filtered['img'].unique())}")

The number of objects is 19455
The number of images is 3133


In [18]:
df_filtered['bbox_size_category'].value_counts()

bbox_size_category
Medium    6689
Small     6556
Tiny      4190
Large     2020
Name: count, dtype: int64

In [19]:
import pandas as pd
import os
import shutil
import yaml

base_dir = '/Users/johnny/Projects/datasets/Client_Validation_Set'

subfolders = ['train', 'val', 'test']
folders = ['images', 'labels']
for folder in folders:
    for subfolder in subfolders:
        os.makedirs(os.path.join(base_dir, folder, subfolder), exist_ok=True)

for index, row in df_filtered.iterrows():
    image_path = row['image_path']
    label_path = row['label_path']
    shutil.copy(image_path, os.path.join(base_dir, 'images', 'val'))
    shutil.copy(label_path, os.path.join(base_dir, 'labels', 'val'))
yaml_data = {
    'path': '../small-fast-detector/inference_tools/Evaluation/datasets/Client_Validation_Set',  # PUT HERE THE PATH
    'train': 'images/train',
    'val': 'images/val',
    'test': '',
    'names': {
        0: 'uav',
        1: 'airplane',
        2: 'boat',
        3: 'car',
        4: 'person',
        5: 'truck'
    }

}
with open(os.path.join(base_dir, 'data.yaml'), 'w') as file:
    yaml.dump(yaml_data, file, default_flow_style=False)

print("Dataset organized and YAML file created.")

Dataset organized and YAML file created.


In [20]:
import yaml
import platform
import pandas as pd
import glob
from PIL import Image
import brambox as bb
import numpy as np
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns

In [21]:
def check_os():
    os = platform.system()

    if os == 'Darwin':
        return "MacOS"
    elif os == 'Linux':
        return "Linux"
    else:
        return "Unknown OS"
    
operating_system = check_os()


if operating_system == "MacOS":
    root_path = "/Users/johnny/Projects/"
elif operating_system == "Linux":
    root_path = "/home/johnny/Projects/"

In [22]:
from brambox.io.parser.detection import CocoParser

# Load detections

det = bb.io.load(CocoParser, '../../small-fast-detector/runs/detect/val/predictions.json')
print('detections:')
det['image'] = det['image'].astype(str).str.lstrip('0').astype(int)
det['class_label'] = det['class_label'].astype(int)
label_mapping = {
    0: 'person',
    1: 'car',
    2: 'truck',
    3: 'uav',
    4: 'airplane',
    5: 'boat'
}

# Applying the mapping to the 'class_label' column
det['class_label'] = det['class_label'].map(label_mapping)

display(det.head())

detections:


Unnamed: 0,image,class_label,id,x_top_left,y_top_left,width,height,confidence
0,5009,uav,,1120.995,510.087,1731.857,882.608,0.92286
1,5009,airplane,,1125.773,513.662,1728.777,878.329,0.00179
2,5019,person,,0.448,1.436,915.496,2799.004,0.00524
3,5019,person,,35.801,6.081,2470.119,2777.691,0.00136
4,5019,person,,15.692,39.736,1634.106,2752.865,0.00235


In [23]:
from brambox.io.parser.annotation import CocoParser
# Load annotations
anno = bb.io.load(CocoParser(add_image_dims=True), '../../small-fast-detector/inference_tools/Evaluation/datasets/Client_Validation_Set/annotations/instances_val2017.json')
anno['image'] = anno['image'].astype(str).str.lstrip('0').astype(int)

print('annotations:')
display(anno.head())

# save dataframes
det.to_csv('../../small-fast-detector/runs/detect/val/detections.csv', index=False)

anno.to_csv('../../small-fast-detector/runs/detect/val/annotations.csv', index=False)

annotations:


Unnamed: 0,image,class_label,id,x_top_left,y_top_left,width,height,occluded,truncated,lost,difficult,ignore,image_width,image_height
0,10000,person,10389.0,3386.0,2147.0,23.0,13.0,0.0,0.0,False,False,False,3840,2160
1,10000,person,10390.0,2965.0,797.0,70.0,62.0,0.0,0.0,False,False,False,3840,2160
2,10000,person,10391.0,2153.0,843.0,32.0,28.0,0.0,0.0,False,False,False,3840,2160
3,10000,person,10392.0,889.0,1017.0,146.0,51.0,0.0,0.0,False,False,False,3840,2160
4,10000,boat,10393.0,454.0,241.0,104.0,33.0,0.0,0.0,False,False,False,3840,2160


In [24]:
from tqdm.notebook import tqdm


def calculate_area(row):
    return row['width'] * row['height']

def iou(box_a, box_b):
    xA = max(box_a[0], box_b[0])
    yA = max(box_a[1], box_b[1])
    xB = min(box_a[2], box_b[2])
    yB = min(box_a[3], box_b[3])

    interArea = max(0, xB - xA) * max(0, yB - yA)

    boxAArea = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    boxBArea = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])

    iou = interArea / float(boxAArea + boxBArea - interArea)
    return iou

def calculate_map(detected, actual, class_labels):
    aps = []
    if detected.empty or 'class_label' not in detected.columns:
        return 0

    for label in class_labels:
        if label not in detected['class_label'].values:
            aps.append(0)
            continue

        dc = detected[detected.class_label == label]
        ac = actual[actual.class_label == label]

        ap_coco = []
        for iou_threshold in range(50, 100, 5):
            if dc.empty:
                ap_coco.append(0)
                continue

            pr = bb.stat.pr(dc, ac, iou_threshold / 100, smooth=True)
            ap_coco.append(bb.stat.auc_interpolated(pr))

        aps.append(sum(ap_coco) / len(ap_coco))

    mAP_coco = sum(aps) / len(aps) if aps else 0
    return mAP_coco

def calculate_pr_curve(detected, actual, iou_threshold):
    """ Calcula la curva PR para un umbral de IoU específico. """
    matched_det = bb.stat.match_det(detected, actual, threshold=iou_threshold, 
                                    criteria=bb.stat.coordinates.iou, 
                                    ignore=bb.stat.IgnoreMethod.SINGLE)
    pr_curve = bb.stat.pr(matched_det, actual)
    return pr_curve

def calculate_recall_precision(tp, fn, fp):
    """ Calcula el recall y la precisión. """
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    return recall, precision

def calculate_ap(recalls, precisions):
    """ Calcula el Average Precision (AP) a partir de las curvas de recall y precision. """
    recalls = [0] + recalls + [1] 
    precisions = [0] + precisions + [0] 

    ap = np.sum((recalls[i] - recalls[i - 1]) * precisions[i] for i in range(1, len(recalls)))
    return ap

def calculate_metrics(detected, actual, class_labels, iou_threshold=0.5):
    precision = []
    recall = []

    for label in class_labels:
        tp_per_class = 0
        fp_per_class = 0
        fn_per_class = 0
        detected_class = detected[detected['class_label'] == label]
        actual_class = actual[actual['class_label'] == label]

        for _, det_row in detected_class.iterrows():
            box_det = [det_row['x_top_left'], det_row['y_top_left'],
                       det_row['x_top_left'] + det_row['width'], 
                       det_row['y_top_left'] + det_row['height']]

            best_iou = 0
            for _, act_row in actual_class.iterrows():
                box_act = [act_row['x_top_left'], act_row['y_top_left'],
                           act_row['x_top_left'] + act_row['width'], 
                           act_row['y_top_left'] + act_row['height']]
                current_iou = iou(box_det, box_act)
                best_iou = max(best_iou, current_iou)

            if best_iou >= iou_threshold:
                tp_per_class += 1
            else:
                fp_per_class += 1

        # Calcular FN
        fn_per_class = len(actual) - tp_per_class
    
        # Calcular recall y precisión
        recall_per_class, precision_per_class = calculate_recall_precision(tp_per_class, fn_per_class, fp_per_class)
        recall.append(recall_per_class)
        precision.append(precision_per_class)
    
    recall = np.mean(recall)
    precision = np.mean(precision)

    # Calcular AP por clase y luego calcular el promedio (mAP)
    mAP = calculate_map(detected, actual, class_labels)

    return recall, precision, mAP

image_stats = pd.DataFrame(columns=['name', 'width', 'height', 'num_of_gt_objects', 'lowest_area', 'biggest_area', 'num_of_predicted_objects', 'recall', 'precision', 'mAP'])

det_grouped = det.groupby('image', observed=True)
anno_grouped = anno.groupby('image', observed=True)

class_labels = anno['class_label'].unique().tolist()

total_images = set(anno['image'].unique().tolist() + det['image'].unique().tolist())
total_images = sorted(total_images)
for image_id in tqdm(total_images):
    width = height = num_of_gt_objects = lowest_area = biggest_area = num_of_predicted_objects = np.nan
    recall = precision = mAP = 0

    if image_id in anno_grouped.groups:
        image_data = anno_grouped.get_group(image_id).copy()
        width = image_data.iloc[0]['image_width']
        height = image_data.iloc[0]['image_height']
        num_of_gt_objects = len(image_data)
        image_data['area'] = image_data.apply(calculate_area, axis=1)
        lowest_area = image_data['area'].min() if not image_data['area'].empty else np.nan
        biggest_area = image_data['area'].max() if not image_data['area'].empty else np.nan

    if image_id in det_grouped.groups:
        det_data = det_grouped.get_group(image_id)
        num_of_predicted_objects = len(det_data)
        recall, precision, mAP = calculate_metrics(det_data, image_data, class_labels)

    new_row = {
        'name': image_id,
        'width': width,
        'height': height,
        'num_of_gt_objects': num_of_gt_objects,
        'lowest_area': lowest_area,
        'biggest_area': biggest_area,
        'num_of_predicted_objects': num_of_predicted_objects,
        'recall': recall,
        'precision': precision,
        'mAP': mAP,
        
    }
    image_stats = pd.concat([image_stats, pd.DataFrame([new_row])], ignore_index=True)


  0%|          | 0/3133 [00:00<?, ?it/s]

  image_stats = pd.concat([image_stats, pd.DataFrame([new_row])], ignore_index=True)


In [36]:
image_stats[image_stats['name']==7962]

Unnamed: 0,name,width,height,num_of_gt_objects,lowest_area,biggest_area,num_of_predicted_objects,recall,precision,mAP
2446,7962,3732,1840,1,640.0,640.0,3,0.0,0.0,0.0


In [25]:
image_stats.sort_values(by=['name'], ascending=True, inplace=True)
image_stats

Unnamed: 0,name,width,height,num_of_gt_objects,lowest_area,biggest_area,num_of_predicted_objects,recall,precision,mAP
0,5009,3000,2000,1,1574832.0,1574832.0,2,0.166667,0.166667,0.166667
1,5019,4256,2832,1,769872.0,769872.0,22,0.166667,0.041667,0.148886
2,5024,1920,1080,1,1248928.0,1248928.0,2,0.166667,0.166667,0.166667
3,5025,2048,1365,1,467250.0,467250.0,2,0.166667,0.166667,0.166667
4,5034,2048,1365,1,1446720.0,1446720.0,1,0.166667,0.166667,0.166667
...,...,...,...,...,...,...,...,...,...,...
3128,10089,3840,2160,1,418.0,418.0,25,0.000000,0.000000,0.000000
3129,10090,3840,2160,2,143.0,330.0,29,0.000000,0.000000,0.000000
3130,10091,3840,2160,1,391.0,391.0,29,0.000000,0.000000,0.000000
3131,10092,3840,2160,1,204.0,204.0,12,0.000000,0.000000,0.000000


In [26]:
image_stats.to_csv('../../small-fast-detector/runs/detect/val/image_stats.csv', index=False)

In [33]:
image_stats['num_of_gt_objects'].unique().max()

100

In [37]:
#top-1000k recall
df_filtered_2k = image_stats.sort_values(by=['mAP'], ascending=False).head(2000)

In [38]:
import pandas as pd
import os
import shutil
import yaml

# CHANGE DATASET HERE
dataframe = df_filtered_2k
base_dir = '/Users/johnny/Projects/datasets/Client_Validation_Set'

subfolders = ['train', 'val', 'test']
folders = ['images', 'labels']
for folder in folders:
    for subfolder in subfolders:
        os.makedirs(os.path.join(base_dir, folder, subfolder), exist_ok=True)

for index, row in dataframe.iterrows():
    image_path = row['image_path']
    label_path = row['label_path']
    shutil.copy(image_path, os.path.join(base_dir, 'images', 'val'))
    shutil.copy(label_path, os.path.join(base_dir, 'labels', 'val'))
yaml_data = {
    'path': '../small-fast-detector/inference_tools/Evaluation/datasets/Client_Validation_Set',  # PUT HERE THE PATH
    'train': 'images/train',
    'val': 'images/val',
    'test': '',
    'names': {
        0: 'uav',
        1: 'airplane',
        2: 'boat',
        3: 'car',
        4: 'person',
        5: 'truck'
    }

}
with open(os.path.join(base_dir, 'data.yaml'), 'w') as file:
    yaml.dump(yaml_data, file, default_flow_style=False)

print("Dataset organized and YAML file created.")

Unnamed: 0,name,width,height,num_of_gt_objects,lowest_area,biggest_area,num_of_predicted_objects,recall,precision,mAP
374,5766,1920,1080,3,920.0,5800.0,90,0.222222,0.023551,0.396203
127,5519,1920,1080,14,1098.0,7888.0,113,0.273810,0.122475,0.371177
419,5811,1920,1080,11,588.0,16274.0,54,0.348485,0.182828,0.369782
582,5974,1920,1080,4,1428.0,6192.0,86,0.375000,0.165281,0.369483
1690,7082,1920,1080,4,308.0,14240.0,62,0.333333,0.116162,0.367617
...,...,...,...,...,...,...,...,...,...,...
3024,9985,3840,2160,7,1596.0,48114.0,40,0.333333,0.093056,0.067598
1610,7002,1920,1080,2,420.0,1749.0,55,0.250000,0.017999,0.067414
1887,7279,1920,1080,1,1134.0,1134.0,34,0.500000,0.014706,0.067413
2434,7924,1920,1080,2,9020.0,19760.0,9,0.083333,0.166667,0.067327


In [28]:
# merge image_path and label_path from df_filtered
image_stats = pd.merge(image_stats, df_filtered[['img', 'image_path', 'label_path']], left_on='name', right_on='img', how='left')

In [30]:
image_stats.to_csv('../../small-fast-detector/runs/detect/val/image_stats.csv', index=False)