<a href="https://colab.research.google.com/github/MikLay/drone-detection/blob/main/benchmark_yolov5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Experiment #3 bencmark YOLOv5 models
To train YoLov5/n/s/m/l:

* Get results
* Perform Friedman test
* Perform Nemenyi test
* Select best YOLOv5 model

## Friedman and Nemenyi test for YOLOv5 models

In [25]:
%pip install -q scikit_posthocs

In [32]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from scipy.stats import rankdata
import scikit_posthocs as sp

In [33]:
def generate_scores(method, method_args, data, labels):
    pairwise_scores = method(data, **method_args) # Matrix for all pairwise comaprisons
    pairwise_scores.set_axis(labels, axis='columns', inplace=True) # Label the cols
    pairwise_scores.set_axis(labels, axis='rows', inplace=True) # Label the rows, note: same label as pairwise combinations
    return pairwise_scores

In [46]:
alpha = 0.05  # Set the significance level
precision = np.array([
    [0.925, 0.923, 0.931, 0.929], # n
    [0.943, 0.947, 0.941, 0.945], # s
    [0.943, 0.941, 0.937, 0.94], # m
    [0.928, 0.924, 0.924, 0.927], # l
])
stat, p = stats.friedmanchisquare(*precision)
print(f'p-value: {p}')

reject = p <= alpha
print("Should I reject H0 at the", (1-alpha)*100, "% confidence level?", reject)

p-value: 0.014502247354056415
Should I reject H0 at the 95.0 % confidence level? True


In [48]:
nemenyi_scores = generate_scores(sp.posthoc_nemenyi_friedman, {}, precision, pd.Index(['n', 's', 'l', 'm'], dtype='object'))

  pairwise_scores.set_axis(labels, axis='columns', inplace=True) # Label the cols
  pairwise_scores.set_axis(labels, axis='rows', inplace=True) # Label the rows, note: same label as pairwise combinations


In [49]:
nemenyi_scores

Unnamed: 0,n,s,l,m
n,1.0,0.9,0.593413,0.9
s,0.9,1.0,0.9,0.9
l,0.593413,0.9,1.0,0.747133
m,0.9,0.9,0.747133,1.0


In [51]:
recall = np.array([
    [0.902, 0.9, 0.904, 0.86], # n
    [0.933, 0.936, 0.930, 0.931], # s
    [0.901, 0.904, 0.905, 0.903], # m
    [0.906, 0.911, 0.905, 0.908], # l
])
stat, p = stats.friedmanchisquare(*recall)
print(f'p-value: {p}')

reject = p <= alpha
print("Should I reject H0 at the", (1-alpha)*100, "% confidence level?", reject)

p-value: 0.01351153166132882
Should I reject H0 at the 95.0 % confidence level? True


In [52]:
nemenyi_scores = generate_scores(sp.posthoc_nemenyi_friedman, {}, recall, pd.Index(['n', 's', 'l', 'm'], dtype='object'))

  pairwise_scores.set_axis(labels, axis='columns', inplace=True) # Label the cols
  pairwise_scores.set_axis(labels, axis='rows', inplace=True) # Label the rows, note: same label as pairwise combinations


In [53]:
nemenyi_scores

Unnamed: 0,n,s,l,m
n,1.0,0.670273,0.9,0.9
s,0.670273,1.0,0.823993,0.516551
l,0.9,0.823993,1.0,0.9
m,0.9,0.516551,0.9,1.0


In [54]:
alpha = 0.05  # Set the significance level
inference_time = np.array([
    [2.1, 2.4, 2.3, 2.1], # n
    [2.7, 2.8, 2.4, 2.3], # s
    [3.4, 4.2, 3.5, 3.8], # m
    [6.1, 6.4, 6.2, 6.1], # l
])
stat, p = stats.friedmanchisquare(*inference_time)
print(f'p-value: {p}')

reject = p <= alpha
print("Should I reject H0 at the", (1-alpha)*100, "% confidence level?", reject)

p-value: 0.007383160505359769
Should I reject H0 at the 95.0 % confidence level? True


In [56]:
nemenyi_scores = generate_scores(sp.posthoc_nemenyi_friedman, {}, inference_time, pd.Index(['n', 's', 'l', 'm'], dtype='object'))

  pairwise_scores.set_axis(labels, axis='columns', inplace=True) # Label the cols
  pairwise_scores.set_axis(labels, axis='rows', inplace=True) # Label the rows, note: same label as pairwise combinations


In [57]:
nemenyi_scores

Unnamed: 0,n,s,l,m
n,1.0,0.065622,0.823993,0.9
s,0.065622,1.0,0.354855,0.065622
l,0.823993,0.354855,1.0,0.823993
m,0.9,0.065622,0.823993,1.0
