In [None]:
from pathlib import Path

import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Load the per-video accuracy table and order its rows by group size N,
# then by accuracy (ascending) inside each N group.
df = (
    pd.read_csv('per_video_accuracies_with_samples.csv')
    .sort_values(by=['N', 'accuracy'])
)

def print_rows_for_group_size(frame, n):
    """Print every row of each video that has at least one entry with N == n.

    Args:
        frame: DataFrame with at least the columns 'N' and 'video_path'.
        n: Value of the 'N' column used to select the videos.

    Returns:
        Array of the unique video paths that occur with N == n
        (sorted, as returned by ``np.unique``).
    """
    video_paths = np.unique(frame.loc[frame['N'] == n, 'video_path'].values)
    for video_path in video_paths:
        # Note: the filter is on video_path only, so rows of this video that
        # carry a different N value are printed as well.
        print(frame[frame['video_path'] == video_path])
    return video_paths


# Inspect the videos group by group, from N = 5 down to N = 2.
n5_vids = print_rows_for_group_size(df, 5)
n4_vids = print_rows_for_group_size(df, 4)
n3_vids = print_rows_for_group_size(df, 3)
n2_vids = print_rows_for_group_size(df, 2)

# Box plot of accuracy per N with every individual sample overlaid as a point.
fig, ax = plt.subplots(figsize=(7, 5))

# Unique N values in ascending order; used as box positions and as x ticks
unique_N = sorted(df['N'].unique())

# One accuracy Series per N, in the same order as unique_N
data_for_boxplots = [df[df['N'] == n]['accuracy'] for n in unique_N]

# Create the boxplot. The returned dict of artists is kept in `boxplot`
# because the median lines are read back from it after plotting.
boxplot = ax.boxplot(
    data_for_boxplots,
    positions=unique_N,
    widths=0.5,
    patch_artist=True,
    showmeans=False,  # Disable mean display
    showcaps=True,
    showfliers=False,  # Individual points are drawn by the scatter overlay below
    boxprops=dict(facecolor='lightgrey', color='black', alpha=0.3),
    medianprops=dict(color='red', linewidth=2),  # Customize median line
    whiskerprops=dict(color='black'),
    capprops=dict(color='black')
)

# Overlay the individual points. Every point uses the same colour and opacity,
# so a single scatter call draws them all instead of one call per video.
ax.scatter(df['N'], df['accuracy'], alpha=1, color='blue')

# Set xticks to match the unique values of 'N'
ax.set_xticks(unique_N)

# Customize the labels
ax.set_xlabel('N (Number of People)', fontsize=12)
ax.set_ylabel('Accuracy', fontsize=12)

# Adjust layout to ensure nothing is cut off
fig.tight_layout()

# The boxplot artists carry no labels, so build a proxy handle for the legend
median_legend = mlines.Line2D([], [], color='red', linestyle='-', label='Median')

# Add the custom legend
ax.legend(handles=[median_legend])

# Save the figure; create the output directory first so that a fresh checkout
# without a 'plots' folder does not fail with FileNotFoundError.
output_path = Path('plots/plot_accuracies_with_box_scatter_matplotlib.png')
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_path, dpi=150, bbox_inches='tight')

# Show the plot
plt.show()

In [None]:

# Read each group's median back from the boxplot artists: every entry of
# boxplot['medians'] is a line whose first y-coordinate is taken as the median.
median_values = []
for median_line in boxplot['medians']:
    median_values.append(median_line.get_ydata()[0])

In [None]:
# Display the extracted medians (one value per entry of boxplot['medians']).
median_values