In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
# Locate the metrics directories under the current working directory and load
# one DataFrame per model CSV found in the selected dataset directory.
metrics_dir_path = Path.cwd() / "metrics"

if not metrics_dir_path.is_dir():
    raise FileNotFoundError("Directory metrics non trovata!")

# Known dataset directories; all of them must exist before a selection is made.
datasets_dir_paths = {
    metrics_dir_path / "full_custom_metrics",
    metrics_dir_path / "only_0_metrics",
    metrics_dir_path / "only_mivia_metrics",
}

# Iterate in sorted order: set iteration order can change between runs, which
# would make the first reported missing directory vary as well.
for dataset_dir_path in sorted(datasets_dir_paths):
    if not dataset_dir_path.is_dir():
        raise FileNotFoundError(f"Directory {dataset_dir_path} non trovata!")

# ===================== Change this line to switch dataset =====================
SELECTED_DIR_PATH = metrics_dir_path / "only_0_metrics"

# The selection must be one of the known dataset directories listed above.
if SELECTED_DIR_PATH not in datasets_dir_paths:
    raise ValueError(f"Directory {SELECTED_DIR_PATH} non trovata!")

# glob() gives no ordering guarantee; sorting keeps the model order (and thus
# model_dfs[-1] and the row order of every chart) reproducible across runs.
model_files = sorted(SELECTED_DIR_PATH.glob("*.csv"))

if len(model_files) == 0:
    raise FileNotFoundError(f"Non sono stati trovati file CSV in {SELECTED_DIR_PATH}!")

# One DataFrame per model, in the same order as model_files
model_dfs = [pd.read_csv(csv_path) for csv_path in model_files]

# Metric columns shown in the bar charts
metrics = ['precision', 'recall', 'nand']


In [None]:
# Preview the first rows of the last DataFrame in model_dfs
model_dfs[-1].head()

In [None]:
# Bar chart helper used to compare the models on the selected metrics.
# The bars are drawn with three shades of gray.
def histogram(df, title):
    """Draw a bar chart of ``df`` with a fixed three-tone gray palette.

    Parameters
    ----------
    df : object exposing ``.plot.bar`` (a pandas DataFrame in this notebook)
        Data to plot.
    title : str
        Text set as the title of the resulting axes.

    Returns
    -------
    None
        The chart is produced as a side effect; nothing is returned.
    """
    gray_palette = ['#444444', '#CCCCCC', '#999999']
    bar_options = dict(figsize=(10, 5), rot=0, color=gray_palette)
    axes = df.plot.bar(**bar_options)

    # Legend placement: 'upper right' location with anchor point (1.0, 0.21)
    legend_options = {'loc': 'upper right', 'bbox_to_anchor': (1.0, 0.21)}
    axes.legend(**legend_options)

    axes.set_title(title)
    # Grid lines on the y axis only
    axes.grid(axis='y')


# One label per model: the CSV file name without its extension. Reused as the
# row index of both summary tables below.
model_names = [file.stem for file in model_files]

# Column-wise mean of each selected metric, one Series per model
mean_metrics = [df[metrics].mean() for df in model_dfs]
# One row per model, one column per metric
mean_metrics_df = pd.DataFrame(mean_metrics, index=model_names)

# Bar chart of the mean metrics over each model's CSV
histogram(mean_metrics_df, 'Mean metrics for each model')

# For each model keep exactly ONE row: the first row where "fds" reaches its
# maximum. Filtering with `== max()` followed by drop_duplicates() could yield
# several rows (ties on "fds" with different metric values), which made the
# index assignment below fail with a length mismatch.
max_fds = [df.loc[[df["fds"].idxmax()], metrics] for df in model_dfs]

# Stack the single-row frames: one row per model, one column per metric
max_fds_df = pd.concat(max_fds, axis=0)
max_fds_df.index = model_names

# Bar chart of the metrics at the row that maximizes "fds" for each model
histogram(max_fds_df, "Maximized metrics for each model")