### USAGE
- This notebook visualizes the metrics of the CNN and MLP models that are stored in `best_models_metrics.csv`.
- Run all code blocks to plot metrics.
- The name of the model visible on the generated graph comes from the `--output` argument of either the `mlp_baseline.py` or `cnn_baseline.py` script. Therefore, it is particularly important to give it a descriptive name when running those scripts.

### HOW IT WORKS
- After they run, `explore_class_seq_xvalidation` and `explore_class_struct_xvalidation` update the `best_models_metrics.csv` file with the metrics of the explored models.
- Each row is treated as a different case to compare.

In [1]:
import pickle
from matplotlib import pyplot as plt
import torch
import pandas as pd
import plotly.graph_objects as go

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Draw metrics comparison between shuffled, clustered models for the CNN and MLP.
# Point `models_metrics_path` at "best_models_metrics.csv" to plot the default
# metrics file instead of this experiment-specific one.
models_metrics_path = "/projects/0/einf2380/data/reports/metrics_csv/hla0201_9mers_experiments_only_clustered.csv"
df = pd.read_csv(models_metrics_path)

# Axis labels and the CSV columns backing them; the three lists must stay in
# the same order. More metrics (e.g. "MCC" / "mcc_mean", "f1_score" /
# "f1_mean") can be plotted by extending all three lists consistently.
x = ["AUC", "Accuracy"]
mean_columns = ["auc_mean", "accuracy_mean"]
std_columns = ["auc_std", "accuracy_std"]

fig = go.Figure()
# One bar group per CSV row. Values are paired with their row positionally
# rather than looked up again by `model_path`, so rows that share a name each
# keep their own metrics and the frame is not rescanned on every iteration.
for model_path, means, stds in zip(
    df["model_path"], df[mean_columns].values, df[std_columns].values
):
    fig.add_trace(
        go.Bar(
            x=x,
            y=means,
            name=model_path,
            # Standard deviations are drawn as error bars in data units.
            error_y=dict(type="data", array=stds),
        )
    )

fig.update_layout(
    barmode="group",
    title_text="Performance des modèles structuraux et sequentiels sur des motifs de séquences différents",
    autosize=False,
    width=1500,
    height=700,
)
fig.show()

# The exported figure is named after the metrics CSV (without its extension).
figure_name = models_metrics_path.split('/')[-1].replace('.csv', '')
fig.write_image(
    f"/projects/0/einf2380/data/reports/figures/exp_results/{figure_name}.svg",
)

In [14]:
def plot_hist_comparison(title, df, save=False):
    """Show a grouped bar chart comparing the metrics of every row in *df*.

    Each row of *df* becomes one group of bars (sensitivity, specificity, AUC,
    MCC and accuracy) labelled with the row's ``model_path``; the matching
    ``*_std`` columns are drawn as error bars.

    Args:
        title: Title displayed above the figure.
        df: DataFrame with a ``model_path`` column plus the ``*_mean`` and
            ``*_std`` columns for the five metrics listed above.
        save: Falsy (default) to only display the figure; otherwise the
            output file path handed to ``fig.write_image``.
    """
    x = ["Sensitivity", "Specificity", "AUC", "MCC", "Accuracy"]
    mean_columns = [
        "sensitivity_mean", "specificity_mean", "auc_mean", "mcc_mean", "accuracy_mean",
    ]
    std_columns = [
        "sensitivity_std", "specificity_std", "auc_std", "mcc_std", "accuracy_std",
    ]

    fig = go.Figure()
    # Pair every row with its own values positionally. Looking values up again
    # by `model_path` would plot the first match for every row sharing a name.
    for model_path, means, stds in zip(
        df["model_path"], df[mean_columns].values, df[std_columns].values
    ):
        fig.add_trace(
            go.Bar(
                x=x,
                y=means,
                name=model_path,
                error_y=dict(type="data", array=stds),
            )
        )
    fig.update_layout(
        barmode="group",
        title_text=title,
    )
    fig.show()
    if save:
        # This is a plotly figure, so it must be exported through plotly;
        # matplotlib's `plt.savefig` would only write an empty canvas.
        fig.write_image(save)

# Load the default metrics file from the working directory and plot every
# model it contains with the shared comparison helper.
models_metrics_path = "./best_models_metrics.csv"
df = pd.read_csv(models_metrics_path)
plot_hist_comparison(title="Test title", df=df)