In [23]:
import pickle
import numpy as np
import pandas as pd
from utils_martina.my_utils import *

In [24]:
# Identifiers of the runs to aggregate; each one selects a
# "metrics_<id>-Martina.pkl" / "<id>-Martina.pkl" file in later cells.
logs = ['19316', '19708', '1796', '22248', '12236', '20296']

# Weight of each run in the weighted average, positionally aligned with `logs`.
# NOTE(review): presumably the number of instances evaluated in each run - confirm.
conta = [2852, 2870, 2246, 1622, 1622, 1238]

# The two lists are consumed as parallel lists, so a length mismatch must fail loudly here.
assert len(logs) == len(conta), "logs and conta must have the same length"

In [31]:
metrics_keys = ['ID', 'Correctness', 'Fidelity', 'Implausibility', 'Dissimilarity', 'Accuracy']


def _summarize_evaluator(metrics, eval_id, metric_names):
    """Build one summary row (mean of each metric) for a single evaluator.

    Parameters
    ----------
    metrics : mapping
        Indexed as ``metrics[metric_name][eval_id]``; each entry is converted
        to a NumPy array of per-instance values.
    eval_id : hashable
        Key of the evaluator inside every ``metrics[metric_name]`` mapping.
    metric_names : iterable of str
        Names of the metrics to average.

    Returns
    -------
    dict
        ``{"Evaluator": eval_id, <metric>: mean rounded to 4 decimals or NaN}``.
        NaN is used when there are no values to average.
    """
    row = {"Evaluator": eval_id}
    acc = np.array(metrics['Accuracy'][eval_id])

    for metric in metric_names:
        vals = np.array(metrics[metric][eval_id])

        # Implausibility and Dissimilarity are only computed for well-predicted instances
        if metric in ('Implausibility', 'Dissimilarity'):
            # Element-wise comparison on purpose (builds a boolean mask), not an identity test.
            vals = vals[acc == True]  # noqa: E712

        mean_val = np.mean(vals) if vals.size > 0 else np.nan
        # NOTE(review): rounding here, before the cross-log weighted average computed
        # later, discards precision; kept so the per-log tables stay unchanged.
        row[metric] = round(mean_val, 4) if not np.isnan(mean_val) else np.nan

    return row


# One summary DataFrame per log, in the same order as `logs`.
dfs = []

for log_id in logs:
    # NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
    with open(f"output/cf_dict/metrics_{log_id}-Martina.pkl", 'rb') as f:
        metrics = pickle.load(f)

    # metrics_keys[0] is 'ID'; it only provides the evaluator ids and is not averaged.
    rows = [
        _summarize_evaluator(metrics, eval_id, metrics_keys[1:])
        for eval_id in metrics['ID'].keys()
    ]

    summary_df = pd.DataFrame(rows).set_index("Evaluator")
    dfs.append(summary_df)

In [72]:
# Weighted average of the per-log summary tables in `dfs`, using `conta` as weights.
#
# NOTE(review): Implausibility and Dissimilarity are per-log means over the
# well-predicted instances only, yet every column is weighted with the same
# `conta` value. If `conta` holds total instance counts, those two columns are
# presumably mis-weighted - confirm the intended weights.

# The frames are stacked positionally, so they must agree on shape, row labels
# and column labels, and there must be exactly one weight per frame.
shape = dfs[0].shape
assert all(df.shape == shape for df in dfs), "DataFrame shapes must match"
assert all(df.index.equals(dfs[0].index) for df in dfs), "DataFrame indexes must match"
assert all(df.columns.equals(dfs[0].columns) for df in dfs), "DataFrame columns must match"
assert len(conta) == len(dfs), "Need exactly one weight per DataFrame"

# Stack into a 3D array; float dtype guarantees np.isnan works on every cell.
array_3d = np.array([df.values for df in dfs], dtype=float)  # shape: (n_df, rows, cols)
weights = np.array(conta).reshape(-1, 1, 1)   # shape: (n_df, 1, 1)

# Positions where at least one DataFrame holds a NaN
nan_mask = np.any(np.isnan(array_3d), axis=0)  # shape: (rows, cols)

# Weighted mean across DataFrames (axis 0)
weighted_sum = np.sum(array_3d * weights, axis=0)
total_weight = np.sum(weights)

weighted_avg = weighted_sum / total_weight

# Wherever any input was NaN, the average is NaN as well (made explicit here;
# the NaN already propagates through the sum above).
weighted_avg[nan_mask] = np.nan

# Rebuild a DataFrame with the labels of the first frame
result_df = pd.DataFrame(np.round(weighted_avg,4), columns=dfs[0].columns, index=dfs[0].index)
print(result_df)


           Correctness  Fidelity  Implausibility  Dissimilarity  Accuracy
Evaluator                                                                
0               0.2254    0.1852          0.2749         0.2780    0.8906
1               0.3714    0.2722          0.2112         0.2150    0.8906
2               0.8908    0.8904          0.0000         0.8951    0.8906
3               0.9966    0.7812          0.0000         0.8468    0.8906
4               0.3080    0.1714             NaN            NaN    0.8906


In [71]:
# Directory holding the pickled evaluation managers.
# Forward slashes are accepted by open() on Windows as well as POSIX, and avoid
# the invalid "\G" escape sequence the backslash-separated literal contained.
eval_manager_path = "../../explainability/GRETEL-repo/output/eval_manager/"

# Only the first log is inspected here.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(f'{eval_manager_path}{logs[0]}-Martina.pkl', 'rb') as f:
    eval_manager = pickle.load(f)

# Print "(position) explainer name" for every evaluator; for explainers whose
# name contains "Temporal", also print their (alpha, beta, gamma) settings.
for i, evaluator in enumerate(eval_manager._evaluators):
    explainer = evaluator._explainer
    # Keep only the part of the explainer name before the first '-'.
    name = explainer.name.split('-')[0]

    if "Temporal" not in name:
        print(f"({i}) {name}")
        continue

    # For the "NoStability" variants gamma is not read; an "X" is shown in its place.
    gamma = 'X' if 'NoStability' in name else explainer.gamma
    print(f"({i}) {name}: ({explainer.alpha}, {explainer.beta}, {gamma})")

(0) DataDrivenBidirectionalSearchExplainer
(1) ObliviousBidirectionalSearchExplainer
(2) TemporalDCESExplainer: (0.7, 0.2, 0.1)
(3) TemporalDCESExplainerNoStability: (1, 0, X)
(4) GNNMOExp


In [73]:
# Sum of the per-log weights in `conta` (the denominator of the weighted average).
sum(conta)

12450