## Note - This notebook assumes the explanation dicts have already been created

In [None]:
import pandas as pd
import numpy as np
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
export_dir = os.getcwd()
from pathlib import Path
import pickle
from collections import defaultdict
import time
import torch
import torch.nn as nn
import copy
import torch.nn.functional as F
import optuna
import logging
import matplotlib.pyplot as plt
import ipynb
import importlib
import sys
from tqdm import tqdm
import multiprocessing
from functools import partial
from concurrent.futures import ProcessPoolExecutor
import torch.multiprocessing as mp
from openpyxl.cell.cell import MergedCell
import re

In [None]:
# Datasets, recommender architectures and explanation methods to evaluate.
data_names = ["ML1M", "Yahoo", "Pinterest"]
recommender_names = ["MLP", "VAE", "NCF"]
expl_names = [
    'cosine',
    'accent',
    'shap',
    'deep_shap',
    'lime',
    'lire',
    'fia',
    'lxr',
    'PI_base',
    'SPINRec',
]

# Folder that receives the generated plots ('NAME' is a placeholder to fill in).
plots_dir = Path('NAME')
plots_dir.mkdir(exist_ok=True)

# Model checkpoints are read from a "check" folder next to the working directory.
export_dir = Path.cwd()
checkpoints_path = export_dir.parent / "check"

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Output type passed to the helpers (via kw_dict['output_type']) per recommender.
output_type_dict = dict(VAE="multiple", MLP="single", NCF="single")

# Number of users in each dataset.
num_users_dict = dict(
    ML1M=6037,
    ML1M_demographic=6037,
    Yahoo=13797,
    Pinterest=19155,
)

# Number of items in each dataset.
num_items_dict = dict(
    ML1M=3381,
    ML1M_demographic=3381,
    Yahoo=4604,
    Pinterest=9362,
)

# Whether the dataset carries demographic features.
demographic_dict = dict(
    ML1M_demographic=True,
    ML1M=False,
    Yahoo=False,
    Pinterest=False,
)

# Feature count for demographic datasets; None means "use the item count".
features_dict = dict(
    ML1M_demographic=3421,
    ML1M=None,
    Yahoo=None,
    Pinterest=None,
)

# Checkpoint file of the trained recommender for each (dataset, recommender)
# pair, resolved under `checkpoints_path`. The run loop looks entries up by
# (data_name, recommender_name) and hands them to `load_recommender`.
# NOTE(review): the numbers in the file names look like training
# hyper-parameters / epoch tags - confirm before relying on them.
recommender_path_dict = {
    ("ML1M","VAE"): Path(checkpoints_path, "VAE_ML1M_0.0007_128_10.pt"),
    ("ML1M","MLP"):Path(checkpoints_path, "MLP1_ML1M_0.0076_256_7.pt"),
    ("ML1M","NCF"):Path(checkpoints_path, "NCF_ML1M_5e-05_64_16.pt"),

    ("ML1M_demographic","VAE"): Path(checkpoints_path, "VAE_ML1M_demographic_0.0001_64_6_18.pt"),
    ("ML1M_demographic","MLP"):Path(checkpoints_path, "MLP_ML1M_demographic_0.0_64_0_28.pt"),
    ("ML1M_demographic","NCF"):Path(checkpoints_path, "NCF_ML1M_demographic_0.00023_32_3_2.pt"),
    
    ("Yahoo","VAE"): Path(checkpoints_path, "VAE_Yahoo_0.0001_128_13.pt"),
    ("Yahoo","MLP"):Path(checkpoints_path, "MLP2_Yahoo_0.0083_128_1.pt"),
    ("Yahoo","NCF"):Path(checkpoints_path, "NCF_Yahoo_0.001_64_21_0.pt"),
    
    ("Pinterest","VAE"): Path(checkpoints_path, "VAE_Pinterest_12_18_0.0001_256.pt"),
    ("Pinterest","MLP"):Path(checkpoints_path, "MLP_Pinterest_0.0062_512_21_0.pt"),
    ("Pinterest","NCF"):Path(checkpoints_path, "NCF2_Pinterest_9e-05_32_9_10.pt"),    
}


# Hidden dimension handed to `load_recommender` for each (dataset, recommender)
# pair; None is used for the VAE entries. The table is written per dataset
# and flattened into (dataset, recommender) keys.
hidden_dim_dict = {
    (dataset, model): dim
    for dataset, dims_per_model in {
        "ML1M": {"VAE": None, "MLP": 32, "NCF": 8},
        "ML1M_demographic": {"VAE": None, "MLP": 32, "NCF": 8},
        "Yahoo": {"VAE": None, "MLP": 32, "NCF": 8},
        "Pinterest": {"VAE": None, "MLP": 512, "NCF": 64},
    }.items()
    for model, dim in dims_per_model.items()
}

# LXR explainer checkpoints: (dataset, recommender) -> (checkpoint file name, int).
# The integer repeats one of the numbers embedded in the file name; presumably
# it is the LXR hidden dimension - TODO confirm against the LXR loading code.
# This dict is not referenced in the visible part of this notebook.
LXR_checkpoint_dict = {
    ("ML1M","VAE"): ('LXR_ML1M_VAE_26_38_128_3.185652725834087_1.420642300151426.pt',128),
    ("ML1M","MLP"): ('LXR_ML1M_MLP_12_39_64_11.59908096547193_0.1414854294885049.pt',64),
    ("ML1M","NCF"): ('LXR_ML1M_NCF_neg_13_39_64_0_17.45690446559206.pt',64),

    ("ML1M_demographic","VAE"): ('LXR_ML1M_demographic_VAE_comb_0_28_128_4.336170186907191_1.7621772323665827.pt',128),
    ("ML1M_demographic","MLP"): ('LXR_ML1M_demographic_MLP_pos_12_17_64_5.146220684658705_0.pt',64),
    ("ML1M_demographic","NCF"): ("LXR_ML1M_demographic_NCF_combined_neg-pos_19_34_128_19.620894652874913_7.7059602612458615.pt",128),
    
    ("Yahoo","VAE"): ('LXR_Yahoo_VAE_neg-1.5pos_combined_19_26_128_18.958765029913238_4.92235962483309.pt',128),
    ("Yahoo","MLP"):('LXR_Yahoo_MLP_neg-pos_combined_last_29_37_128_12.40692505393434_0.19367009952856118.pt',128),
    ("Yahoo","NCF"):('LXR_Yahoo_NCF_neg-pos_combined_loss_14_14_32_16.01464392466348_6.880015038643981.pt',32),
    
    ("Pinterest","VAE"): ('LXR_Pinterest_VAE_comb_5_39_32_4.369254579125666_0.9909558815719377.pt',32),
    ("Pinterest","MLP"):('LXR_Pinterest_MLP_0_5_16_10.059416809308486_0.705778173474644.pt',16),
    ("Pinterest","NCF"): ('LXR_Pinterest_NCF_comb_12_39_64_2.6246630808370672_0.04433778750788146.pt',64),
}

In [None]:
# Pull in the project notebooks through `ipynb.fs.defs`. Each one is
# star-imported, reloaded and star-imported again: the first import makes the
# module object available for `importlib.reload`, and the second import
# re-binds the names so edits made to the notebook since it was first loaded
# are picked up without restarting the kernel.
from ipynb.fs.defs.baselines_functions import *
importlib.reload(ipynb.fs.defs.baselines_functions)
from ipynb.fs.defs.baselines_functions import *

# `LimeBase` and `distance_to_proximity` are not defined in this file;
# presumably they come from the baselines_functions star import - confirm.
lime = LimeBase(distance_to_proximity)

from ipynb.fs.defs.recommenders_architecture import *
importlib.reload(ipynb.fs.defs.recommenders_architecture)
from ipynb.fs.defs.recommenders_architecture import *

from ipynb.fs.defs.help_functions import *
importlib.reload(ipynb.fs.defs.help_functions)
from ipynb.fs.defs.help_functions import *

In [None]:
def single_user_metrics(user_vector, user_tensor, item_id, item_tensor, recommender_model, expl_dict, **kw_dict):
    """Compute the explanation metrics of one user over 5 masking steps.

    At step k (k = 1..5) the k highest-scored explanation items are removed
    from the user's history (DEL, ranking metrics) or are the only items kept
    (INS), and the recommender is queried again.

    Args:
        user_vector: not used by this function.
        user_tensor: the user's interaction tensor; it is cloned, never modified.
        item_id: index of the target (recommended) item.
        item_tensor: tensor of the target item, forwarded to `recommender_run`.
        recommender_model: the recommender being explained.
        expl_dict: explanation as a list of (item_id, score) pairs, or any
            object convertible to one (a dict is converted through `.items()`).
        **kw_dict: forwarded to the helper functions; 'device' is read here,
            and 'num_items' is read when the target item is not in the ranking.

    Returns:
        A list of nine arrays of length 5, in this order: DEL, INS, NDCG,
        POS@1, POS@5, POS@10, POS@20, POS@50, POS@100. All-zero arrays are
        returned when the explanation cannot be converted or sorted.
    """
    n_steps = 5
    rank_cutoffs = (1, 5, 10, 20, 50, 100)

    # Score of the target item on the untouched history; used as denominator.
    baseline = recommender_run(user_tensor.clone(), recommender_model, item_tensor, item_id, **kw_dict)
    baseline_score = baseline.detach().cpu().numpy() + 1e-9
    if baseline_score <= 1e-9:
        print(f"Warning: Original score is close to zero for item {item_id}.DEL/INS might be unstable.")
        baseline_score = 1e-9

    # History used for every perturbation: the target item itself is masked out.
    history = user_tensor.clone()
    history[item_id] = 0

    # ---- normalise the explanation into a list ----
    if isinstance(expl_dict, list):
        candidates = list(expl_dict)
    else:
        try:
            if hasattr(expl_dict, 'items'):
                candidates = list(expl_dict.items())
            else:
                candidates = list(expl_dict)
        except Exception as e:
            print(f"Error converting expl_dict to list: {e}")
            return [np.zeros(n_steps)] * 9

    # ---- keep well-formed (item, score) pairs, most important first ----
    try:
        ranked_expl = [pair for pair in candidates
                       if isinstance(pair, (list, tuple)) and len(pair) >= 2]
        if len(ranked_expl) != len(candidates):
            print("Warning: Some items in expl_dict are invalid; filtered out.")
        ranked_expl.sort(key=lambda pair: pair[1], reverse=True)
    except (IndexError, TypeError) as e:
        print(f"Error sorting explanation list: {e}")
        return [np.zeros(n_steps)] * 9

    # ---- metric containers ----
    del_ratio = [0.0] * n_steps
    ins_ratio = [0.0] * n_steps
    ndcg = [0.0] * n_steps
    pos_hits = [[0] * n_steps for _ in rank_cutoffs]

    # The ratios are undefined when the baseline score is NaN or zero.
    ratio_undefined = np.isnan(baseline_score) or baseline_score == 0

    for step in range(n_steps):
        # 1 on the (step + 1) most important explanation items, 0 elsewhere.
        top_mask = torch.zeros_like(history,
                                    dtype=torch.float32,
                                    device=kw_dict['device'])
        top_ids = [pair[0] for pair in ranked_expl[:step + 1]]
        if top_ids:
            top_mask[top_ids] = 1

        history_without_top = history * (1 - top_mask)
        history_only_top = history * top_mask

        # Rank of the target item once the top items were removed;
        # get_top_k is expected to return a mapping whose key order is the ranking.
        ranking = get_top_k(history_without_top, history, recommender_model, **kw_dict)
        ranked_keys = list(ranking.keys())
        if item_id in ranked_keys:
            rank = ranked_keys.index(item_id) + 1
        else:
            rank = kw_dict['num_items']

        # ---- POS@K ----
        for cutoff, hits in zip(rank_cutoffs, pos_hits):
            hits[step] = 1 if rank <= cutoff else 0

        # ---- DEL ----
        removed_score = recommender_run(history_without_top, recommender_model,
                                        item_tensor, item_id, **kw_dict)
        if ratio_undefined:
            del_ratio[step] = np.nan
        else:
            del_ratio[step] = float(removed_score.detach().cpu().numpy() / baseline_score)

        # ---- INS ----
        kept_score = recommender_run(history_only_top, recommender_model, item_tensor, item_id, **kw_dict)
        if ratio_undefined:
            ins_ratio[step] = np.nan
        else:
            ins_ratio[step] = float(kept_score.detach().cpu().numpy() / baseline_score)

        # ---- NDCG ----
        ndcg[step] = get_ndcg(ranked_keys, item_id, **kw_dict)

    return [np.array(del_ratio),
            np.array(ins_ratio),
            np.array(ndcg),
            *(np.array(hits) for hits in pos_hits)]

In [None]:
def eval_one_expl_type(expl_name):
    """Evaluate one explanation method on every test user and average the metrics.

    Relies on notebook-level globals set by the run loop: `data_name`,
    `recommender_name`, `files_path`, `recommender`, `test_array`,
    `test_data`, `items_array`, `kw_dict` and `device`.

    Args:
        expl_name: name of the explanation method. 'SPINRec' reads one pickle
            per user from each of the trial folders 1..11 and keeps, per
            metric and step, the best value across trials (maximum for INS,
            minimum for everything else). Any other name reads
            `{recommender_name}_{expl_name}_expl_dict.pkl` from `files_path`.

    Returns:
        dict mapping 'DEL', 'INS', 'NDCG', 'POS_at_1', 'POS_at_5',
        'POS_at_10', 'POS_at_20', 'POS_at_50' and 'POS_at_100' to a length-5
        array with the mean over the users that were actually evaluated.
        Users without an explanation are skipped and do not count towards
        the mean. All-zero arrays are returned if no user was evaluated.
    """
    print(f' ============ Start explaining {data_name} {recommender_name} by {expl_name} ============')

    num_steps = 5
    # Same order as the list returned by single_user_metrics.
    metric_names = ('DEL', 'INS', 'NDCG',
                    'POS_at_1', 'POS_at_5', 'POS_at_10',
                    'POS_at_20', 'POS_at_50', 'POS_at_100')
    ins_index = metric_names.index('INS')
    totals = {name: np.zeros(num_steps) for name in metric_names}
    evaluated_users = 0

    # Load explanation dicts (SPINRec explanations are loaded per user below)
    if expl_name == 'SPINRec':
        spinrec_dir = Path(os.getcwd()).parent / "processed_data" / data_name / "PI" / recommender_name / "sample_random_user"
    else:
        with open(Path(files_path, f'{recommender_name}_{expl_name}_expl_dict.pkl'), 'rb') as handle:
            expl_dict = pickle.load(handle)

    recommender.eval()

    with torch.no_grad():
        for i in tqdm(range(test_array.shape[0])):
            user_vector = test_array[i].copy()
            user_tensor = torch.FloatTensor(user_vector).to(device)
            user_id = int(test_data.index[i])

            item_id = int(get_user_recommended_item(user_tensor, recommender, **kw_dict).detach().cpu().numpy())
            item_vector = items_array[item_id]
            item_tensor = torch.FloatTensor(item_vector).to(device)

            # The recommended item is removed from the user's history
            user_vector[item_id] = 0
            user_tensor[item_id] = 0

            if expl_name == 'SPINRec':
                # --- SPINRec: best result over the available trials ---
                best_res = None
                for trial in range(1, 12):
                    pkl_path = spinrec_dir / str(trial) / f'PI_expl_dict_user_{i}.pkl'

                    if not pkl_path.exists():
                        print(f"Missing file: {pkl_path}")
                        continue

                    with open(pkl_path, 'rb') as f:
                        candidate_expl_dict = pickle.load(f)

                    user_expl = candidate_expl_dict.get(user_id)
                    if user_expl is None:
                        print(f"User {user_id} not found in {pkl_path}")
                        continue

                    try:
                        res = single_user_metrics(user_vector.copy(), user_tensor.clone(), item_id, item_tensor, recommender, user_expl, **kw_dict)
                    except Exception as e:
                        print(f"Error evaluating metrics for user {user_id} in trial {trial}: {e}")
                        continue

                    if best_res is None:
                        best_res = res
                    else:
                        # Element-wise best: max for INS, min for every other metric
                        best_res = [
                            np.maximum(best_res[j], res[j]) if j == ins_index else np.minimum(best_res[j], res[j])
                            for j in range(len(res))
                        ]

                if best_res is None:
                    print(f"Warning: No valid explanations found for user {user_id} in SPINRec.")
                    continue

                res = best_res
            else:
                # --- Default explainers: one explanation per user ---
                user_expl = expl_dict.get(user_id)
                if user_expl is None:
                    print(f"User {user_id} not found in {expl_name} expl_dict.")
                    continue
                res = single_user_metrics(user_vector, user_tensor, item_id, item_tensor, recommender, user_expl, **kw_dict)

            # Accumulate metrics of this user
            for name, values in zip(metric_names, res):
                totals[name] += values
            evaluated_users += 1

    # Average over the users that contributed; skipped users must not dilute
    # the mean, so the total number of test users is not used here.
    if evaluated_users == 0:
        print(f"Warning: no users were evaluated for {expl_name}; returning zeros.")
    denominator = max(evaluated_users, 1)

    return {name: total / denominator for name, total in totals.items()}

In [None]:
def plot_all_metrics(results, data_name, recommender_name):
    """Plot every metric of one dataset/recommender pair and save each figure as a PDF.

    Args:
        results: mapping {explainer_name: {metric_name: sequence of values}}.
        data_name: dataset name, used in the figure title and the file name.
        recommender_name: recommender name, used in the title and the file name.

    One figure per metric is written to `plots_dir` (notebook-level global) as
    `{data_name}_{recommender_name}_{metric}_d.pdf`. A metric for which no
    explainer provides a non-empty 1-D series is skipped without saving.
    """
    # metric key -> (y-axis label, x-axis label)
    metrics_mapping = {
        'DEL':        ('DEL@Ke',     "Number of Masked Items"),
        'INS':        ('INS@Ke',     "Number of Items Added"),
        'NDCG':       ('CDCG@Ke',    "Number of Masked Items"),

        'POS_at_1':   ('POS@1,Ke',   "Number of Masked Items"),
        'POS_at_5':   ('POS@5,Ke',   "Number of Masked Items"),
        'POS_at_10':  ('POS@10,Ke',  "Number of Masked Items"),
        'POS_at_20':  ('POS@20,Ke',  "Number of Masked Items"),
        'POS_at_50':  ('POS@50,Ke',  "Number of Masked Items"),
        'POS_at_100': ('POS@100,Ke', "Number of Masked Items"),
    }

    # Styling: one colour / marker / line style per explainer, cycled if needed
    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
        '#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
        '#bcbd22', '#17becf', '#393b79', '#8c6d31'
    ]
    markers = ['o', 's', '^', 'D', 'v', 'x', '*', 'P', 'H', '<', '>', 'X']
    linestyles = [
        '-', '--', '-.', ':', (0, (3, 1, 1, 1)), (0, (5, 2)),
        (0, (1, 1)), (0, (5, 1)), (0, (3, 5, 1, 5)), (0, (3, 1, 1, 1, 1, 1)),
        (0, (4, 4)), (0, (2, 2))
    ]

    for metric_name, (y_label, x_label) in metrics_mapping.items():
        plt.figure(figsize=(14, 10))

        legend_labels = []   # labels of the series actually drawn
        max_steps = 0        # longest drawn series; defines the x ticks

        for i, (baseline, baseline_metrics) in enumerate(results.items()):
            # 'PI_base' is displayed under the name 'ABLT'
            label = "ABLT" if baseline == "PI_base" else baseline

            if metric_name not in baseline_metrics:
                print(f"Warning: {metric_name} not found in {label} metrics")
                continue

            try:
                values = np.asarray(baseline_metrics[metric_name])
            except (TypeError, ValueError) as e:
                print(f"Warning: Could not convert values for {metric_name} in {label} to numpy array: {e}")
                continue

            # Only non-empty 1-D series can be drawn as a line
            if values.size == 0 or values.ndim != 1:
                print(f"Warning: Invalid data shape for {metric_name} in {label}. Shape: {values.shape}. Skipping plot.")
                continue

            num_steps = len(values)
            if num_steps != 5:
                print(f"Warning: Expected 5 steps for {metric_name} in {label}, found {num_steps}. Adjusting x-axis.")
            x = np.arange(1, num_steps + 1)  # step numbers 1..num_steps

            plt.plot(
                x, values,
                color=colors[i % len(colors)],
                linestyle=linestyles[i % len(linestyles)],
                marker=markers[i % len(markers)],
                markersize=8,
                linewidth=2,
                label=label.upper()
            )
            legend_labels.append(label.upper())
            max_steps = max(max_steps, num_steps)

        # Nothing was drawn: do not save an empty figure
        if not legend_labels:
            print(f"Skipping plot for {metric_name} as no valid data was found in any baseline.")
            plt.close()
            continue

        plt.xlabel(x_label, fontsize=30)
        plt.ylabel(y_label, fontsize=30)
        plt.title(f"{data_name}  |  {recommender_name}  |  {y_label}", fontsize=32, pad=20)
        plt.grid(True, linestyle='--', alpha=0.7, linewidth=0.5)
        plt.xticks(np.arange(1, max_steps + 1), fontsize=18)
        plt.yticks(fontsize=18)

        # The INS legend is placed in the upper-left corner, all others lower-left
        print(metric_name)
        if metric_name == 'INS':
            plt.legend(fontsize=14, loc='upper left', frameon=True)
        else:
            plt.legend(fontsize=14, loc='lower left', frameon=True)

        # Save plot
        safe_name = metric_name.replace('@', 'at').replace(',', '_')
        plot_path = plots_dir / f'{data_name}_{recommender_name}_{safe_name}_d.pdf'
        plt.savefig(plot_path, format='pdf', bbox_inches='tight', dpi=1000)
        print(f"Saved plot: {plot_path.name}")
        plt.close()  # free the figure's memory

# RUN

In [None]:
# Run the full evaluation: every dataset x recommender x explanation method.
# The loop variables assigned here (data_name, recommender_name, files_path,
# test_array, test_data, items_array, kw_dict, recommender) are read as
# globals by eval_one_expl_type, so their names must not change.
import traceback

# Initialize storage for all results:
# {(data_name, recommender_name): {expl_name: {metric: array}}}
all_results = {}

workspace_root = export_dir.parent
checkpoints_path = Path(workspace_root, "check")

for data_name in data_names:
    DP_DIR = Path("processed_data", data_name)
    files_path = Path(workspace_root, DP_DIR)

    # Load dataset-specific parameters
    num_users = num_users_dict[data_name]
    num_items = num_items_dict[data_name]
    demographic = demographic_dict[data_name]
    if demographic:
        num_features = features_dict[data_name]
    else:
        num_features = num_items_dict[data_name]

    # Item popularity, stored in an array indexed by the dict keys
    with open(Path(files_path, f'pop_dict_{data_name}.pkl'), 'rb') as f:
        pop_dict = pickle.load(f)
    pop_array = np.zeros(len(pop_dict))
    for key, value in pop_dict.items():
        pop_array[key] = value

    # Load the data files of this dataset
    train_data = pd.read_csv(Path(files_path, f'train_data_{data_name}.csv'), index_col=0)
    test_data = pd.read_csv(Path(files_path, f'test_data_{data_name}.csv'), index_col=0)
    static_test_data = pd.read_csv(Path(files_path, f'static_test_data_{data_name}.csv'), index_col=0)

    train_array = train_data.to_numpy()
    test_array = test_data.to_numpy()
    items_array = np.eye(num_items)  # one-hot vector per item
    all_items_tensor = torch.Tensor(items_array).to(device)

    for recommender_name in recommender_names:
        print(f"\nProcessing {data_name} dataset with {recommender_name} recommender")

        # Set up recommender-specific parameters
        output_type = output_type_dict[recommender_name]
        hidden_dim = hidden_dim_dict[(data_name, recommender_name)]
        recommender_path = recommender_path_dict[(data_name, recommender_name)]

        # Keyword arguments shared by all helper functions
        kw_dict = {'device': device,
                   'num_items': num_items,
                   'demographic': demographic,
                   'num_features': num_features,
                   'pop_array': pop_array,
                   'all_items_tensor': all_items_tensor,
                   'static_test_data': static_test_data,
                   'items_array': items_array,
                   'output_type': output_type,
                   'recommender_name': recommender_name}

        recommender = load_recommender(data_name, hidden_dim, checkpoints_path, recommender_path, **kw_dict)

        # Process each explanation method; a failing method is reported and skipped
        results = {}
        for expl_name in expl_names:
            try:
                current_results = eval_one_expl_type(expl_name)
                # Take only the first 5 values from each metric array
                results[expl_name] = {
                    metric: values[:5] if isinstance(values, np.ndarray) and len(values) >= 5 else values
                    for metric, values in current_results.items()
                }
            except Exception as e:
                print(f"Error processing {expl_name} for {data_name} {recommender_name}: {str(e)}")
                traceback.print_exc()  # full traceback for debugging
                continue

        # Store results in the overall dictionary
        all_results[(data_name, recommender_name)] = results

        # Generate and save plots
        #### DONT FORGET THE PI RESULTS ALSO NEED TO COME HERE!!!
        if results:  # Only plot if at least one explanation method succeeded
            plot_all_metrics(results, data_name, recommender_name)
        else:
            print(f"Skipping plots for {data_name} {recommender_name} due to processing errors.")

# The plots are written by plot_all_metrics into plots_dir; the results
# dictionary itself is only kept in memory at this point.
print(f"\nProcessing complete. Plots have been saved to the '{plots_dir}' directory.")

In [None]:
# Persist the complete results dictionary next to the plots. The file name
# embeds the evaluated dataset and recommender lists.
save_path = plots_dir / f'all_results_{data_names}_{recommender_names}.pkl'

with save_path.open('wb') as out_file:
    pickle.dump(all_results, out_file)

print(f"Saved all_results to: {save_path}")

# Save to XLSX file

In [None]:
# --------------------- helpers ------------------------------------------------
def to_fixed_len(seq, length=5):
    """Return a NumPy array of exactly `length` entries.

    Lists, tuples and arrays are flattened, then truncated to `length` or
    right-padded with NaN. Any other value is treated as a scalar: it becomes
    the first entry and the rest is filled with NaN.

    Args:
        seq: a list, tuple, NumPy array or scalar.
        length: number of entries in the returned array.

    Returns:
        1-D NumPy array with `length` entries.
    """
    if isinstance(seq, (list, tuple, np.ndarray)):
        arr = np.asarray(seq).flatten()
        if len(arr) < length:
            arr = np.concatenate([arr, np.full(length - len(arr), np.nan)])
        return arr[:length]
    # The final slice keeps the "exactly `length`" promise even for length == 0.
    return np.asarray([seq] + [np.nan] * (length - 1))[:length]

def safe_sheet_name(name: str) -> str:
    r"""Sanitise a sheet name for Excel.

    Each of the characters : \ / ? * [ ] is replaced by a hyphen and the
    result is cut to 31 characters.
    """
    # Raw docstring above: it contains a literal backslash.
    name = re.sub(r'[:\\/?*\[\]]', '-', name)   # illegal chars -> hyphen
    return name[:31]

# ----------------------------------------------------------------------------- 
# `all_results` is assumed to be filled already.
# structure: { (data_name, recommender_name): { expl_name: {metric: array/scalar}} }

# Write one workbook per dataset with one sheet per (recommender, metric):
# rows are explanation methods, columns are the 5 step values.
out_dir = Path("discrete_metric_xlsx")
out_dir.mkdir(exist_ok=True)

# Group the result blocks by dataset. A (dataset, recommender) pair whose
# explainers all failed has an empty block and nothing to export.
blocks_by_dataset = {}
for (d_name, rec_name), expl_block in all_results.items():
    if not expl_block:
        print(f"No results for {d_name} {rec_name}; nothing to export.")
        continue
    blocks_by_dataset.setdefault(d_name, []).append((rec_name, expl_block))

datasets = set(blocks_by_dataset)  # datasets that have at least one result

for data_name, rec_blocks in blocks_by_dataset.items():
    xlsx_path = out_dir / f"{data_name}_discrete_metrics.xlsx"
    with pd.ExcelWriter(xlsx_path, engine="xlsxwriter") as writer:

        for rec_name, expl_block in rec_blocks:
            # Metric names are taken from the first explanation method
            metrics = list(next(iter(expl_block.values())))

            for metric in metrics:
                # One row per explanation method; a method that lacks this
                # metric gets a row of NaN instead of raising KeyError
                rows = [
                    [expl_name, *to_fixed_len(metric_dict.get(metric, np.nan))]
                    for expl_name, metric_dict in expl_block.items()
                ]

                df = pd.DataFrame(
                    rows,
                    columns=["Explanation"] + [f"val_{i+1}" for i in range(5)],
                )

                sheet_name = safe_sheet_name(f"{rec_name}_{metric}")
                df.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"✔  Written {xlsx_path}")