# Visualization of Metrics

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import os

## Load Results

In [None]:
# Placeholder for loading the results: nothing is read from disk here, so the
# dictionary starts out empty.
# In a real scenario, you would load the results from a file and fill it in.
# create_comparison_visualizations unpacks each key of this dictionary as a
# (dataset, recommender) pair and treats each value as a per-method metrics dict.
all_results = {}

## Plotting Functions

In [None]:
def plot_all_metrics(results, data_name, recommender_name,
                     save_dir='results/plots', max_steps=5):
    """
    Draws one line plot per metric, comparing all baselines, and saves each as a PDF

    Parameters:
    results: dict
        Format: {baseline_name: {metric_key: values}}
        where values is a sliceable sequence with one entry per masking step.
        Baseline names are upper-cased for the legend.
    data_name: str
        Dataset name, used only in the output file name
    recommender_name: str
        Recommender name, used only in the output file name
    save_dir: str
        Directory to save the PDF files (created if missing)
    max_steps: int
        Number of leading values plotted per baseline; also the number of x ticks

    Baselines lacking a metric are reported with a warning and skipped. A metric
    that no baseline provides is skipped entirely instead of producing an empty PDF.
    """
    # Mapping between metric keys and their display names (title, y-label, indicator).
    # NOTE(review): every POS_*/NEG_* y-label reads '@20' regardless of the cutoff
    # in the key -- looks like a copy-paste leftover; confirm before changing.
    metrics_mapping = {
        'DEL':      ('AUC DEL-P@K', 'DEL@Kₑ', 'Lower is better'),
        'INS':      ('AUC INS-P@K', 'INS@Kₑ', 'Higher is better'),
        'NDCG':     ('AUC NDCG-P',  'CNDCG@Kₑ', 'Lower is better'),
        'POS_at_5': ('AUC POS-P@5', 'POS@20Kₑ', 'Lower is better'),
        'POS_at_10':('AUC POS-P@10','POS@20Kₑ', 'Lower is better'),
        'POS_at_20':('AUC POS-P@20','POS@20Kₑ', 'Lower is better'),
        'NEG_at_5': ('AUC NEG-P@5', 'NEG@20Kₑ', 'Higher is better'),
        'NEG_at_10':('AUC NEG-P@10','NEG@20Kₑ', 'Higher is better'),
        'NEG_at_20':('AUC NEG-P@20','NEG@20Kₑ', 'Higher is better')
    }

    # Style lists; each is cycled independently when there are more baselines
    # than entries.
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
    markers = ['o', 's', '^', 'D', 'v', 'x']
    linestyles = ['-', '--', '-.', ':', (0, (3, 1, 1, 1)), (0, (5, 2))]

    # Create plots directory
    os.makedirs(save_dir, exist_ok=True)

    # Plot each metric (the indicator text is currently not rendered anywhere)
    for metric, (title_name, y_label, _indicator) in metrics_mapping.items():
        fig = plt.figure(figsize=(12, 8))
        try:
            lines_drawn = 0
            for i, (baseline, baseline_metrics) in enumerate(results.items()):
                if metric not in baseline_metrics:
                    print(f"Warning: {metric} not found in {baseline} metrics")
                    continue

                values = baseline_metrics[metric][:max_steps]
                x = range(1, len(values) + 1)  # Numbers of masked items (1, 2, ...)

                plt.plot(
                    x, values,
                    label=baseline.upper(),
                    color=colors[i % len(colors)],
                    linestyle=linestyles[i % len(linestyles)],
                    marker=markers[i % len(markers)],
                    markersize=8,
                    linewidth=2,
                    markevery=1  # Markers on each value
                )
                lines_drawn += 1

            if not lines_drawn:
                # Nothing to show: avoid an empty PDF and a legend warning.
                print(f"Skipping plot for {metric}: no baseline provides this metric")
                continue

            plt.xlabel("Number of Masked Items", fontsize=30)
            plt.ylabel(y_label, fontsize=30)
            plt.grid(True, linestyle='--', alpha=0.7, linewidth=0.5)
            plt.xticks(range(1, max_steps + 1), fontsize=18)
            plt.yticks(fontsize=18)
            plt.legend(
                loc='best',
                fontsize=20,
                frameon=True,
                edgecolor='black'
            )

            plt.tight_layout()

            # Save the plot. The title doubles as the file stem once spaces and
            # '@' are replaced.
            # NOTE(review): the 'd' right after the recommender name is kept
            # as-is so existing file names do not change; confirm it is intended.
            safe_display_name = title_name.replace(" ", "_").replace("@", "at")
            output_path = (f'{save_dir}/{safe_display_name}_{data_name}_'
                           f'{recommender_name}d.pdf')
            plt.savefig(output_path, format='pdf', bbox_inches='tight')
            print(f"Saved plot: {output_path}")
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)

In [None]:
def create_comparison_visualizations(all_results, save_dir='results/plots'):
    """
    Creates comprehensive visualizations comparing methods across datasets and recommenders

    Parameters:
    all_results: dict
        Format: {(dataset_name, recommender_name): results_dict}
        where results_dict maps each explanation method name to a dict of
        {metric_name: values}. Only metrics whose values are numpy arrays are
        used; each array element becomes one row, with Step counted from 1.
    save_dir: str
        Directory to save the visualization files (created if missing)

    Returns:
    pandas.DataFrame
        Long-format table with columns Dataset, Recommender, Method (upper-cased),
        Metric, Step and Value. When no array-valued metric is found the empty
        table is returned and no file or directory is written.

    Files written to save_dir: performance_heatmap.png, method_stability.png,
    radar_<dataset>_<recommender>.png (one per entry of all_results that
    contributed rows), metric_distributions.png and progress_over_steps.png.
    """
    columns = ['Dataset', 'Recommender', 'Method', 'Metric', 'Step', 'Value']

    # Prepare data for plotting
    plot_data = []
    for (dataset, recommender), results in all_results.items():
        for method, metrics in results.items():
            for metric_name, values in metrics.items():
                if not isinstance(values, np.ndarray):
                    continue
                for step, value in enumerate(values, 1):
                    plot_data.append({
                        'Dataset': dataset,
                        'Recommender': recommender,
                        'Method': method.upper(),
                        'Metric': metric_name,
                        'Step': step,
                        'Value': value
                    })

    # Passing the columns explicitly keeps the schema stable even with no rows.
    df = pd.DataFrame(plot_data, columns=columns)
    if df.empty:
        # Nothing to draw; every plot below would fail or come out blank.
        return df

    os.makedirs(save_dir, exist_ok=True)

    metrics_to_plot = ['DEL', 'INS', 'NDCG']
    available_metrics = set(df['Metric'])

    # 1. Heatmap of method performance across datasets and recommenders
    fig = plt.figure(figsize=(15, 10))
    for idx, metric in enumerate(metrics_to_plot, 1):
        plt.subplot(1, 3, idx)
        metric_rows = df[df['Metric'] == metric]
        if not metric_rows.empty:
            # Rows: (Dataset, Recommender); columns: Method; cells: mean over steps.
            pivot_data = metric_rows.groupby(
                ['Dataset', 'Recommender', 'Method'])['Value'].mean().unstack()
            sns.heatmap(pivot_data, annot=True, fmt='.3f', cmap='YlOrRd')
        plt.title(f'{metric} Performance Comparison')

    plt.tight_layout()
    plt.savefig(f'{save_dir}/performance_heatmap.png')
    plt.close(fig)

    # 2. Method stability analysis (standard deviation across steps)
    # DataFrame.plot opens a figure of its own unless it is handed an Axes, so
    # pass one explicitly to honour figsize and avoid leaking an empty figure.
    fig, ax = plt.subplots(figsize=(12, 6))
    stability_data = df.groupby(['Method', 'Metric'])['Value'].std().unstack()
    stability_data.plot(kind='bar', width=0.8, ax=ax)
    plt.title('Method Stability Analysis (Standard Deviation Across Steps)')
    plt.xlabel('Explanation Method')
    plt.ylabel('Standard Deviation')
    plt.xticks(rotation=45)
    plt.legend(title='Metrics', bbox_to_anchor=(1.05, 1))
    plt.tight_layout()
    plt.savefig(f'{save_dir}/method_stability.png')
    plt.close(fig)

    # 3. Radar chart for method comparison
    def create_radar_chart(data, methods, metrics):
        """Draw one closed polygon per method over the given metric axes."""
        angles = np.linspace(0, 2*np.pi, len(metrics), endpoint=False)
        # Repeat the first angle/value at the end so each polygon closes.
        angles_plot = np.concatenate([angles, [angles[0]]])
        radar_fig, radar_ax = plt.subplots(
            figsize=(10, 10), subplot_kw=dict(projection='polar'))

        for method in methods:
            method_rows = data[data['Method'] == method]
            values = [method_rows[method_rows['Metric'] == metric]['Value'].mean()
                      for metric in metrics]
            values += values[:1]
            radar_ax.plot(angles_plot, values, 'o-', label=method)
            radar_ax.fill(angles_plot, values, alpha=0.25)

        radar_ax.set_xticks(angles)
        radar_ax.set_xticklabels(metrics)
        radar_ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

        return radar_fig

    # Only keep radar axes for metrics that actually occur in the data.
    metrics_for_radar = [
        metric
        for metric in ['DEL', 'INS', 'NDCG', 'POS_at_5', 'POS_at_10']
        if metric in available_metrics
    ]
    if metrics_for_radar:
        # Group by the combinations that exist rather than the cartesian product
        # of all datasets and recommenders, which would yield all-NaN charts.
        grouped = df.groupby(['Dataset', 'Recommender'], sort=False)
        for (dataset, recommender), data_subset in grouped:
            fig = create_radar_chart(data_subset,
                                     data_subset['Method'].unique(),
                                     metrics_for_radar)
            plt.title(f'{dataset} - {recommender}\nMethod Comparison')
            plt.savefig(f'{save_dir}/radar_{dataset}_{recommender}.png')
            plt.close(fig)

    # 4. Box plots showing distribution of metrics across steps
    fig = plt.figure(figsize=(15, 10))
    for idx, metric in enumerate(metrics_to_plot, 1):
        plt.subplot(1, 3, idx)
        metric_rows = df[df['Metric'] == metric]
        if not metric_rows.empty:
            sns.boxplot(data=metric_rows,
                        x='Method', y='Value',
                        hue='Dataset')
        plt.title(f'{metric} Distribution')
        plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f'{save_dir}/metric_distributions.png')
    plt.close(fig)

    # 5. Performance improvement over steps
    fig = plt.figure(figsize=(15, 8))
    all_methods = df['Method'].unique()
    for idx, metric in enumerate(metrics_to_plot, 1):
        plt.subplot(1, 3, idx)
        metric_rows = df[df['Metric'] == metric]
        for method in all_methods:
            # Mean value per step, averaged over datasets and recommenders.
            step_means = metric_rows[metric_rows['Method'] == method].groupby(
                'Step')['Value'].mean()
            plt.plot(step_means,
                     marker='o',
                     label=method)
        plt.title(f'{metric} Progress Over Steps')
        plt.xlabel('Step')
        plt.ylabel('Value')
        if metric == 'NDCG':
            # One legend, placed outside the right-most panel.
            plt.legend(bbox_to_anchor=(1.05, 1))
    plt.tight_layout()
    plt.savefig(f'{save_dir}/progress_over_steps.png')
    plt.close(fig)

    return df

In [None]:
def generate_summary_report(df, save_dir='results/tables'):
    """
    Generates a statistical summary report of the results

    Parameters:
    df: pandas.DataFrame
        Long-format results with at least the columns Dataset, Recommender,
        Method, Metric and Value (the table returned by
        create_comparison_visualizations)
    save_dir: str
        Directory to save the CSV files (created if missing)

    Returns:
    tuple
        (summary, method_ranks)
        summary: mean, std, min and max of Value, rounded to 3 decimals, for
        every (Dataset, Recommender, Method, Metric) combination; written to
        summary_statistics.csv.
        method_ranks: one row per (Dataset, Recommender, Method) and one column
        per Metric, holding the rank of the method's mean value among the
        methods of the same dataset and recommender; written to
        method_rankings.csv. Rank 1 is the largest mean and ties share the
        smallest rank, so for metrics where lower is better the best method
        carries the highest rank number.
    """
    # to_csv does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)

    grouped_values = df.groupby(
        ['Dataset', 'Recommender', 'Method', 'Metric'])['Value']

    # Calculate aggregate statistics
    summary = grouped_values.agg(['mean', 'std', 'min', 'max']).round(3)

    # Save summary to CSV
    summary.to_csv(f'{save_dir}/summary_statistics.csv')

    # Calculate method rankings: move Metric into the columns, then rank the
    # methods within each (Dataset, Recommender) experiment. Ranking the whole
    # table at once would compare values across unrelated experiments.
    rankings = grouped_values.mean().unstack()
    method_ranks = rankings.groupby(level=['Dataset', 'Recommender']).rank(
        ascending=False, method='min')
    method_ranks.to_csv(f'{save_dir}/method_rankings.csv')

    return summary, method_ranks