### **Cell 1: Imports**

This cell contains all the necessary library imports for the project.



In [1]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os



### **Cell 2: Install Dependencies**

This cell installs the `kaleido` package, which is required by Plotly to export static images (e.g., .png).



In [2]:
# Kaleido is needed for exporting plotly figures to static images
# (the plots in this notebook are saved with `fig.write_image`).
# `-U` upgrades to the newest available release; no version is pinned here.
# NOTE(review): the recorded output below says a kernel restart may be needed
# before the updated package is picked up.
%pip install -U kaleido

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.




### **Cell 3: Configuration**

This cell centralizes all configurations, such as file paths, algorithm order for plotting, metrics, and column name mappings.



In [None]:
# --- Algorithm ordering -----------------------------------------------------
# Names that appear on their own, followed by base names that appear once per
# variant suffix (base_suffix). The resulting list fixes the plotting order.
_STANDALONE_ALGORITHMS = ['AVF', 'CBRW', 'CompreX', 'FPOF', 'POP', 'SCAN', 'SDRW', 'Zero++']
_BASE_ALGORITHMS = ['DeepSVDD', 'FastABOD', 'iForest', 'KNN', 'LOF', 'McCatch']
_VARIANT_SUFFIXES = ['ca', 'idf', 'onehot', 'pivot', 'nocat']

ALGORITHM_ORDER = _STANDALONE_ALGORITHMS + [
    f'{base}_{suffix}'
    for base in _BASE_ALGORITHMS
    for suffix in _VARIANT_SUFFIXES
]

# --- What to process --------------------------------------------------------
# Metrics and aggregation types; both are used to build input/output file names.
METRICS = ['auc', 'adj_r_precision', 'adj_average_precision', 'adj_max_f1']
AGG_TYPES = ['average', 'best']

# One plot is generated per file group (combined with each metric/aggregation).
FILE_GROUPS = ['binary.csv', 'categorical.csv', 'context.csv', 'feature.csv', 'instance.csv']

# --- Paths ------------------------------------------------------------------
# Root folder that holds one sub-folder per aggregation type.
BASE_PATH = r'../../../results/base_experiments/plot/PAIRPLOT'

# --- Display names ----------------------------------------------------------
# Human-readable axis labels. The 'adj_' variants share the label of the
# corresponding un-prefixed metric.
_RANK_LABELS = {
    'r_precision': 'AVG Rank (P@n)',
    'average_precision': 'AVG Rank (AP)',
    'max_f1': 'AVG Rank (Max-F1)',
}
COLUMN_RENAMES = {
    'auc': 'AVG Rank (AUC)',
    **{f'adj_{name}': label for name, label in _RANK_LABELS.items()},
    **_RANK_LABELS,
}



### **Cell 4: Helper Functions**

This cell contains helper functions to prepare data and generate plots, reducing code duplication.



In [None]:
def get_style_maps(algorithms_list):
    """
    Builds the color and marker-symbol lookups used to draw each algorithm.

    Entries are assigned by position in `algorithms_list`; when there are more
    algorithms than available colors (or symbols) the assignment wraps around.

    Returns a `(color_map, symbol_map)` tuple of dicts keyed by algorithm.
    """
    palette = px.colors.qualitative.Plotly + px.colors.qualitative.Alphabet
    markers = [
        'circle', 'square', 'diamond', 'cross', 'x',
        'star', 'star-triangle-up', 'hourglass', 'circle-cross',
    ]

    def _assign_cyclically(options):
        # Position modulo the number of options gives the wrap-around choice.
        return {
            name: options[position % len(options)]
            for position, name in enumerate(algorithms_list)
        }

    color_map = _assign_cyclically(palette)
    symbol_map = _assign_cyclically(markers)
    return color_map, symbol_map

def prepare_dataframe_for_plotting(df, algorithm_order):
    """
    Prepares the DataFrame by adding an ideal baseline, setting a categorical
    algorithm order, and sorting by it.

    Parameters:
        df: DataFrame with an 'algorithm' column; every other column is
            treated as a value column.
        algorithm_order: iterable of algorithm names giving the desired order.

    Returns:
        A new DataFrame (the input is not modified) whose first row is the
        '_IDEAL_' baseline with 1.0 in every value column, followed by the
        input rows sorted by algorithm. Algorithms that are not listed in
        `algorithm_order` are kept and placed after the listed ones, in order
        of first appearance, instead of being turned into NaN.
    """
    # Add an ideal algorithm for baseline comparison
    ideal_row = {'algorithm': '_IDEAL_'}
    ideal_row.update({col: 1.0 for col in df.columns if col != 'algorithm'})
    df_ideal = pd.DataFrame(ideal_row, index=[0])

    df = pd.concat([df_ideal, df], ignore_index=True)

    # Build the category list: baseline first, then the configured order, then
    # any algorithm present in the data but missing from the configuration.
    # pd.Categorical maps values outside `categories` to NaN, which would
    # silently strip those rows of their label.
    configured = ['_IDEAL_'] + list(algorithm_order)
    configured_set = set(configured)
    unlisted = [
        name for name in df['algorithm'].dropna().unique()
        if name not in configured_set
    ]
    # dict.fromkeys de-duplicates while keeping order; pd.Categorical rejects
    # duplicate categories.
    categories = list(dict.fromkeys(configured + unlisted))

    # Set a categorical order for algorithms to ensure consistent plotting
    df['algorithm'] = pd.Categorical(df['algorithm'], categories=categories, ordered=True)
    df = df.sort_values('algorithm')

    return df

def generate_and_save_plot(df, title, output_path):
    """
    Draws a Plotly scatter matrix for `df` and writes it to `output_path`.

    Every column except 'algorithm' becomes a dimension of the matrix, and
    each algorithm is drawn with the color and symbol returned by
    `get_style_maps`. A confirmation line is printed after the image is
    written.
    """
    algorithms = df['algorithm'].unique()
    color_map, symbol_map = get_style_maps(algorithms)
    value_columns = df.columns.drop('algorithm')

    fig = px.scatter_matrix(
        df,
        dimensions=value_columns,
        color="algorithm",
        symbol="algorithm",
        color_discrete_map=color_map,
        symbol_map=symbol_map,
        width=1000,
        height=905,
    )

    # Enlarge markers, then apply the title and legend heading.
    fig.update_traces(marker={'size': 10})
    fig.update_layout(title=title, legend_title_text="Algorithms")

    fig.write_image(output_path)
    print(f"Saved plot to {output_path}")



### **Cell 5: Generate Plots for Dataset Groups**

This cell iterates through different dataset groups, metrics, and aggregation types to generate and save a pair plot for each combination.



In [None]:
# Enumerate every (aggregation, metric, dataset group) combination; the
# generator yields them in the same order as three nested loops would.
group_plot_jobs = (
    (agg_type, metric, file_group)
    for agg_type in AGG_TYPES
    for metric in METRICS
    for file_group in FILE_GROUPS
)

for agg_type, metric, file_group in group_plot_jobs:
    agg_dir = os.path.join(BASE_PATH, agg_type)
    file_name = file_group.replace('.csv', f'_{metric}.csv')
    input_path = os.path.join(agg_dir, file_name)

    if os.path.exists(input_path):
        # Load, add the baseline/ordering, then switch to display column names.
        df = prepare_dataframe_for_plotting(
            pd.read_csv(input_path), ALGORITHM_ORDER
        ).rename(columns=COLUMN_RENAMES)

        # Title comes from the group name; the output file sits next to the input.
        group_name = file_group.split('.')[0]
        title = group_name.replace('_', ' ').title()
        output_filename = "_".join([title.replace(' ', '_'), agg_type, metric]) + ".png"
        output_path = os.path.join(agg_dir, output_filename)

        generate_and_save_plot(df, title, output_path)
    else:
        print(f"Skipping, file not found: {input_path}")



### **Cell 6: Generate Combined "General" Plot**

For each aggregation type, this cell creates a single pair plot that combines all metrics for the "general" dataset group, providing a comprehensive overview.



In [None]:
for agg_type in AGG_TYPES:

    # Load one frame per metric. Each frame keeps its 'algorithm' column so the
    # metrics can be joined by algorithm name rather than by row position
    # (row order is not guaranteed to match across files).
    metric_frames = []
    missing_path = None
    for metric in METRICS:
        file_name = f'general_{metric}.csv'
        input_path = os.path.join(BASE_PATH, agg_type, file_name)

        if not os.path.exists(input_path):
            missing_path = input_path
            break

        df_metric = pd.read_csv(input_path)
        value_columns = [col for col in df_metric.columns if col != 'algorithm']
        # The combined frame is laid out as 'algorithm' + one column per
        # metric, so each file must contribute exactly one value column.
        if 'algorithm' not in df_metric.columns or len(value_columns) != 1:
            raise ValueError(
                f"Expected an 'algorithm' column plus exactly one value column "
                f"in {input_path}, found: {list(df_metric.columns)}"
            )

        metric_frames.append(
            df_metric.rename(columns={value_columns[0]: metric.replace('adj_', '')})
        )

    # The overview needs every metric; with one file missing the plot is skipped.
    if missing_path is not None:
        print(f"Skipping general plot for '{agg_type}', file not found: {missing_path}")
        continue

    if not metric_frames:
        continue

    # Join all metrics on the algorithm name. An outer join keeps algorithms
    # that are absent from some files (their missing metrics become NaN).
    df_combined = metric_frames[0]
    for df_metric in metric_frames[1:]:
        df_combined = df_combined.merge(df_metric, on='algorithm', how='outer')

    # Fix the column layout: 'algorithm' first, then metrics in METRICS order.
    df_combined = df_combined[['algorithm'] + [m.replace('adj_', '') for m in METRICS]]

    # Prepare and plot the combined data
    df_combined = prepare_dataframe_for_plotting(df_combined, ALGORITHM_ORDER)
    df_combined = df_combined.rename(columns=COLUMN_RENAMES)

    title = f"General Performance Overview ({agg_type.title()})"
    output_filename = f"General_Overview_{agg_type}.png"
    output_path = os.path.join(BASE_PATH, agg_type, output_filename)

    generate_and_save_plot(df_combined, title, output_path)


### **Cell 7: Generate Plots for Specific Groups**

This cell iterates through a specific list of dataset groups (including `binary_nonbinary`) to generate and save a pair plot for each combination of metric and aggregation type.



In [None]:
# Define the specific file groups to be processed in this block.
# 'binary_nonbinary.csv' is listed because this cell's description names that
# group; if its input files are not present, the existence check below skips
# it with a message.
# NOTE(review): the remaining entries are the same groups as FILE_GROUPS, so
# those plots are regenerated (overwritten with identical content) here.
file_groups_to_process = [
    'binary.csv',
    'categorical.csv',
    'context.csv',
    'feature.csv',
    'instance.csv',
    'binary_nonbinary.csv',
]

# Main loop to iterate through aggregation types, metrics, and file groups
for agg_type in AGG_TYPES:
    for metric in METRICS:
        for file_group in file_groups_to_process:
            # Construct the full input file path
            file_name = file_group.replace('.csv', f'_{metric}.csv')
            input_path = os.path.join(BASE_PATH, agg_type, file_name)

            # Check if the source file exists before proceeding
            if not os.path.exists(input_path):
                print(f"Skipping, file not found: {input_path}")
                continue

            # Read the data and prepare it for plotting using the helper function
            df = pd.read_csv(input_path)
            df = prepare_dataframe_for_plotting(df, ALGORITHM_ORDER)

            # Rename columns for better readability in the plot axes
            # (returns a new frame instead of mutating in place).
            df = df.rename(columns=COLUMN_RENAMES)

            # Create a clean title and output filename; underscores in the
            # group name become spaces in the title and are restored in the
            # file name.
            title = file_group.split('.')[0].replace('_', ' ').title()
            output_filename = f"{title.replace(' ', '_')}_{agg_type}_{metric}.png"
            output_path = os.path.join(BASE_PATH, agg_type, output_filename)

            # Generate and save the plot using the dedicated helper function
            generate_and_save_plot(df, title, output_path)

print("\n--- All specific group plots have been generated. ---")