### **Cell 1: Imports and Initial Setup**

This cell imports the necessary libraries and sets up basic configurations like display options and warning filters.



In [None]:
import pandas as pd
import os
import json
import re
from IPython.display import HTML, display
import matplotlib.pyplot as plt
import warnings

# Suppress warnings for cleaner output.
# NOTE: no category is given, so every warning is silenced for the rest of the
# session, including deprecation notices raised by pandas.
warnings.filterwarnings("ignore")
# Configure pandas to display all columns (None removes the column display limit)
pd.set_option('display.max_columns', None)



### **Cell 2: Configuration of Algorithms and Datasets**

This cell defines the paths to the algorithm result files and lists the datasets to be analyzed. This centralized configuration makes it easy to manage inputs.



In [None]:
# Dictionary mapping algorithm names to their result file paths.
# The keys become the column headers of the tables built by create_ranking_table.
# The paths are relative and written with Windows separators (raw strings), so they
# are resolved against the current working directory of the notebook.
# The first eight entries have a single result file each.
algorithms = {
    'AVF': r'..\..\results\experiments\algorithms\AVF\AVF.csv',
    'CBRW': r'..\..\results\experiments\algorithms\CBRW\CBRW.csv',
    'CompreX': r'..\..\results\experiments\algorithms\CompreX\COMPREX.csv',
    'FPOF': r'..\..\results\experiments\algorithms\FPOF\FPOF.csv',
    'POP': r'..\..\results\experiments\algorithms\POP\POP.csv',
    'SCAN': r'..\..\results\experiments\algorithms\SCAN\SCAN.csv',
    'SDRW': r'..\..\results\experiments\algorithms\SDRW\SDRW.csv',
    'Zero++': r'..\..\results\experiments\algorithms\Zero++\ZERO++.csv',
    
    # The remaining entries are keyed '<algorithm>_<variant>', one result file per
    # variant folder (ca, idf, one_hot, pivot, nocat) - presumably the categorical
    # encoding applied before running the algorithm; confirm against the experiment code.
    'DeepSVDD_ca': r'..\..\results\experiments\algorithms\DeepSVDD\ca\CA.csv',
    'DeepSVDD_idf': r'..\..\results\experiments\algorithms\DeepSVDD\idf\IDF.csv',
    'DeepSVDD_one': r'..\..\results\experiments\algorithms\DeepSVDD\one_hot\ONE_HOT.csv',
    'DeepSVDD_pivot': r'..\..\results\experiments\algorithms\DeepSVDD\pivot\PIVOT.csv',
    'DeepSVDD_nocat': r'..\..\results\experiments\algorithms\DeepSVDD\nocat\NOCAT.csv',
    'FastABOD_ca': r'..\..\results\experiments\algorithms\FastABOD\ca\CA.csv',
    'FastABOD_idf': r'..\..\results\experiments\algorithms\FastABOD\idf\IDF.csv',
    'FastABOD_one': r'..\..\results\experiments\algorithms\FastABOD\one_hot\ONE_HOT.csv',
    'FastABOD_pivot': r'..\..\results\experiments\algorithms\FastABOD\pivot\PIVOT.csv',
    'FastABOD_nocat': r'..\..\results\experiments\algorithms\FastABOD\nocat\NOCAT.csv',
    'iForest_ca': r'..\..\results\experiments\algorithms\iForest\ca\CA.csv',
    'iForest_idf': r'..\..\results\experiments\algorithms\iForest\idf\IDF.csv',
    'iForest_one': r'..\..\results\experiments\algorithms\iForest\one_hot\ONE_HOT.csv',
    'iForest_pivot': r'..\..\results\experiments\algorithms\iForest\pivot\PIVOT.csv',
    'iForest_nocat': r'..\..\results\experiments\algorithms\iForest\nocat\NOCAT.csv',
    'KNN_ca': r'..\..\results\experiments\algorithms\KNN\ca\CA.csv',
    'KNN_idf': r'..\..\results\experiments\algorithms\KNN\idf\IDF.csv',
    'KNN_one': r'..\..\results\experiments\algorithms\KNN\one_hot\ONE_HOT.csv',
    'KNN_pivot': r'..\..\results\experiments\algorithms\KNN\pivot\PIVOT.csv',
    'KNN_nocat': r'..\..\results\experiments\algorithms\KNN\nocat\NOCAT.csv',
    'LOF_ca': r'..\..\results\experiments\algorithms\LOF\ca\CA.csv',
    'LOF_idf': r'..\..\results\experiments\algorithms\LOF\idf\IDF.csv',
    'LOF_one': r'..\..\results\experiments\algorithms\LOF\one_hot\ONE_HOT.csv',
    'LOF_pivot': r'..\..\results\experiments\algorithms\LOF\pivot\PIVOT.csv',
    'LOF_nocat': r'..\..\results\experiments\algorithms\LOF\nocat\NOCAT.csv',
    'McCatch_ca': r'..\..\results\experiments\algorithms\McCatch\ca\CA.csv',
    'McCatch_idf': r'..\..\results\experiments\algorithms\McCatch\idf\IDF.csv',
    'McCatch_one': r'..\..\results\experiments\algorithms\McCatch\one_hot\ONE_HOT.csv',
    'McCatch_pivot': r'..\..\results\experiments\algorithms\McCatch\pivot\PIVOT.csv',
    'McCatch_nocat': r'..\..\results\experiments\algorithms\McCatch\nocat\NOCAT.csv',
 }

# List of all datasets included in the study.
# Names are given without the '.csv' extension; create_ranking_table strips that
# extension (and an optional '_v<N>' suffix) from the result files before matching.
# This list is used as the 'general' dataset group.
all_dataset_names = [
    'ad_nominal', 'AID362red_train_allpossiblenominal', 'apascal_entire_trainvsall',
    'cmc-nominal', 'covertype_nominal_4vs123567', 'kddcup99-corrected-u2rvsnormal-nominal-cleaned',
    'list_attr_celeba_baldvsnonbald', 'Reuters-corn-100', 'w7a-libsvm-nonsparse',
    'bank-additional-ful-nominal', 'solar-flare_FvsAll-cleaned', 'anneal',
    'australian', 'crx', 'ecoli', 'german', 'heart', 'hepatitis', 'lymphography',
    'nursery', 'bands_band_16_variant1ori', 'creditA_plus_42_variant1ori',
    'sick_sick_35_variant1ori', 'thyroid_disease_variant1ori', 'scenario-1-1-rcat-0-icat',
    'scenario-1-2-rcat-0-icat', 'scenario-1-3-rcat-0-icat', 'scenario-1-4-rcat-0-icat',
    'scenario-1-5-rcat-0-icat', 'scenario-1-6-rcat-0-icat', 'scenario-1-7-rcat-0-icat',
    'scenario-1-8-rcat-0-icat', 'scenario-1-9-rcat-0-icat', 'scenario-1-9-rcat-1-icat',
    'scenario-1-9-rcat-2-icat', 'scenario-1-9-rcat-3-icat', 'scenario-1-9-rcat-4-icat',
    'scenario-1-9-rcat-5-icat', 'scenario-1-9-rcat-6-icat', 'scenario-1-9-rcat-7-icat',
    'scenario-1-9-rcat-8-icat', 'scenario-1-9-rcat-9-icat', 'KDDTrain20FS',
    'KDDTrain20ProbeFS', 'KDDTrain20R2LFS', 'census-income-full-nominal',
    'chess_krkopt_zerovsall'
]



### **Cell 3: Helper Functions for Ranking and Styling**

This cell contains utility functions for calculating ranks and for styling DataFrames by bolding the minimum value in each row (which is the best rank when the table holds ranks).



In [None]:
def rank_values(value_list: list) -> list:
    """
    Turns a list of scores into ranks where the highest score gets rank 1.
    Tied scores all receive the smallest rank of their tie group, so the
    next distinct score skips ahead (e.g. 1, 1, 3).
    """
    # 'min' tie handling + descending order = competition-style ranking
    descending_ranks = pd.Series(value_list).rank(method='min', ascending=False)
    return descending_ranks.tolist()

def highlight_min_in_row(df: pd.DataFrame):
    """
    Applies a style to a DataFrame to bold the minimum value in each row.
    This is used to highlight the best rank (rank 1).
    """
    def style_min(row):
        # Clean row data, handling strings with '±' for mean/std deviation
        try:
            numeric_row = row.apply(lambda x: float(str(x).split('\u00B1')[0]))
        except (ValueError, TypeError):
            return ['' for _ in row] # Return no style if conversion fails
            
        is_min = numeric_row == numeric_row.min()
        return ['font-weight: bold' if v else '' for v in is_min]
    
    return df.style.apply(style_min, axis=1)



### **Cell 4: Core Data Loading and Processing Function**

This is the main function responsible for loading raw result files, processing them based on the desired metric and aggregation type (`best` or `average`), calculating ranks, and saving the final table to a CSV file.



In [None]:
def create_ranking_table(allowed_datasets: list, output_filename: str, metric: str = 'auc', agg_type: str = 'average'):
    """
    Loads, processes, and ranks algorithm results for a specific group of datasets.

    For every entry of the module-level `algorithms` mapping whose result file exists,
    one score per dataset is computed (mean or maximum of `metric` over all rows of
    that dataset). Algorithms are then ranked per dataset with `rank_values`, two
    summary rows ('Average <METRIC>' and 'Average Rank', formatted as 'mean ± std')
    are appended, and the table is written as a semicolon-separated CSV to
    `..\\..\\results\\experiments\\tables\\<agg_type>\\<output_filename>_<metric>.csv`.

    Args:
        allowed_datasets (list): A list of dataset names to include in the table.
            A trailing '.csv' in a name is ignored. May be empty, in which case the
            table only contains the two summary rows (with 'nan ± nan' entries).
        output_filename (str): The name of the CSV file to save the results.
        metric (str): The performance metric to use (e.g., 'auc').
        agg_type (str): The aggregation type ('average' or 'best'). Any value other
            than 'average' is treated as 'best'; the value is also used as the name
            of the output sub-directory.

    Returns:
        pd.DataFrame: The performance table (column 'Dataset' plus one column per
        loaded algorithm) including the two summary rows at the bottom.

    Raises:
        KeyError: If a dataset is missing for an algorithm and `metric` has no entry
            in the default-value table, or if a result file lacks the 'dataset' or
            `metric` column.
    """
    # Default values for missing data
    default_values = {'auc': 0.5, 'adj_r_precision': 0, 'adj_average_precision': 0, 'adj_max_f1': 0}

    allowed_datasets_clean = [d.replace('.csv', '') for d in allowed_datasets]

    # --- 1. Load and Aggregate Results ---
    # results_data maps algorithm name -> {dataset name -> aggregated score}
    results_data = {}

    for algo_name, path in algorithms.items():
        # Algorithms without a result file are left out of the table entirely
        if not os.path.exists(path):
            continue

        try:
            df = pd.read_csv(path, sep=';').drop_duplicates()
            # Normalise 'name_v3.csv' / 'name.csv' to the bare dataset name
            df['dataset'] = df['dataset'].str.replace(r'(_v[0-9]{1,2})?\.csv$', '', regex=True)
        except FileNotFoundError:
            # The file vanished between the existence check and the read. Keep the
            # expected columns so the lookups below fall back to the default values
            # instead of failing on a column-less DataFrame.
            df = pd.DataFrame(columns=['dataset', metric])

        scores = {}
        for dataset_name in allowed_datasets_clean:
            df_subset = df[df['dataset'] == dataset_name]

            if df_subset.empty:
                # Add default value if dataset is missing for this algorithm
                scores[dataset_name] = default_values[metric]
            elif agg_type == 'average':
                # Calculate the mean score across all parameters
                scores[dataset_name] = df_subset[metric].mean()
            else:  # 'best'
                # Highest score among all parameters. max() yields the same value as
                # looking up the idxmax row, but does not fail on all-NaN subsets.
                scores[dataset_name] = df_subset[metric].max()

        results_data[algo_name] = scores

    # --- 2. Assemble Performance and Ranking Tables ---
    columns = ['Dataset'] + list(results_data.keys())
    performance_rows = []
    ranking_rows = []

    for dataset_name in allowed_datasets_clean:
        dataset_scores = [scores[dataset_name] for scores in results_data.values()]
        performance_rows.append([dataset_name] + dataset_scores)
        # Calculate ranks for the current dataset's performance row
        ranking_rows.append([dataset_name] + rank_values(dataset_scores))

    df_performance = pd.DataFrame(performance_rows, columns=columns)
    df_ranking = pd.DataFrame(ranking_rows, columns=columns)

    # --- 3. Calculate and Append Summary Statistics ---
    avg_ranks = []
    avg_metrics = []
    for col in columns[1:]:
        # Explicit float cast keeps mean/std well-defined for an empty table
        col_ranks = df_ranking[col].astype(float)
        col_scores = df_performance[col].astype(float)
        avg_ranks.append(f"{col_ranks.mean():.2f} ± {col_ranks.std():.2f}")
        avg_metrics.append(f"{col_scores.mean():.3f} ± {col_scores.std():.2f}")

    # Append summary rows to the performance table
    df_performance.loc[len(df_performance)] = [f'Average {metric.upper()}'] + avg_metrics
    df_performance.loc[len(df_performance)] = ['Average Rank'] + avg_ranks

    # --- 4. Save the Final Table ---
    output_dir = os.path.join(r'..\..\results\experiments\tables', agg_type)
    os.makedirs(output_dir, exist_ok=True)
    df_performance.to_csv(os.path.join(output_dir, f'{output_filename}_{metric}.csv'), index=False, sep=';')

    return df_performance



### **Cell 5: Dataset Categorization Functions**

This block defines functions to group datasets based on their properties (e.g., number of features, number of instances, data type). This allows for more granular analysis of algorithm performance.



In [None]:
# Load dataset summary file (semicolon-separated, path relative to the working directory).
# The grouping helpers below read these columns from it:
# 'file', 'features', 'instances', 'attr_categorics', '%_categorics' and '%_binaries'.
df_summary = pd.read_csv(r'..\resume_datasets.csv', sep=';')

def get_datasets_by_feature_count(low_thresh, med_thresh):
    """
    Buckets the files of `df_summary` by their 'features' value.

    'low_feature' holds files with features <= low_thresh, 'medium_feature' those
    with low_thresh < features <= med_thresh, and 'high_feature' the rest.
    Returns a dict mapping each bucket name to a list of 'file' entries.
    """
    feature_counts = df_summary['features']
    file_names = df_summary['file']
    bucket_masks = {
        'low_feature': feature_counts <= low_thresh,
        'medium_feature': (feature_counts > low_thresh) & (feature_counts <= med_thresh),
        'high_feature': feature_counts > med_thresh,
    }
    return {bucket: file_names[mask].tolist() for bucket, mask in bucket_masks.items()}

def get_datasets_by_instance_count(low_thresh, med_thresh):
    """
    Buckets the files of `df_summary` by their 'instances' value.

    'low_instance' holds files with instances <= low_thresh, 'medium_instance' those
    with low_thresh < instances <= med_thresh, and 'high_instance' the rest.
    Returns a dict mapping each bucket name to a list of 'file' entries.
    """
    instance_counts = df_summary['instances']
    file_names = df_summary['file']
    bucket_masks = {
        'low_instance': instance_counts <= low_thresh,
        'medium_instance': (instance_counts > low_thresh) & (instance_counts <= med_thresh),
        'high_instance': instance_counts > med_thresh,
    }
    return {bucket: file_names[mask].tolist() for bucket, mask in bucket_masks.items()}

def get_datasets_by_categorical_percentage(low_thresh, med_thresh):
    """
    Buckets datasets that have at least one categorical attribute by '%_categorics'.

    Only rows of `df_summary` with attr_categorics > 0 are considered.
    'low_categorical' holds files with %_categorics <= low_thresh, 'medium_categorical'
    those with low_thresh < %_categorics <= med_thresh, and 'high_categorical' the rest.
    Returns a dict mapping each bucket name to a list of 'file' entries.
    """
    with_categoricals = df_summary[df_summary['attr_categorics'] > 0]
    categorical_share = with_categoricals['%_categorics']
    file_names = with_categoricals['file']
    bucket_masks = {
        'low_categorical': categorical_share <= low_thresh,
        'medium_categorical': (categorical_share > low_thresh) & (categorical_share <= med_thresh),
        'high_categorical': categorical_share > med_thresh,
    }
    return {bucket: file_names[mask].tolist() for bucket, mask in bucket_masks.items()}

def get_datasets_by_binary_percentage(threshold):
    """
    Splits the datasets whose '%_categorics' is at least 100 by their '%_binaries' value.

    'low_binary' holds files with %_binaries <= threshold, 'high_binary' those above it.
    Returns a dict mapping each bucket name to a list of 'file' entries.
    """
    fully_categorical = df_summary[df_summary['%_categorics'] >= 100]
    binary_share = fully_categorical['%_binaries']
    file_names = fully_categorical['file']
    bucket_masks = {
        'low_binary': binary_share <= threshold,
        'high_binary': binary_share > threshold,
    }
    return {bucket: file_names[mask].tolist() for bucket, mask in bucket_masks.items()}

def get_datasets_by_context():
    """
    Returns the hand-curated assignment of dataset files to application domains.

    The result maps each '<domain>_context' key to a list of dataset file names
    (including the '.csv' extension). A fresh dict is built on every call.
    """
    # NOTE(review): two entries carry a '_processed' suffix that the names in the
    # general dataset list do not have - confirm they match the result files.
    medicine = [
        'thyroid_disease_variant1ori.csv', 'sick_sick_35_variant1ori.csv', 'cmc-nominal.csv',
        'ecoli.csv', 'heart.csv', 'hepatitis.csv', 'lymphography.csv', 'nursery.csv',
    ]
    finance = [
        'bank-additional-ful-nominal_processed.csv', 'creditA_plus_42_variant1ori.csv',
        'australian.csv', 'crx.csv', 'german.csv', 'Reuters-corn-100.csv',
    ]
    invasion = [
        'kddcup99-corrected-u2rvsnormal-nominal-cleaned.csv', 'KDDTrain20R2LFS.csv',
        'KDDTrain20ProbeFS.csv', 'KDDTrain20FS.csv',
    ]
    sciency = [
        'solar-flare_FvsAll-cleaned_processed.csv', 'bands_band_16_variant1ori.csv', 'anneal.csv',
        'covertype_nominal_4vs123567.csv', 'AID362red_train_allpossiblenominal.csv',
        'list_attr_celeba_baldvsnonbald.csv', 'w7a-libsvm-nonsparse.csv',
    ]

    # Synthetic scenarios: first index runs 1..9 with rcat 0, then rcat runs 1..9 at index 9
    synthetic = [f'scenario-1-{step}-rcat-0-icat.csv' for step in range(1, 10)]
    synthetic += [f'scenario-1-9-rcat-{rcat}-icat.csv' for rcat in range(1, 10)]

    return {
        'medicine_context': medicine,
        'finance_context': finance,
        'invasion_context': invasion,
        'sciency_context': sciency,
        'synthetic_context': synthetic,
    }



### **Cell 6: Main Execution: Generate All Ranking Tables**

This is the main execution block. It iterates through all metrics, aggregation types, and dataset groups to generate and save all the ranking tables for the entire study.



In [None]:
# Metrics and aggregation modes for which a table is produced
metrics_to_process = ['auc', 'adj_r_precision', 'adj_average_precision', 'adj_max_f1']
aggregation_types = ['average', 'best']

# Collect every dataset grouping into one name -> dataset-list mapping.
# The 'general' group comes first; the others are merged in the order listed.
dataset_groups = {'general': all_dataset_names}
for grouping in (
    get_datasets_by_feature_count(15, 25),
    get_datasets_by_instance_count(10000, 20000),
    get_datasets_by_categorical_percentage(34, 67),
    get_datasets_by_binary_percentage(50),
    get_datasets_by_context(),
):
    dataset_groups.update(grouping)

# One table per (aggregation type, metric, dataset group) combination
for agg_type in aggregation_types:
    for metric in metrics_to_process:
        for group_name, dataset_list in dataset_groups.items():
            if dataset_list:
                print(f"Processing: {group_name}, Metric: {metric}, Type: {agg_type}")
                output_file = f"table_{group_name}"
                create_ranking_table(dataset_list, output_file, metric, agg_type)
            else:
                # Nothing to rank for an empty group; report it and move on
                print(f"Skipping {group_name} for {metric} ({agg_type}) - No datasets in this group.")

print("\n--- All ranking tables have been generated. ---")



### **Cell 7: Example: Display and Style a Result Table**

This cell demonstrates how to load one of the generated tables and apply the custom styling to highlight the best ranks.



In [None]:
# Example: Load and display the 'average' AUC results for the general dataset group
try:
    # Read from the directory create_ranking_table writes to for agg_type='average'
    df_example = pd.read_csv(r'..\..\results\experiments\tables\average\table_general_auc.csv', sep=';')

    # Separate the data from the two summary rows ('Average <METRIC>', 'Average Rank').
    # The summary slice is deliberately not called `df_summary`: that global holds the
    # dataset summary table the get_datasets_by_* helpers depend on.
    df_data = df_example.iloc[:-2, :]
    df_summary_rows = df_example.iloc[-2:, :]

    # The 'Average Rank' summary row, kept for inspection
    ranking_summary_row = df_summary_rows[df_summary_rows['Dataset'] == 'Average Rank']

    # Display the raw table, summary rows included (renders as HTML in Jupyter).
    # Styling the table together with its summary rows would need a dedicated function.
    display(df_example)

    # Example of styling just the ranks (without the summary).
    # The CSV stores scores only, so the ranks are re-calculated here.
    # The score columns come back as text because the summary rows contain
    # 'mean ± std' strings, hence the explicit conversion to float.
    df_scores_only = df_data.set_index('Dataset').astype(float)
    # Rank descending so that rank 1 is the highest score, as in rank_values;
    # highlight_min_in_row then bolds the best algorithm of each dataset.
    df_ranks_only = df_scores_only.rank(axis=1, method='min', ascending=False)
    display(highlight_min_in_row(df_ranks_only))

except FileNotFoundError:
    print("Example file not found. Please run the main processing cell first.")

### **Cell 8: Prepare Data for Pair Plots**

This cell processes the ranking tables generated previously to create summarized CSV files. These files contain the average ranks for different dataset groupings and are specifically formatted for generating pair plots in the next analysis step.



In [None]:
# --- Configuration for Pair Plot Data Generation ---
metrics = ['auc', 'adj_r_precision', 'adj_average_precision', 'adj_max_f1']
aggregation_types = ['average', 'best']
PAIRPLOT_OUTPUT_DIR = r'..\..\results\experiments\plot\PAIRPLOT'

# Rebuild the dataset groupings with the same helpers and thresholds as the
# table generation step, so both steps work on identical dataset lists.
dataset_groups = {'general': all_dataset_names}
for build_groups, thresholds in (
    (get_datasets_by_feature_count, (15, 25)),
    (get_datasets_by_instance_count, (10000, 20000)),
    (get_datasets_by_categorical_percentage, (34, 67)),
    (get_datasets_by_binary_percentage, (50,)),
    (get_datasets_by_context, ()),
):
    dataset_groups.update(build_groups(*thresholds))

def extract_average_rank(df: pd.DataFrame) -> list:
    """
    Reads the mean ranks out of the last row of a ranking table.

    The last row is expected to be the 'Average Rank' summary, whose cells look
    like 'mean ± std'. The first cell (the row label) is skipped and the part
    before '±' of every remaining cell is returned as a float.
    """
    summary_cells = df.iloc[-1].tolist()
    mean_ranks = []
    for cell in summary_cells[1:]:
        mean_text, _, _ = str(cell).partition('\u00B1')
        mean_ranks.append(float(mean_text))
    return mean_ranks

# --- Main Loop to Generate Pair Plot CSVs ---
# Each entry describes one pair plot CSV:
#   (plot file prefix, [(plot column name, dataset group key, ranking table name), ...])
# The plot CSV gets an 'algorithm' column followed by one average-rank column per member.
PAIRPLOT_LAYOUT = [
    # All datasets together
    ('general', [
        ('general', 'general', 'table_general'),
    ]),
    # Number of features
    ('feature', [
        ('low', 'low_feature', 'table_feature_low'),
        ('medium', 'medium_feature', 'table_feature_medium'),
        ('high', 'high_feature', 'table_feature_high'),
    ]),
    # Number of instances
    ('instance', [
        ('low', 'low_instance', 'table_instance_low'),
        ('medium', 'medium_instance', 'table_instance_medium'),
        ('high', 'high_instance', 'table_instance_high'),
    ]),
    # Percentage of categorical features
    ('categorical', [
        ('low', 'low_categorical', 'table_categorical_low'),
        ('medium', 'medium_categorical', 'table_categorical_medium'),
        ('high', 'high_categorical', 'table_categorical_high'),
    ]),
    # Percentage of binary features
    ('binary', [
        ('low_binary', 'low_binary', 'table_binary_low'),
        ('high_binary', 'high_binary', 'table_binary_high'),
    ]),
    # Application context
    ('context', [
        ('medicine_context', 'medicine_context', 'table_context_medicine'),
        ('finance_context', 'finance_context', 'table_context_finance'),
        ('invasion_context', 'invasion_context', 'table_context_invasion'),
        ('sciency_context', 'sciency_context', 'table_context_sciency'),
        ('synthetic_context', 'synthetic_context', 'table_context_synthetic'),
    ]),
]

for agg_type in aggregation_types:
    # Create the output directory for the current aggregation type
    os.makedirs(os.path.join(PAIRPLOT_OUTPUT_DIR, agg_type), exist_ok=True)

    for metric in metrics:
        print(f"Generating pair plot data for: Type='{agg_type}', Metric='{metric}'")

        for plot_name, members in PAIRPLOT_LAYOUT:
            algorithm_names = None
            rank_columns = {}

            for column_name, group_key, table_name in members:
                group_datasets = dataset_groups[group_key]
                if not group_datasets:
                    # An empty bucket cannot be ranked; keep the column but leave it blank
                    print(f"Skipping {group_key} for {metric} ({agg_type}) - No datasets in this group.")
                    rank_columns[column_name] = None
                    continue

                df_table = create_ranking_table(group_datasets, table_name, metric=metric, agg_type=agg_type)
                if algorithm_names is None:
                    # Every table of a plot has the same algorithm columns; take the first
                    algorithm_names = df_table.columns.tolist()[1:]
                rank_columns[column_name] = extract_average_rank(df_table)

            if algorithm_names is None:
                # Every bucket of this plot was empty, so there is nothing to write
                continue

            missing_ranks = [float('nan')] * len(algorithm_names)
            plot_data = {'algorithm': algorithm_names}
            for column_name, ranks in rank_columns.items():
                plot_data[column_name] = missing_ranks if ranks is None else ranks

            df_plot = pd.DataFrame(plot_data)
            df_plot.to_csv(os.path.join(PAIRPLOT_OUTPUT_DIR, agg_type, f'{plot_name}_{metric}.csv'), sep=',', index=False)


print("\n--- All data files for pair plots have been generated. ---")