In [14]:
import pandas as pd

# Load the CSV file
file_path = 'comprehensive_predictions_summary_3.csv'
data = pd.read_csv(file_path)

# Function to extract mean value from the string
def extract_mean(value):
    return float(value.split(' ± ')[0])

# Apply the function to relevant columns to get mean values
data['Accuracy_mean'] = data['Accuracy'].apply(extract_mean)
data['Prediction_Size_90_mean'] = data['Prediction Size (90% Coverage)'].apply(extract_mean)
data['Coverage_90_mean'] = data['90% Coverage'].apply(extract_mean)

# Save the updated CSV
updated_file_path = 'comprehensive_predictions_summary_citeseer.csv'
data.to_csv(updated_file_path, index=False)
print(f"Updated CSV file created: {updated_file_path}")

Updated CSV file created: comprehensive_predictions_summary_3.csv


In [3]:
import os
import re
import pandas as pd
import numpy as np

def extract_values(line):
    pattern = r"Result: \((\d+), \{'Accuracy': ([\d.]+), 'APS': \(([\d.]+), ([\d.]+)\), 'APSeps05': \(([\d.]+), ([\d.]+)\)\}\)"
    match = re.search(pattern, line)
    if match:
        result_num = int(match.group(1))
        accuracy = float(match.group(2))
        coverage_90 = float(match.group(3))
        aps_score = float(match.group(4))
        coverage_95 = float(match.group(5))
        apseps05_score = float(match.group(6))
        return result_num, accuracy, coverage_90, aps_score, coverage_95, apseps05_score
    else:
        raise ValueError(f"Could not extract values from line: {line}")

def parse_filename(filename):
    parts = filename.replace('.txt', '').split('_')
    
    # Determine if the dataset is biased or unbiased
    dataset_type = 'biased' if 'train' in parts else 'unbiased'
    
    data = {
        'dataset_type': dataset_type,
        'dataset': '',
        'model': '',
        'conformal_score': '',
        'method': 'CE',  # Default method
        'alpha_bias': '0.1' if dataset_type == 'biased' else '1.0',  # Default for biased datasets if not specified
        'ld1': '0',  # Default value for ld1
        'ld2': '0'  # Default value for ld2
    }
    
    for i, part in enumerate(parts):
        if part in ['cora', 'citeseer', 'PubMed']:
            data['dataset'] = part
        elif part in ['APPNP', 'GAT', 'GCN', 'GraphSAGE', 'DAGNN']:
            data['model'] = part
        elif part in ['aps', 'raps']:
            data['conformal_score'] = part
        elif part in ['SRGNN', 'cmd', 'MMD', 'CEMD', 'CKLJS', 'CKLJSEM', 'CKL', 'CJS', 'kld', 'jsd', 'emd', 'CE']:
            data['method'] = part
        elif part == 'alpha' and i + 1 < len(parts) and parts[i + 1] == 'bias':
            data['alpha_bias'] = parts[i + 2]
        elif part == 'ld1' and i + 1 < len(parts):
            data['ld1'] = parts[i + 1]
        elif part == 'ld2' and i + 1 < len(parts):
            data['ld2'] = parts[i + 1]
    
    # Determine the method based on the dataset type and filename pattern
    if dataset_type == 'unbiased':
        data['method'] = 'IID'
    elif dataset_type == 'biased' and 'ld1' in parts and 'ld2' in parts and data['method'] == 'CE':
        data['method'] = 'biased(CE)'

    return data

def process_files(folder_path):
    data = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r') as file:
                content = file.read()
                lines = content.strip().split('\n')
                values = [extract_values(line) for line in lines if line.strip()]
                
                if values:
                    _, *metric_values = zip(*values)
                    means = np.mean(metric_values, axis=1)
                    stds = np.std(metric_values, axis=1)
                    result = [f"{mean:.4f} ± {std:.4f}" for mean, std in zip(means, stds)]
                    
                    file_info = parse_filename(filename)
                    data.append(list(file_info.values()) + result)
    
    return data

def create_csv(data, output_file):
    columns = ['Dataset Type', 'Dataset', 'Model', 'Conformal Score', 'Method', 'Alpha Bias', 'LD1', 'LD2',
               'Accuracy', '90% Coverage', 'Prediction Size (90% Coverage)', '95% Coverage', 'Prediction Size (95% Coverage)']
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_file, index=False)
    print(f"CSV file created: {output_file}")

# Usage
folder_path = r'D:\csr\CondSR\pred_c'  # Using raw string for Windows path
output_file = 'comprehensive_predictions_summary_citeseer.csv'

data = process_files(folder_path)
create_csv(data, output_file)


CSV file created: comprehensive_predictions_summary_citeseer.csv


In [16]:
import pandas as pd

# Load the CSV file
file_path = 'comprehensive_predictions_summary_4.csv'
data = pd.read_csv(file_path)

# Function to extract mean value from the string
def extract_mean(value):
    return float(value.split(' ± ')[0])

# Apply the function to relevant columns to get mean values
data['Accuracy_mean'] = data['Accuracy'].apply(extract_mean)
data['Prediction_Size_90_mean'] = data['Prediction Size (90% Coverage)'].apply(extract_mean)
data['Coverage_90_mean'] = data['90% Coverage'].apply(extract_mean)

# Save the updated CSV
updated_file_path = 'comprehensive_predictions_summary_4.csv'
data.to_csv(updated_file_path, index=False)
print(f"Updated CSV file created: {updated_file_path}")

Updated CSV file created: comprehensive_predictions_summary_4.csv


# bellow code is for separate columns with only mean values

In [13]:
import pandas as pd

# Load the CSV file
file_path = 'comprehensive_predictions_summary_3.csv'
data = pd.read_csv(file_path)

# Function to extract mean value from the string
def extract_mean(value):
    return float(value.split(' ± ')[0])

# Apply the function to relevant columns to get mean values
data['Accuracy_mean'] = data['Accuracy'].apply(extract_mean)
data['Prediction_Size_90_mean'] = data['Prediction Size (90% Coverage)'].apply(extract_mean)
data['Coverage_90_mean'] = data['90% Coverage'].apply(extract_mean)

# Save the updated CSV
updated_file_path = 'comprehensive_predictions_summary_3.csv'
data.to_csv(updated_file_path, index=False)
print(f"Updated CSV file created: {updated_file_path}")


Updated CSV file created: comprehensive_predictions_summary_3.csv


In [10]:
import os
import re
import pandas as pd
import numpy as np

def extract_values(line):
    pattern = r"Result: \((\d+), \{'Accuracy': ([\d.]+), 'APS': \(([\d.]+), ([\d.]+)\), 'APSeps05': \(([\d.]+), ([\d.]+)\)\}\)"
    match = re.search(pattern, line)
    if match:
        result_num = int(match.group(1))
        accuracy = float(match.group(2))
        coverage_90 = float(match.group(3))
        aps_score = float(match.group(4))
        coverage_95 = float(match.group(5))
        apseps05_score = float(match.group(6))
        return result_num, accuracy, coverage_90, aps_score, coverage_95, apseps05_score
    else:
        raise ValueError(f"Could not extract values from line: {line}")

def parse_filename(filename):
    parts = filename.replace('.txt', '').split('_')
    
    # Determine if the dataset is biased or unbiased
    dataset_type = 'biased' if 'train' in parts else 'unbiased'
    
    data = {
        'dataset_type': dataset_type,
        'dataset': '',
        'model': '',
        'conformal_score': '',
        'method': 'CE',  # Default method
        'alpha_bias': '0.1' if dataset_type == 'biased' else '1.0',  # Default for biased datasets if not specified
        'ld1': '0',  # Default value for ld1
        'ld2': '0'  # Default value for ld2
    }
    
    for i, part in enumerate(parts):
        if part in ['cora', 'citeseer', 'PubMed']:
            data['dataset'] = part
        elif part in ['APPNP', 'GAT', 'GCN', 'GraphSAGE', 'DAGNN']:
            data['model'] = part
        elif part in ['aps', 'raps']:
            data['conformal_score'] = part
        elif part in ['SRGNN', 'cmd', 'MMD','CEMD', 'CKLJS','CKLJSEM', 'CKL', 'CJS','kld', 'jsd', 'emd', 'CE']:
            data['method'] = part
        elif part == 'alpha' and i + 1 < len(parts) and parts[i + 1] == 'bias':
            data['alpha_bias'] = parts[i + 2]
        elif part == 'ld1' and i + 1 < len(parts):
            data['ld1'] = parts[i + 1]
        elif part == 'ld2' and i + 1 < len(parts):
            data['ld2'] = parts[i + 1]
    
    # Override the default alpha_bias if explicitly mentioned in the filename
    for part in parts:
        if part in ['0.2', '0.4', '0.6'] and dataset_type == 'biased':
            data['alpha_bias'] = part
    
    return data

def process_files(folder_path):
    data = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r') as file:
                content = file.read()
                lines = content.strip().split('\n')
                values = [extract_values(line) for line in lines if line.strip()]
                
                if values:
                    _, *metric_values = zip(*values)
                    means = np.mean(metric_values, axis=1)
                    stds = np.std(metric_values, axis=1)
                    result = [f"{mean:.4f} ± {std:.4f}" for mean, std in zip(means, stds)]
                    
                    file_info = parse_filename(filename)
                    data.append(list(file_info.values()) + result)
    
    return data

def create_csv(data, output_file):
    columns = ['Dataset Type', 'Dataset', 'Model', 'Conformal Score', 'Method', 'Alpha Bias', 'LD1', 'LD2',
               'Accuracy', '90% Coverage', 'Prediction Size (90% Coverage)', '95% Coverage', 'Prediction Size (95% Coverage)']
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_file, index=False)
    print(f"CSV file created: {output_file}")

# Usage
folder_path = r'D:\csr\CondSR\pred'  # Using raw string for Windows path
output_file = 'comprehensive_predictions_summary_4.csv'

data = process_files(folder_path)
create_csv(data, output_file)


CSV file created: comprehensive_predictions_summary_4.csv


In [5]:
import pandas as pd

# Load the CSV file
file_path = 'comprehensive_predictions_summary_citeseer.csv'
data = pd.read_csv(file_path)

# Function to extract mean value from the string
def extract_mean(value):
    return float(value.split(' ± ')[0])

# Apply the function to relevant columns to get mean values
data['Accuracy_mean'] = data['Accuracy'].apply(extract_mean)
data['Prediction_Size_90_mean'] = data['Prediction Size (90% Coverage)'].apply(extract_mean)
data['Coverage_90_mean'] = data['90% Coverage'].apply(extract_mean)

# Save the updated CSV
updated_file_path = 'comprehensive_predictions_summary_citeseer.csv'
data.to_csv(updated_file_path, index=False)
print(f"Updated CSV file created: {updated_file_path}")


Updated CSV file created: comprehensive_predictions_summary_citeseer.csv


In [1]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('comprehensive_predictions_summary_4.csv')

# Filter the data to include only the relevant columns
df = df[['Dataset Type', 'Model', 'Method', 'Accuracy', 'Accuracy_mean']]

# Separate the unbiased (IID), biased (CE), and other methods
iid_df = df[df['Dataset Type'] == 'unbiased']
biased_ce_df = df[(df['Dataset Type'] == 'biased') & (df['Method'] == 'biased(CE)')]
methods_df = df[(df['Dataset Type'] == 'biased') & (df['Method'] != 'biased(CE)')]

# Merge the dataframes
merged_df = pd.merge(methods_df, biased_ce_df, on='Model', suffixes=('_method', '_biased_ce'))
merged_df = pd.merge(merged_df, iid_df, on='Model', suffixes=('', '_iid'))

# Calculate the percentage improvement compared to Biased (CE)
merged_df['Improvement (%)'] = ((merged_df['Accuracy_mean_method'] - merged_df['Accuracy_mean_biased_ce']) / merged_df['Accuracy_mean_biased_ce']) * 100

# Format the accuracy and improvement
merged_df['Accuracy (Method)'] = merged_df.apply(lambda x: f"{x['Accuracy_method'].split(' ± ')[0]} ± {x['Accuracy_method'].split(' ± ')[1]} ({'↑' if x['Improvement (%)'] > 0 else '↓'} {abs(x['Improvement (%)']):.2f}%)", axis=1)

# Pivot the table to have methods as columns
pivot_df = merged_df.pivot(index='Model', columns='Method_method', values='Accuracy (Method)')

# Add IID and Biased (CE) columns
pivot_df['IID Accuracy'] = iid_df.set_index('Model')['Accuracy'].apply(lambda x: f"{x.split(' ± ')[0]}")
pivot_df['Biased (CE) Accuracy'] = biased_ce_df.set_index('Model')['Accuracy'].apply(lambda x: f"{x.split(' ± ')[0]}")

# Reorder columns
pivot_df = pivot_df[['IID Accuracy', 'Biased (CE) Accuracy'] + list(pivot_df.columns[:-2])]

# Display the final table
print(pivot_df)

# Optionally, save the table to a CSV file for further use
pivot_df.to_csv('accuracy_comparison_table.csv')

ValueError: Index contains duplicate entries, cannot reshape

In [2]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('comprehensive_predictions_summary_4.csv')

# Filter the data to include only the relevant columns
df = df[['Dataset Type', 'Model', 'Method', 'Accuracy', 'Accuracy_mean']]

# Separate the unbiased (IID), biased (CE), and other methods
iid_df = df[df['Dataset Type'] == 'unbiased']
biased_ce_df = df[(df['Dataset Type'] == 'biased') & (df['Method'] == 'biased(CE)')]
methods_df = df[(df['Dataset Type'] == 'biased') & (df['Method'] != 'biased(CE)')]

# Merge the dataframes
merged_df = pd.merge(methods_df, biased_ce_df, on='Model', suffixes=('_method', '_biased_ce'))
merged_df = pd.merge(merged_df, iid_df, on='Model', suffixes=('', '_iid'))

# Check for duplicates
if merged_df[['Model', 'Method_method']].duplicated().any():
    # Handle duplicates by grouping and aggregating (e.g., using mean for numeric columns)
    merged_df = merged_df.groupby(['Model', 'Method_method'], as_index=False).mean()

# Calculate the percentage improvement compared to Biased (CE)
merged_df['Improvement (%)'] = (
    (merged_df['Accuracy_mean_method'] - merged_df['Accuracy_mean_biased_ce'])
    / merged_df['Accuracy_mean_biased_ce']
) * 100

# Format the accuracy and improvement
merged_df['Accuracy (Method)'] = merged_df.apply(
    lambda x: f"{x['Accuracy_method'].split(' ± ')[0]} ± {x['Accuracy_method'].split(' ± ')[1]} "
              f"({'↑' if x['Improvement (%)'] > 0 else '↓'} {abs(x['Improvement (%)']):.2f}%)",
    axis=1
)

# Pivot the table to have methods as columns
pivot_df = merged_df.pivot(index='Model', columns='Method_method', values='Accuracy (Method)')

# Add IID and Biased (CE) columns
pivot_df['IID Accuracy'] = iid_df.set_index('Model')['Accuracy'].apply(lambda x: f"{x.split(' ± ')[0]}")
pivot_df['Biased (CE) Accuracy'] = biased_ce_df.set_index('Model')['Accuracy'].apply(lambda x: f"{x.split(' ± ')[0]}")

# Reorder columns
pivot_df = pivot_df[['IID Accuracy', 'Biased (CE) Accuracy'] + list(pivot_df.columns[:-2])]

# Display the final table
print(pivot_df)

# Save the table to a CSV file for further use
pivot_df.to_csv('accuracy_comparison_table.csv')


TypeError: agg function failed [how->mean,dtype->object]

In [1]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('comprehensive_predictions_summary_4.csv')

# Filter the data to include only the relevant columns
df = df[['Dataset Type', 'Model', 'Method', 'Accuracy', 'Accuracy_mean']]

# Separate the unbiased (IID), biased (CE), and other methods
iid_df = df[df['Dataset Type'] == 'unbiased']
biased_ce_df = df[(df['Dataset Type'] == 'biased') & (df['Method'] == 'biased(CE)')]
methods_df = df[(df['Dataset Type'] == 'biased') & (df['Method'] != 'biased(CE)')]

# Merge the dataframes
merged_df = pd.merge(methods_df, biased_ce_df, on='Model', suffixes=('_method', '_biased_ce'))
merged_df = pd.merge(merged_df, iid_df, on='Model', suffixes=('', '_iid'))

# Drop duplicate rows
merged_df = merged_df.drop_duplicates(subset=['Model', 'Method_method'])

# Calculate the percentage improvement compared to Biased (CE)
merged_df['Improvement (%)'] = (
    (merged_df['Accuracy_mean_method'] - merged_df['Accuracy_mean_biased_ce'])
    / merged_df['Accuracy_mean_biased_ce']
) * 100

# Format the accuracy and improvement
merged_df['Accuracy (Method)'] = merged_df.apply(
    lambda x: f"{x['Accuracy_method'].split(' ± ')[0]} ± {x['Accuracy_method'].split(' ± ')[1]} "
              f"({'↑' if x['Improvement (%)'] > 0 else '↓'} {abs(x['Improvement (%)']):.2f}%)",
    axis=1
)

# Pivot the table to have methods as columns
pivot_df = merged_df.pivot(index='Model', columns='Method_method', values='Accuracy (Method)')

# Add IID and Biased (CE) columns
pivot_df['IID Accuracy'] = iid_df.set_index('Model')['Accuracy'].apply(lambda x: f"{x.split(' ± ')[0]}")
pivot_df['Biased (CE) Accuracy'] = biased_ce_df.set_index('Model')['Accuracy'].apply(lambda x: f"{x.split(' ± ')[0]}")

# Reorder columns
pivot_df = pivot_df[['IID Accuracy', 'Biased (CE) Accuracy'] + list(pivot_df.columns[:-2])]

# Display the final table
print(pivot_df)

# Save the table to a CSV file for further use
pivot_df.to_csv('accuracy_comparison_table_cora.csv')


Method_method IID Accuracy Biased (CE) Accuracy                        CJS  \
Model                                                                        
APPNP               0.8562               0.7101  0.7278 ± 0.0214 (↑ 2.49%)   
DAGNN               0.8229               0.7179  0.7232 ± 0.0181 (↑ 0.74%)   
GAT                 0.8080               0.6861  0.6771 ± 0.0313 (↓ 1.31%)   
GCN                 0.7991               0.6675  0.6780 ± 0.0045 (↑ 1.57%)   
GraphSAGE           0.8084               0.6924  0.7344 ± 0.0101 (↑ 6.07%)   

Method_method                        CKL                        MMD  \
Model                                                                 
APPNP          0.7318 ± 0.0219 (↑ 3.06%)  0.7175 ± 0.0163 (↑ 1.04%)   
DAGNN          0.7345 ± 0.0113 (↑ 2.31%)  0.7187 ± 0.0020 (↑ 0.11%)   
GAT            0.6847 ± 0.0295 (↓ 0.20%)  0.6801 ± 0.0147 (↓ 0.87%)   
GCN            0.6812 ± 0.0050 (↑ 2.05%)  0.6672 ± 0.0069 (↓ 0.04%)   
GraphSAGE      0.7282 ± 0.0

In [20]:
import pandas as pd

# Load the dataset
df = pd.read_csv('comprehensive_predictions_summary_citeseer.csv')

# First, let's verify we have all required columns
required_columns = ['Model', 'Method', '90% Coverage']
if not all(col in df.columns for col in required_columns):
    raise ValueError(f"Missing required columns: {', '.join(set(required_columns) - set(df.columns))}")

# Convert '90% Coverage' to numeric, removing any non-numeric characters
df['90% Coverage'] = df['90% Coverage'].str.extract(r'([\d.]+)').astype(float)

# Separate data by method types
iid_df = df[df['Method'] == 'IID'].copy()
biased_df = df[df['Method'] == 'biased(CE)'].copy()
methods_df = df[~df['Method'].isin(['IID', 'biased(CE)'])].copy()

# First merge: methods with IID
temp_df = methods_df.merge(
    iid_df[['Model', '90% Coverage']].rename(columns={'90% Coverage': 'Coverage_IID'}),
    on='Model'
)

# Second merge: add biased results
merged_df = temp_df.merge(
    biased_df[['Model', '90% Coverage']].rename(columns={'90% Coverage': 'Coverage_Biased'}),
    on='Model'
)

# Calculate improvement percentage
merged_df['Improvement (%)'] = (
    (merged_df['90% Coverage'] - merged_df['Coverage_Biased']) /
    merged_df['Coverage_Biased']
) * 100

# Format the comparison string
merged_df['Coverage (Method)'] = merged_df.apply(
    lambda x: f"{x['90% Coverage']:.4f} ({'↑' if x['Improvement (%)'] > 0 else '↓'} {abs(x['Improvement (%)']):.2f}%)",
    axis=1
)

# Drop duplicates to ensure uniqueness before pivoting
merged_df = merged_df.drop_duplicates(subset=['Model', 'Method'])

# Create pivot table
pivot_df = merged_df.pivot(
    index='Model',
    columns='Method',
    values='Coverage (Method)'
)

# Add IID and Biased (CE) coverage columns
pivot_df['IID Coverage'] = iid_df.set_index('Model')['90% Coverage']
pivot_df['Biased (CE) Coverage'] = biased_df.set_index('Model')['90% Coverage']

# Reorder columns to put IID and Biased (CE) first
cols = pivot_df.columns.tolist()
cols = ['IID Coverage', 'Biased (CE) Coverage'] + [c for c in cols if c not in ['IID Coverage', 'Biased (CE) Coverage']]
pivot_df = pivot_df[cols]

# Save the final output
pivot_df.to_csv('comprehensive_coverage_comparison_citeseer.csv')
print("Comparison CSV file saved: comprehensive_coverage_comparison_citeseer.csv")

# Display first few rows of the pivot table
print("\nFirst few rows of the comparison table:")
print(pivot_df.head())

Comparison CSV file saved: comprehensive_coverage_comparison_citeseer.csv

First few rows of the comparison table:
Method     IID Coverage  Biased (CE) Coverage               CJS  \
Model                                                             
APPNP            0.9007                0.9022  0.9020 (↓ 0.02%)   
DAGNN            0.9020                0.9029  0.9021 (↓ 0.09%)   
GAT              0.9011                0.9013  0.9021 (↑ 0.09%)   
GCN              0.9009                0.9031  0.9029 (↓ 0.02%)   
GraphSAGE        0.9004                0.9015  0.9018 (↑ 0.03%)   

Method                  CKL               MMD             SRGNN  \
Model                                                             
APPNP      0.9019 (↓ 0.03%)  0.9023 (↑ 0.01%)  0.9025 (↑ 0.03%)   
DAGNN      0.9019 (↓ 0.11%)  0.9024 (↓ 0.06%)  0.9017 (↓ 0.13%)   
GAT        0.9023 (↑ 0.11%)  0.9015 (↑ 0.02%)  0.9018 (↑ 0.06%)   
GCN        0.9030 (↓ 0.01%)  0.9033 (↑ 0.02%)  0.9028 (↓ 0.03%)   
GraphSAGE  0.

In [25]:
import pandas as pd

# Load the dataset
df = pd.read_csv('comprehensive_predictions_summary_4.csv')

# First, let's verify we have all required columns
required_columns = ['Model', 'Method', 'Prediction Size (90% Coverage)']
if not all(col in df.columns for col in required_columns):
    raise ValueError(f"Missing required columns: {', '.join(set(required_columns) - set(df.columns))}")

# Convert '90% Coverage' to numeric, removing any non-numeric characters
df['Prediction Size (90% Coverage)'] = df['Prediction Size (90% Coverage)'].str.extract(r'([\d.]+)').astype(float)

# Separate data by method types
iid_df = df[df['Method'] == 'IID'].copy()
biased_df = df[df['Method'] == 'biased(CE)'].copy()
methods_df = df[~df['Method'].isin(['IID', 'biased(CE)'])].copy()

# First merge: methods with IID
temp_df = methods_df.merge(
    iid_df[['Model', 'Prediction Size (90% Coverage)']].rename(columns={'Prediction Size (90% Coverage)': 'Prediction Size (90% Coverage)_IID'}),
    on='Model'
)

# Second merge: add biased results
merged_df = temp_df.merge(
    biased_df[['Model', 'Prediction Size (90% Coverage)']].rename(columns={'Prediction Size (90% Coverage)': 'Prediction Size (90% Coverage)_Biased'}),
    on='Model'
)

# Calculate improvement percentage
merged_df['Improvement (%)'] = (
    (merged_df['Prediction Size (90% Coverage)'] - merged_df['Prediction Size (90% Coverage)_Biased']) /
    merged_df['Prediction Size (90% Coverage)_Biased']
) * 100

# Format the comparison string
merged_df['Prediction Size (90% Coverage) (Method)'] = merged_df.apply(
    lambda x: f"{x['Prediction Size (90% Coverage)']:.4f} ({'↑' if x['Improvement (%)'] > 0 else '↓'} {abs(x['Improvement (%)']):.2f}%)",
    axis=1
)

# Drop duplicates to ensure uniqueness before pivoting
merged_df = merged_df.drop_duplicates(subset=['Model', 'Method'])

# Create pivot table
pivot_df = merged_df.pivot(
    index='Model',
    columns='Method',
    values='Prediction Size (90% Coverage) (Method)'
)

# Add IID and Biased (CE) coverage columns
pivot_df['IID Prediction Size (90% Coverage)'] = iid_df.set_index('Model')['Prediction Size (90% Coverage)']
pivot_df['Biased (CE) Prediction Size (90% Coverage)'] = biased_df.set_index('Model')['Prediction Size (90% Coverage)']

# Reorder columns to put IID and Biased (CE) first
cols = pivot_df.columns.tolist()
cols = ['IID Prediction Size (90% Coverage)', 'Biased (CE) Prediction Size (90% Coverage)'] + [c for c in cols if c not in ['IID Prediction Size (90% Coverage)', 'Biased (CE) Prediction Size (90% Coverage)']]
pivot_df = pivot_df[cols]

# Save the final output
pivot_df.to_csv('comprehensive_Prediction Size (90% Coverage)_comparison_cora.csv')
print("Comparison CSV file saved: comprehensive_Prediction Size (90% Coverage)_comparison_cora.csv")

# Display first few rows of the pivot table
print("\nFirst few rows of the comparison table:")
print(pivot_df.head())

Comparison CSV file saved: comprehensive_Prediction Size (90% Coverage)_comparison_cora.csv

First few rows of the comparison table:
Method     IID Prediction Size (90% Coverage)  \
Model                                           
APPNP                                  3.8159   
DAGNN                                  4.5173   
GAT                                    3.4103   
GCN                                    3.8876   
GraphSAGE                              3.3313   

Method     Biased (CE) Prediction Size (90% Coverage)                CJS  \
Model                                                                      
APPNP                                          4.1407  2.2595 (↓ 45.43%)   
DAGNN                                          4.7468  3.2271 (↓ 32.02%)   
GAT                                            3.9805  2.7659 (↓ 30.51%)   
GCN                                            4.1933   4.1019 (↓ 2.18%)   
GraphSAGE                                      3.6772  2.1970 (↓ 40