In [16]:
import pandas as pd
import os

In [17]:
# Root folder holding one sub-directory per algorithm, each containing a metrics.csv
DATA_PATH = "../data/full"

# Read the three per-algorithm metrics tables in one pass
leiden_metrics, slpa_metrics, reverse_hybrid_metrics = (
    pd.read_csv(os.path.join(DATA_PATH, algo_dir, "metrics.csv"))
    for algo_dir in ("leiden", "w_slpa", "reverse_hybrid")
)

In [18]:
# Combine the three metrics DataFrames, add an 'algo' column, and concatenate with NaN for missing values
import numpy as np

# Helper that tags a metrics frame with the name of the algorithm it belongs to
def add_algo_col(df, algo_name):
    """Return a copy of ``df`` with an 'algo' column set to ``algo_name``.

    The frame passed in is not modified; an existing 'algo' column is
    overwritten in the returned copy.
    """
    return df.assign(algo=algo_name)

# Leiden results may carry a 'level' column; when they do, every level becomes its
# own pseudo-algorithm named 'leiden_level<level>', otherwise the frame is tagged 'leiden'.
if 'level' in leiden_metrics.columns:
    leiden_dfs = [
        add_algo_col(
            leiden_metrics.loc[leiden_metrics['level'] == lvl].drop(columns='level'),
            f'leiden_level{lvl}',
        )
        for lvl in leiden_metrics['level'].unique()
    ]
else:
    leiden_dfs = [add_algo_col(leiden_metrics, 'leiden')]

slpa_df = add_algo_col(slpa_metrics, 'w_slpa')
reverse_hybrid_df = add_algo_col(reverse_hybrid_metrics, 'reverse_hybrid')

# Stack everything into one long table; columns are unioned (and sorted) across the
# frames, and a metric that a frame does not have is left as NaN.
all_dfs = [*leiden_dfs, slpa_df, reverse_hybrid_df]
combined_metrics = pd.concat(all_dfs, ignore_index=True, sort=True)

In [19]:
# Pivot the metrics so that each algorithm is a column and each metric is a row.
# NOTE: `combined_metrics` is rebound to the pivoted table at the end of this cell, so on
# a second run it no longer has an 'algo' column and `set_index('algo')` would raise a
# KeyError. Pivot only while the long-format frame is still in place; on a re-run the
# `combined_metric` table produced by the previous run is reused.
if 'algo' in combined_metrics.columns:
    combined_metric = combined_metrics.set_index('algo').T

# Metrics to show, in display order (the commented-out entries are currently excluded)
row_order = [
    'modularity',
    'overlapping_modularity',
    'conductance',
    'num_communities',
    'avg_community_size',
    'min_community_size',
    'max_community_size',
    'num_nodes_in_communities',
    # 'nmi_epitype',
    # 'nmi_subtype',
    # 'ari_epitype',
    # 'ari_subtype'
]

# Blank out the overlapping modularity reported for reverse_hybrid.
# NOTE(review): presumably that value is not meaningful for this algorithm - confirm.
combined_metric.loc['overlapping_modularity', 'reverse_hybrid'] = np.nan

# Keep only the rows listed in row_order, in that order (a missing metric raises KeyError)
combined_metrics = combined_metric.loc[row_order]


In [20]:
# Display the metrics table (metrics as rows, algorithms as columns)
combined_metrics

algo,leiden_level0,leiden_level1,leiden_level2,leiden_level3,w_slpa,reverse_hybrid
modularity,0.639343,0.430204,0.158592,-0.011439,0.61232,0.398796
overlapping_modularity,,,,,0.466719,
conductance,0.011331,0.485594,0.677233,0.805265,0.164706,0.557392
num_communities,3.0,8.0,12.0,2.0,4.0,10.0
avg_community_size,336.0,126.0,73.0,56.5,265.5,106.2
min_community_size,286.0,60.0,47.0,34.0,66.0,29.0
max_community_size,372.0,227.0,114.0,79.0,379.0,231.0
num_nodes_in_communities,1008.0,1008.0,876.0,113.0,1008.0,1008.0


In [21]:
# Number formatting for the metrics table. The precision is chosen from the value alone
# (the metric name is not available to the formatter):
#   - missing value              -> 'NaN'
#   - whole-number float         -> no decimals (counts and sizes)
#   - float with |value| < 1     -> 3 decimals (modularity, overlapping_modularity, conductance)
#   - any other float            -> 1 decimal (avg_community_size)
# Consequence: a whole-number average such as 336.0 is rendered as '336'.
def format_value(value):
    """Return ``value`` as a display string for the metrics table.

    Parameters
    ----------
    value : scalar
        A single cell of the table (float, int, NaN/None or any other object).

    Returns
    -------
    str
        'NaN' for missing values, a formatted number for floats, and
        ``str(value)`` for everything else.
    """
    if pd.isna(value):
        return 'NaN'
    if not isinstance(value, float):
        # ints and any non-numeric objects are shown unchanged
        return str(value)
    if value.is_integer():
        return f"{int(value)}"
    # Compare the magnitude: a plain `value < 1` would also catch negatives such as -5.5
    # and print them with three decimals as if they were sub-unit fractions.
    if abs(value) < 1:
        return f"{value:.3f}"
    return f"{value:.1f}"
    

# Apply the formatter to every cell of the table.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map, so use
# `map` when this pandas version provides it and fall back to `applymap` otherwise.
if hasattr(pd.DataFrame, "map"):
    formatted_metrics = combined_metrics.map(format_value)
else:
    formatted_metrics = combined_metrics.applymap(format_value)
formatted_metrics

  formatted_metrics = combined_metrics.applymap(format_value)


algo,leiden_level0,leiden_level1,leiden_level2,leiden_level3,w_slpa,reverse_hybrid
modularity,0.639,0.43,0.159,-0.011,0.612,0.399
overlapping_modularity,,,,,0.467,
conductance,0.011,0.486,0.677,0.805,0.165,0.557
num_communities,3.0,8.0,12.0,2.0,4.0,10.0
avg_community_size,336.0,126.0,73.0,56.5,265.5,106.2
min_community_size,286.0,60.0,47.0,34.0,66.0,29.0
max_community_size,372.0,227.0,114.0,79.0,379.0,231.0
num_nodes_in_communities,1008.0,1008.0,876.0,113.0,1008.0,1008.0
