# SyNGLER Baseline Evaluation Demo

This notebook demonstrates our comprehensive evaluation framework for network generation baselines. We evaluate five baselines (SyNGLER-Diff, SyNGLER-Res, GRAN, EDGE, VGAE) and an Erdős–Rényi (ER) reference model on four datasets (DBLP, PolBlogs, Yelp, YouTube) using various network metrics.

In [None]:
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
from pathlib import Path

# Add utils to path
sys.path.append('./evaluation')
from utils import *

# Select the compute device: use the GPU whenever torch can see one
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Using device: {device}")

# Fix the NumPy and PyTorch RNG seeds so repeated runs are reproducible
np.random.seed(42)
torch.manual_seed(42)
if device == "cuda":
    # Seed the CUDA generator explicitly as well when running on the GPU
    torch.cuda.manual_seed(42)

## Configuration

We configure the evaluation parameters including datasets, baselines, number of samples, and data paths.

In [None]:
# Evaluation scope: which real networks and which generators are compared
datasets = [
    'dblp',
    'polblogs',
    'yelp',
    'youtube',
]
baselines = [
    'diff',
    'res',
    'gran',
    'edge',
    'vgae',
    'er',
]
# How many synthetic samples are evaluated for each baseline
num_samples = 5

# Locations of the real datasets and of the pre-generated synthetic samples
data_root = "../../datasets"
synthetic_root = "../../synthetic"

# Echo the configuration so the notebook output records what was run
print(
    f"Datasets: {datasets}",
    f"Baselines: {baselines}",
    f"Number of samples per baseline: {num_samples}",
    sep="\n",
)

## Load Real Data

We load the real network datasets that will serve as ground truth for our evaluation.

In [None]:
# Read every configured dataset; a dataset whose files are missing is stored
# as None so that later cells can detect and skip it.
real_data = {}
for dataset in datasets:
    try:
        loaded = load_real_data(dataset, data_root)
    except FileNotFoundError as e:
        print(f"Error loading {dataset}: {e}")
        real_data[dataset] = None
    else:
        real_data[dataset] = loaded
        print(f"Loaded {dataset}: shape {real_data[dataset].shape}")

## Load Synthetic Data and Compute Metrics

We load synthetic data from all baselines and compute comprehensive evaluation metrics for comparison.

In [None]:
# Results are stored as all_results[dataset][baseline] -> metrics dict,
# or None when processing that baseline raised an exception.
all_results = {}

for dataset in datasets:
    real_graph = real_data[dataset]
    if real_graph is None:
        print(f"Skipping {dataset} - no real data available")
        continue

    print(f"\n=== Evaluating {dataset.upper()} ===")
    dataset_results = {}
    all_results[dataset] = dataset_results

    for baseline in baselines:
        print(f"\n--- {baseline.upper()} ---")

        try:
            if baseline != 'er':
                # Every other baseline is read from pre-generated samples on disk
                synthetic_data = load_synthetic_data(baseline, dataset, synthetic_root, num_samples)
                print(f"Loaded {len(synthetic_data)} synthetic samples")
            else:
                # ER samples are generated on the fly, one per seed
                synthetic_data = []
                for draw in range(num_samples):
                    er_sample, p_hat = er_resample_gnp(real_graph, B=1, seed=42+draw)
                    synthetic_data.append(er_sample)
                print(f"Generated {len(synthetic_data)} ER samples with p={p_hat:.4f}")

            if len(synthetic_data) == 0:
                # Nothing to evaluate; the baseline is left out of the results
                print(f"No synthetic data found for {baseline}")
                continue

            # Compare the real network against the synthetic samples
            metrics = compute_metrics(real_graph, synthetic_data, device=device)
            dataset_results[baseline] = metrics

            # Short per-baseline summary of the headline numbers
            tri = metrics['triangle_density']
            print(f"Triangle Density: Real={tri['real']:.4f}, Syn={tri['synthetic_mean']:.4f}±{tri['synthetic_std']:.4f}")
            gcc = metrics['global_clustering']
            print(f"Global Clustering: Real={gcc['real']:.4f}, Syn={gcc['synthetic_mean']:.4f}±{gcc['synthetic_std']:.4f}")
            print(f"Degree Centrality Energy Distance: {metrics['degree_centrality_energy']['energy_distance']:.4f}")
            print(f"Eigenvalues Energy Distance: {metrics['eigenvalues_energy']['energy_distance']:.4f}")

        except Exception as e:
            # Best effort: report the failure and keep evaluating the rest
            print(f"Error processing {baseline} for {dataset}: {e}")
            dataset_results[baseline] = None

## Create Summary Tables

We create comprehensive summary tables that compare baseline performance across all metrics and datasets.

In [None]:
# Create summary tables for each metric
def create_summary_table(metric_name, metric_key, datasets, baselines, all_results):
    """Build a dataset-by-baseline table for one field of one metric.

    Args:
        metric_name: Key of the metric inside a baseline's metrics dict
            (e.g. 'triangle_density').
        metric_key: Key of the field inside that metric's dict
            (e.g. 'real' or 'energy_distance').
        datasets: Dataset names; one row is produced for every dataset that
            is present in ``all_results``.
        baselines: Baseline names; one column per baseline, upper-cased.
        all_results: Nested mapping ``all_results[dataset][baseline]`` whose
            values are metrics dicts or None.

    Returns:
        pandas.DataFrame with a 'Dataset' column followed by one column per
        baseline. Numeric values are formatted with four decimals, other
        values are stringified, and anything unavailable is shown as "N/A".
    """
    rows = []

    for dataset in datasets:
        if dataset not in all_results:
            continue

        per_baseline = all_results[dataset]
        row = {'Dataset': dataset.upper()}

        for baseline in baselines:
            column = baseline.upper()
            metrics = per_baseline.get(baseline)

            # A missing baseline, a failed run (None), a missing metric or a
            # missing field are all reported as "N/A" instead of raising.
            if (
                metrics is None
                or metric_name not in metrics
                or metric_key not in metrics[metric_name]
            ):
                row[column] = "N/A"
                continue

            value = metrics[metric_name][metric_key]
            # NumPy scalars such as np.float32 are not instances of the
            # builtin float, so they are listed explicitly.
            if isinstance(value, (int, float, np.integer, np.floating)):
                row[column] = f"{value:.4f}"
            else:
                row[column] = str(value)

        rows.append(row)

    return pd.DataFrame(rows)

# Build and print one summary table per metric
summary_banner = "=" * 80
print("\n" + summary_banner)
print("SUMMARY TABLES")
print(summary_banner)

# (heading, metric name, field inside the metric) for each table, in print order
summary_specs = [
    ("\n1. TRIANGLE DENSITY (Real Values)", 'triangle_density', 'real'),
    ("\n2. GLOBAL CLUSTERING COEFFICIENT (Real Values)", 'global_clustering', 'real'),
    ("\n3. DEGREE CENTRALITY ENERGY DISTANCE (Lower is Better)", 'degree_centrality_energy', 'energy_distance'),
    ("\n4. EIGENVALUES ENERGY DISTANCE (Lower is Better)", 'eigenvalues_energy', 'energy_distance'),
]

summary_tables = []
for heading, metric_name, metric_key in summary_specs:
    print(heading)
    print("-" * 50)
    summary_table = create_summary_table(metric_name, metric_key, datasets, baselines, all_results)
    print(summary_table.to_string(index=False))
    summary_tables.append(summary_table)

# Keep the individual names; the save cell writes each table to its own CSV
tri_density_table, gcc_table, degree_energy_table, eigenvals_energy_table = summary_tables

## Create Synthetic vs Real Comparison Tables

We create detailed comparison tables that show synthetic vs real values with error analysis.

In [None]:
# Create comparison tables showing synthetic vs real values
def create_comparison_table(metric_name, datasets, baselines, all_results):
    """Build a long-format table comparing synthetic and real values of a metric.

    Args:
        metric_name: Key of the metric inside a baseline's metrics dict. The
            metric's dict must provide 'real', 'synthetic_mean' and
            'synthetic_std'.
        datasets: Dataset names; datasets absent from ``all_results`` are skipped.
        baselines: Baseline names; baselines that are absent, None, or lack
            ``metric_name`` are skipped.
        all_results: Nested mapping ``all_results[dataset][baseline]`` whose
            values are metrics dicts or None.

    Returns:
        pandas.DataFrame with one row per (dataset, baseline) and the columns
        'Dataset', 'Baseline', 'Real', 'Synthetic', 'Difference' and
        'Relative_Error'. All values are formatted strings. The columns are
        present even when no row could be produced.
    """
    columns = ['Dataset', 'Baseline', 'Real', 'Synthetic', 'Difference', 'Relative_Error']
    rows = []

    for dataset in datasets:
        if dataset not in all_results:
            continue

        for baseline in baselines:
            metrics = all_results[dataset].get(baseline)
            if metrics is None or metric_name not in metrics:
                continue

            stats = metrics[metric_name]
            real_val = stats['real']
            syn_mean = stats['synthetic_mean']
            syn_std = stats['synthetic_std']
            difference = syn_mean - real_val

            # The relative error is undefined when the real value is zero
            # (e.g. a network without triangles); report "N/A" rather than
            # dividing by zero.
            if real_val != 0:
                relative_error = f"{(difference / real_val * 100):+.2f}%"
            else:
                relative_error = "N/A"

            rows.append({
                'Dataset': dataset.upper(),
                'Baseline': baseline.upper(),
                'Real': f"{real_val:.4f}",
                'Synthetic': f"{syn_mean:.4f}±{syn_std:.4f}",
                'Difference': f"{difference:+.4f}",
                'Relative_Error': relative_error,
            })

    return pd.DataFrame(rows, columns=columns)

comparison_banner = "=" * 100
print("\n" + comparison_banner)
print("SYNTHETIC vs REAL COMPARISON")
print(comparison_banner)

# (heading, metric name) for each comparison table, in print order
comparison_specs = [
    ("\n1. TRIANGLE DENSITY COMPARISON", 'triangle_density'),
    ("\n2. GLOBAL CLUSTERING COEFFICIENT COMPARISON", 'global_clustering'),
]

comparison_tables = []
for heading, metric_name in comparison_specs:
    print(heading)
    print("-" * 80)
    comparison_table = create_comparison_table(metric_name, datasets, baselines, all_results)
    print(comparison_table.to_string(index=False))
    comparison_tables.append(comparison_table)

# Keep the individual names; the save cell writes each table to its own CSV
tri_density_comp, gcc_comp = comparison_tables

## Visualizations

We provide comprehensive visualizations to analyze baseline performance across different metrics and datasets.

In [None]:
# Set up plotting style.
# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*';
# try the new name first and fall back to the legacy one on older installs.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
sns.set_palette("husl")


def _collect_plot_records(datasets, baselines, all_results, make_fields):
    """Return one record per (dataset, baseline) that has usable metrics.

    ``make_fields`` receives the baseline's metrics dict and returns the
    extra columns to store next to 'Dataset' and 'Baseline'. Datasets absent
    from ``all_results`` and baselines that are absent or None are skipped.
    """
    records = []
    for dataset in datasets:
        if dataset not in all_results:
            continue
        for baseline in baselines:
            metrics = all_results[dataset].get(baseline)
            if metrics is None:
                continue
            record = {'Dataset': dataset.upper(), 'Baseline': baseline.upper()}
            record.update(make_fields(metrics))
            records.append(record)
    return records


def _real_vs_synthetic_fields(metric_name):
    """Return a field builder giving real value, synthetic mean and absolute error."""
    def make_fields(metrics):
        real_val = metrics[metric_name]['real']
        syn_val = metrics[metric_name]['synthetic_mean']
        return {
            'Real': real_val,
            'Synthetic': syn_val,
            'Error': abs(syn_val - real_val),
        }
    return make_fields


def _energy_fields(metrics):
    """Return the two energy distances of a baseline and their sum."""
    degree_energy = metrics['degree_centrality_energy']['energy_distance']
    eigenvals_energy = metrics['eigenvalues_energy']['energy_distance']
    return {
        'Degree_Energy': degree_energy,
        'Eigenvals_Energy': eigenvals_energy,
        'Total_Energy': degree_energy + eigenvals_energy,
    }


def _combined_fields(metrics):
    """Return the sum of both absolute errors and both energy distances."""
    tri_error = abs(metrics['triangle_density']['synthetic_mean'] -
                    metrics['triangle_density']['real'])
    gcc_error = abs(metrics['global_clustering']['synthetic_mean'] -
                    metrics['global_clustering']['real'])
    total_energy = (metrics['degree_centrality_energy']['energy_distance'] +
                    metrics['eigenvalues_energy']['energy_distance'])
    return {'Combined_Score': tri_error + gcc_error + total_energy}


def _plot_grouped_bars(ax, records, value_column, title, ylabel):
    """Draw a per-dataset bar chart grouped by baseline on ``ax``.

    Returns the DataFrame that was plotted, or None when ``records`` is empty
    (the axes are then left untouched).
    """
    if not records:
        return None
    frame = pd.DataFrame(records)
    sns.barplot(data=frame, x='Dataset', y=value_column, hue='Baseline', ax=ax)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=45)
    return frame


# Create visualizations
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Baseline Performance Comparison', fontsize=16, fontweight='bold')

# 1. Triangle Density Comparison
ax1 = axes[0, 0]
tri_density_data = _collect_plot_records(
    datasets, baselines, all_results, _real_vs_synthetic_fields('triangle_density'))
df_tri = _plot_grouped_bars(
    ax1, tri_density_data, 'Error', 'Triangle Density Error', 'Absolute Error')

# 2. Global Clustering Comparison
ax2 = axes[0, 1]
gcc_data = _collect_plot_records(
    datasets, baselines, all_results, _real_vs_synthetic_fields('global_clustering'))
df_gcc = _plot_grouped_bars(
    ax2, gcc_data, 'Error', 'Global Clustering Error', 'Absolute Error')

# 3. Energy Distance Comparison
ax3 = axes[1, 0]
energy_data = _collect_plot_records(datasets, baselines, all_results, _energy_fields)
df_energy = _plot_grouped_bars(
    ax3, energy_data, 'Total_Energy', 'Total Energy Distance (Lower is Better)', 'Energy Distance')

# 4. Combined Performance
ax4 = axes[1, 1]
combined_data = _collect_plot_records(datasets, baselines, all_results, _combined_fields)
df_combined = _plot_grouped_bars(
    ax4, combined_data, 'Combined_Score', 'Combined Performance Score (Lower is Better)', 'Combined Score')

plt.tight_layout()
plt.show()

## Ranking Analysis

We perform comprehensive ranking analysis to identify the best-performing baselines across all datasets and metrics.

In [None]:
# Create ranking analysis
def create_ranking_table(datasets, baselines, all_results):
    """Rank the baselines within each dataset by a composite error score.

    The score of a baseline is the sum of
      * the relative error of its mean triangle density,
      * the relative error of its mean global clustering coefficient,
      * its degree-centrality energy distance, and
      * its eigenvalue energy distance.
    Lower is better. A relative error whose real value is not positive
    contributes 0. The energy distances are added as-is (no rescaling).

    Args:
        datasets: Dataset names; datasets absent from ``all_results`` are skipped.
        baselines: Baseline names; baselines that are absent or None are skipped.
        all_results: Nested mapping ``all_results[dataset][baseline]`` whose
            values are metrics dicts or None.

    Returns:
        pandas.DataFrame with the columns 'Dataset', 'Rank' (1 = best),
        'Baseline' and 'Score' (formatted string). The columns are present
        even when nothing could be ranked, so callers can index them safely.
    """
    columns = ['Dataset', 'Rank', 'Baseline', 'Score']
    ranking_data = []

    for dataset in datasets:
        if dataset not in all_results:
            continue

        baseline_scores = {}

        for baseline in baselines:
            metrics = all_results[dataset].get(baseline)
            if metrics is None:
                continue

            # Relative triangle density error
            real_tri = metrics['triangle_density']['real']
            syn_tri = metrics['triangle_density']['synthetic_mean']
            tri_error = abs(syn_tri - real_tri) / real_tri if real_tri > 0 else 0

            # Relative global clustering error
            real_gcc = metrics['global_clustering']['real']
            syn_gcc = metrics['global_clustering']['synthetic_mean']
            gcc_error = abs(syn_gcc - real_gcc) / real_gcc if real_gcc > 0 else 0

            # Energy distances enter the score unscaled
            degree_energy = metrics['degree_centrality_energy']['energy_distance']
            eigenvals_energy = metrics['eigenvalues_energy']['energy_distance']

            # Composite score (lower is better)
            baseline_scores[baseline] = tri_error + gcc_error + (degree_energy + eigenvals_energy)

        # sorted() is stable, so ties keep the order given in ``baselines``
        sorted_baselines = sorted(baseline_scores.items(), key=lambda item: item[1])

        for rank, (baseline, score) in enumerate(sorted_baselines, 1):
            ranking_data.append({
                'Dataset': dataset.upper(),
                'Rank': rank,
                'Baseline': baseline.upper(),
                'Score': f"{score:.4f}"
            })

    ranking_table = pd.DataFrame(ranking_data, columns=columns)
    # Keep 'Rank' numeric even for an empty table so that aggregations such
    # as groupby(...).mean() keep working downstream.
    ranking_table['Rank'] = ranking_table['Rank'].astype(int)
    return ranking_table

print("\n" + "="*60)
print("RANKING ANALYSIS (Lower Score = Better Performance)")
print("="*60)

ranking_table = create_ranking_table(datasets, baselines, all_results)
print(ranking_table.to_string(index=False))

# Summary statistics
print("\n" + "="*40)
print("RANKING SUMMARY")
print("="*40)

if ranking_table.empty:
    # Nothing was ranked (e.g. no dataset or baseline could be evaluated);
    # indexing the 'Baseline' / 'Rank' columns would fail on such a table.
    print("\nNo ranking data available.")
else:
    rank_counts = ranking_table['Baseline'].value_counts()
    # Count the first places once instead of re-filtering the table per baseline
    first_place_counts = ranking_table.loc[ranking_table['Rank'] == 1, 'Baseline'].value_counts()

    print("\nNumber of times each baseline achieved rank 1:")
    for baseline in rank_counts.index:
        count = int(first_place_counts.get(baseline, 0))
        print(f"{baseline}: {count} times")

    print("\nAverage rank by baseline:")
    avg_ranks = ranking_table.groupby('Baseline')['Rank'].mean().sort_values()
    for baseline, avg_rank in avg_ranks.items():
        print(f"{baseline}: {avg_rank:.2f}")

## Save Results

We save all evaluation results to CSV files for further analysis and reproducibility.

In [None]:
# Write every table produced above to its own CSV file
output_dir = "./results"
os.makedirs(output_dir, exist_ok=True)

# (table, file name) pairs: summary tables, comparison tables, then the ranking
csv_exports = [
    (tri_density_table, "triangle_density_summary.csv"),
    (gcc_table, "global_clustering_summary.csv"),
    (degree_energy_table, "degree_centrality_energy_summary.csv"),
    (eigenvals_energy_table, "eigenvalues_energy_summary.csv"),
    (tri_density_comp, "triangle_density_comparison.csv"),
    (gcc_comp, "global_clustering_comparison.csv"),
    (ranking_table, "ranking_analysis.csv"),
]
for export_table, export_name in csv_exports:
    export_table.to_csv(os.path.join(output_dir, export_name), index=False)

# List what the output directory now contains
print(f"\nResults saved to {output_dir}/")
print("Files created:")
for created_name in os.listdir(output_dir):
    print(f"  - {created_name}")

## Conclusion

Our evaluation framework provides comprehensive assessment of network generation baselines using multiple metrics:

1. **Triangle Density**: Measures the clustering in the network
2. **Global Clustering Coefficient**: Measures the overall transitivity
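3. **Degree Centrality Energy Distance**: Measures how far the degree-centrality distribution of the synthetic networks is from that of the real network
4. **Eigenvalues Energy Distance**: Measures how far the eigenvalue (spectral) distribution of the synthetic networks is from that of the real network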

The results demonstrate how well each baseline captures the structural properties of the real networks. Lower energy distances and smaller absolute errors indicate better performance. Our framework enables systematic comparison and ranking of different network generation approaches.