In [1]:
# Import necessary libraries
import torch
import numpy as np
import pandas as pd
import ast
from types import SimpleNamespace

In [2]:
# Import custom modules
from GNN_architectures import create_gnn_model
from GraphBuilder_with_features import GraphBuilder, create_graph_dataset
from training_utils import train, compare_configurations

In [3]:
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

In [4]:
# Load data
def load_graph_data(loop):
    """Load graph data from CSV files."""
    edges = []
    y = []
    
    for i in range(loop, loop + 1):
        filename = f'../Graph_Edge_Data/den_graph_data_{loop}.csv'
        df = pd.read_csv(filename)
        edges += df['EDGES'].tolist()
        y += df['COEFFICIENTS'].tolist()
    
    edges = [ast.literal_eval(e) for e in edges]
    graphs_data = list(zip(edges, y))
    return graphs_data

In [5]:
# Define different feature configurations to test
def get_feature_configs():
    """Define different feature configurations for experiments."""
    return {
        'minimal': {
            'selected_features': ['basic', 'face'],
            'laplacian_pe_k': 0
        },
        'planar_focused': {
            'selected_features': ['basic', 'face', 'dual'],
            'laplacian_pe_k': 2
        },
        'balanced': {
            'selected_features': ['basic', 'face', 'spectral_node', 'centrality'],
            'laplacian_pe_k': 3
        },
        'full': {
            'selected_features': ['basic', 'face', 'spectral_node', 'dual', 'centrality'],
            'laplacian_pe_k': 4
        }
    }

In [6]:
# Main experiment configuration
def run_single_experiment(config,dataset ):
    """Run a single experiment with specified configuration."""

    config.in_channels = dataset[0].x.shape[1]
    
    print(f"Dataset created with {len(dataset)} graphs")
    print(f"Feature dimensions: {config.in_channels}")
    print(f"Feature names: {dataset[0].feature_names}")
    
    # Train model
    results = train(config, dataset)
    
    return results

In [None]:


# Compare multiple configurations
def run_comparison_experiment():
    """Compare different feature configurations."""
    
    # Load data
    graphs_data = load_graph_data(loop=8)
    
    # Base configuration
    base_config = {
        'model_name': 'gin',
        'hidden_channels': 64,
        'num_layers': 3,
        'dropout': 0.2,
        'lr': 0.01,
        'weight_decay': 5e-4,
        'epochs': 70,
        'batch_size': 32,
        'k_folds': 4,
        'early_stop_patience': 20,
        'scheduler_type': 'onecycle',
        'use_wandb': True,
        'project': 'gnn-planar-graphs',
        'graphs_data': graphs_data
    }
    
    # Create configurations for different feature sets
    feature_configs = get_feature_configs()
    configs = {}
    
    for name, feat_config in feature_configs.items():
        config = SimpleNamespace(**base_config)
        config.selected_features = feat_config['selected_features']
        config.laplacian_pe_k = feat_config['laplacian_pe_k']
        config.experiment_name = f"{name}_features_{config.model_name}"
        configs[name] = config
    
    # Dataset generator function
    def dataset_generator(config):
        return create_graph_dataset(
            config.graphs_data,
            {
                'selected_features': config.selected_features,
                'laplacian_pe_k': config.laplacian_pe_k
            }
        )
    
    # Run comparison
    results = compare_configurations(configs, dataset_generator)
    
    # Print summary
    print("\n" + "="*60)
    print("SUMMARY: Feature Configuration Comparison")
    print("="*60)
    
    for config_name, res in results.items():
        print(f"\n{config_name}:")
        print(f"  Mean Accuracy: {res['mean_accuracy']:.4f} ± {res['std_accuracy']:.4f}")
        print(f"  Fold Accuracies: {res['fold_accuracies']}")
    
    return results

# Grid search for hyperparameters
def run_hyperparameter_search():
    """Run hyperparameter grid search."""
    
    # Load data
    graphs_data = load_graph_data(loop=8)
    
    # Define hyperparameter grid
    param_grid = {
        'model_name': ['gin', 'gat', 'simple'],
        'hidden_channels': [32, 64, 128],
        'num_layers': [2, 3, 4],
        'dropout': [0.1, 0.2, 0.3],
        'lr': [0.001, 0.01, 0.1]
    }
    
    # Fixed parameters
    fixed_params = {
        'weight_decay': 5e-4,
        'epochs': 50,
        'batch_size': 32,
        'k_folds': 3,
        'early_stop_patience': 15,
        'scheduler_type': 'plateau',
        'selected_features': ['basic', 'face', 'spectral_node', 'centrality'],
        'laplacian_pe_k': 3,
        'use_wandb': True,
        'project': 'gnn-planar-graphs-hyperparam',
        'graphs_data': graphs_data
    }
    
    # Sample random configurations (or do full grid search)
    import itertools
    from random import sample
    
    # Generate all combinations
    keys, values = zip(*param_grid.items())
    all_combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]
    
    # Sample a subset for demonstration (or use all for full grid search)
    n_trials = min(20, len(all_combinations))
    selected_combinations = sample(all_combinations, n_trials)
    
    best_accuracy = 0
    best_config = None
    
    for i, params in enumerate(selected_combinations):
        print(f"\n{'='*60}")
        print(f"Trial {i+1}/{n_trials}")
        print(f"Parameters: {params}")
        print(f"{'='*60}")
        
        # Create configuration
        config = SimpleNamespace(**fixed_params, **params)
        config.experiment_name = f"trial_{i+1}"
        
        # Create dataset
        dataset, scaler = create_graph_dataset(
            config.graphs_data,
            {
                'selected_features': config.selected_features,
                'laplacian_pe_k': config.laplacian_pe_k
            }
        )
        
        config.dataset = dataset
        config.in_channels = dataset[0].x.shape[1]
        
        # Train model
        results = train(config)
        
        # Track best configuration
        if results['mean_accuracy'] > best_accuracy:
            best_accuracy = results['mean_accuracy']
            best_config = params
    
    print(f"\n{'='*60}")
    print(f"Best configuration found:")
    print(f"  Parameters: {best_config}")
    print(f"  Accuracy: {best_accuracy:.4f}")
    
    return best_config, best_accuracy

# Run experiments
if __name__ == "__main__":
    # Choose which experiment to run
    experiment_type = "single"  # Options: "single", "comparison", "hyperparameter"
    
    if experiment_type == "single":
        results = run_single_experiment()
    elif experiment_type == "comparison":
        results = run_comparison_experiment()
    elif experiment_type == "hyperparameter":
        best_config, best_accuracy = run_hyperparameter_search()

TypeError: run_single_experiment() missing 2 required positional arguments: 'config' and 'dataset'

# Experiments

In [7]:
# Load data
graphs_data = load_graph_data(loop=8)

In [None]:
# Dataset configuration
dataset_config = SimpleNamespace( selected_features=['basic', 'face', 'spectral_node', 'centrality'],
laplacian_pe_k=3) #Balanced configuration

# Create dataset
dataset, scaler = create_graph_dataset( graphs_data,
    {
        'selected_features': dataset_config.selected_features,
        'laplacian_pe_k': dataset_config.laplacian_pe_k
    }
)

Extracting features...
Normalizing features...
Created dataset with 1432 graphs
Feature dimensions: 13
Feature names: ['degree', 'num_faces', 'avg_face_size', 'max_face_size', 'face_size_variance', 'fiedler_vector', 'eigenvector_energy', 'third_eigenvector', 'betweenness_centrality', 'closeness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'pagerank']


In [None]:
# Create configuration
config = SimpleNamespace(
    # Model configuration
    model_name='gin',
    hidden_channels=64,
    num_layers=3,
    dropout=0.2,
    
    # Training configuration
    lr=0.1,
    weight_decay=5e-4,
    epochs=70,
    batch_size=32,
    k_folds=4,
    early_stop_patience=20,
    scheduler_type='onecycle',
    
    # WandB configuration
    use_wandb=True,
    project='gnn-planar-graphs',
    experiment_name='balanced_features_gin',
)

In [10]:
results = run_single_experiment(config, dataset )

Dataset created with 1432 graphs
Feature dimensions: 13
Feature names: ['degree', 'num_faces', 'avg_face_size', 'max_face_size', 'face_size_variance', 'fiedler_vector', 'eigenvector_energy', 'third_eigenvector', 'betweenness_centrality', 'closeness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'pagerank']


[34m[1mwandb[0m: Currently logged in as: [33mdian-gabriele[0m ([33mdian-gabriele-desydeutsches-elektronen-synchrotron[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



Fold 1/4
  Epoch 20/70, Train Loss: 0.4030, Val Loss: 0.5454, Val Acc: 0.7933, LR: 0.000419
  Early stopping at epoch 27
  Test Accuracy: 0.7849

Fold 2/4
  Epoch 20/70, Train Loss: 0.4056, Val Loss: 0.4921, Val Acc: 0.7430, LR: 0.000419
  Early stopping at epoch 39
  Test Accuracy: 0.7514

Fold 3/4
  Epoch 20/70, Train Loss: 0.3695, Val Loss: 0.5016, Val Acc: 0.7626, LR: 0.000419
  Epoch 40/70, Train Loss: 0.3432, Val Loss: 0.4942, Val Acc: 0.7682, LR: 0.000474
  Early stopping at epoch 50
  Test Accuracy: 0.7709

Fold 4/4
  Epoch 20/70, Train Loss: 0.4035, Val Loss: 0.4753, Val Acc: 0.7626, LR: 0.000419
  Early stopping at epoch 37
  Test Accuracy: 0.7598

Cross-validation Results:
  Mean Accuracy: 0.7668 ± 0.0126
  All Folds: [0.7849162011173184, 0.7513966480446927, 0.770949720670391, 0.7597765363128491]

Top 10 Most Important Features:
  1. betweenness_centrality   : 0.7936
  2. max_face_size            : 0.5571
  3. pagerank                 : 0.5182
  4. clustering_coefficient   

0,1
epoch,▁▂▂▃▄▅▁▁▁▁▂▂▃▃▄▄▄▅▁▂▃▃▃▄▄▅▅▅▆▆▇▇██▂▄▄▄▅▆
fold,▁▃▆█
fold_0/lr,▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▅▅▆▆▇▇▇█
fold_0/test_accuracy,▁
fold_0/test_loss,▁
fold_0/train_loss,█▆▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▁▂▂▂▂▁▁▁▁▁
fold_0/val_accuracy,▁▄▆▆▅▆▆▆▇▇▆▆▇▇█▆▇█▇▇▇▆▆▇▇▅▇
fold_0/val_loss,█▃▂▂▂▁▁▂▁▁▂▂▃▂▄▄▄▅▆▆▆▇▅█▅█▆
fold_1/lr,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇██
fold_1/test_accuracy,▁

0,1
epoch,36.0
fold,3.0
fold_0/lr,0.00043
fold_0/test_accuracy,0.78492
fold_0/test_loss,0.55028
fold_0/train_loss,0.3774
fold_0/val_accuracy,0.78492
fold_0/val_loss,0.55028
fold_1/lr,0.00047
fold_1/test_accuracy,0.7514


In [None]:
# Run experiments
if __name__ == "__main__":
    # Choose which experiment to run
    experiment_type = "single"  # Options: "single", "comparison", "hyperparameter"
    
    if experiment_type == "single":
        results = run_single_experiment()
    elif experiment_type == "comparison":
        results = run_comparison_experiment()
    elif experiment_type == "hyperparameter":
        best_config, best_accuracy = run_hyperparameter_search()