In [1]:
# Import necessary libraries
import torch
import numpy as np
import pandas as pd
import ast
from types import SimpleNamespace

In [2]:
# Import custom modules
from GraphBuilder_with_features import create_graph_dataset
from training_utils import train

In [3]:
# Fix the PyTorch and NumPy global RNG seeds (both to 42) for reproducibility.
# np.random.seed stays last: it returns None, so the cell shows no output.
torch.manual_seed(42)
np.random.seed(42)

In [4]:
# Load data
def load_graph_data(loop, data_dir='../Graph_Edge_Data'):
    """Load the graph records stored for one loop order.

    Reads ``<data_dir>/den_graph_data_<loop>.csv`` and pairs every parsed
    entry of its ``EDGES`` column with the value on the same row of its
    ``COEFFICIENTS`` column.

    Args:
        loop: Value interpolated into the file name (e.g. ``8`` selects
            ``den_graph_data_8.csv``).
        data_dir: Directory containing the CSV files. The default keeps the
            location that was previously hard-coded.

    Returns:
        list[tuple]: One ``(edges, coefficient)`` pair per CSV row, in file
        order. ``edges`` is the Python object obtained by parsing the cell
        text; ``coefficient`` is the raw column value.

    Raises:
        FileNotFoundError: If the CSV file does not exist (from pandas).
        KeyError: If the ``EDGES`` or ``COEFFICIENTS`` column is missing.
        ValueError / SyntaxError: If an ``EDGES`` cell is not a valid
            Python literal.
    """
    # The former single-pass `for i in range(loop, loop + 1)` loop never used
    # `i`; exactly one file is read, so the loop is dropped.
    filename = f'{data_dir}/den_graph_data_{loop}.csv'
    df = pd.read_csv(filename)

    # EDGES cells hold Python-literal text; literal_eval parses them without
    # executing arbitrary code.
    edges = [ast.literal_eval(cell) for cell in df['EDGES'].tolist()]
    targets = df['COEFFICIENTS'].tolist()

    return list(zip(edges, targets))

In [5]:
# Define different feature configurations to test
def get_feature_configs():
    """Return the named feature presets used in the experiments.

    Returns:
        dict: Maps a preset name to a dict with two entries:
        ``'selected_features'`` (list of feature-group names) and
        ``'laplacian_pe_k'`` (int). Presets are inserted in the order
        ``minimal``, ``planar_focused``, ``balanced``, ``full``; a fresh
        structure is built on every call.
    """
    # (preset name, feature groups, laplacian_pe_k)
    presets = (
        ('minimal', ['basic', 'face'], 0),
        ('planar_focused', ['basic', 'face', 'dual'], 2),
        ('balanced', ['basic', 'face', 'spectral_node', 'centrality'], 3),
        ('full', ['basic', 'face', 'spectral_node', 'dual', 'centrality'], 4),
    )
    return {
        name: {'selected_features': groups, 'laplacian_pe_k': pe_k}
        for name, groups, pe_k in presets
    }

In [6]:
# Main experiment configuration
def run_single_experiment(config, dataset):
    """Train one model on ``dataset`` with the settings in ``config``.

    Before training, the size of the second dimension of the first graph's
    ``x`` is stored on ``config`` as ``in_channels`` (``config`` is mutated
    in place) and a short dataset summary is printed.

    Args:
        config: Attribute-style settings object passed on to ``train``;
            its ``in_channels`` attribute is set/overwritten here.
        dataset: Sized, indexable collection of graphs. The first item must
            expose ``x`` (with a ``shape``) and ``feature_names``.

    Returns:
        Whatever ``train(config, dataset)`` returns.
    """
    first_graph = dataset[0]
    # Model input width is taken from the data rather than set by hand.
    config.in_channels = first_graph.x.shape[1]

    print(f"Dataset created with {len(dataset)} graphs")
    print(f"Feature dimensions: {config.in_channels}")
    print(f"Feature names: {first_graph.feature_names}")

    return train(config, dataset)

# Experiments

In [7]:
# Load the (edges, coefficient) pairs from the loop-8 CSV file.
graphs_data = load_graph_data(loop=8)

In [8]:
# Select the 'full' preset: all five feature groups with laplacian_pe_k=4.
feat_conf_full = get_feature_configs()['full'] # preset containing every feature group
# Bare expression so the notebook displays the chosen preset.
feat_conf_full

{'selected_features': ['basic', 'face', 'spectral_node', 'dual', 'centrality'],
 'laplacian_pe_k': 4}

In [9]:
# Feature-extraction settings, copied out of the chosen preset.
dataset_config = SimpleNamespace(
    selected_features=feat_conf_full['selected_features'],
    laplacian_pe_k=feat_conf_full['laplacian_pe_k'],
)

# Build the graph dataset from the raw records. The builder receives a fresh
# plain dict holding the same two settings and returns a (dataset, scaler) pair.
# NOTE(review): `scaler` is presumably the fitted feature normalizer - confirm
# in GraphBuilder_with_features.
dataset, scaler = create_graph_dataset(graphs_data, dict(vars(dataset_config)))

Extracting features...
Normalizing features...
Created dataset with 1432 graphs
Feature dimensions: 18
Feature names: ['degree', 'num_faces', 'avg_face_size', 'max_face_size', 'face_size_variance', 'fiedler_vector', 'eigenvector_energy', 'third_eigenvector', 'dual_degree', 'dual_clustering', 'dual_degree_ratio', 'dual_betweenness', 'face_edge_ratio', 'betweenness_centrality', 'closeness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'pagerank']


In [10]:
# First experiment: 4-layer GIN trained with the 'onecycle' scheduler.
# NOTE(review): in the recorded run of this config the logged LR peaked at
# 0.003 (3x `lr`), so `lr` does not appear to be the schedule's peak value -
# confirm in `train`.
config = SimpleNamespace(
    **{  # Model configuration
        'model_name': 'gin',
        'hidden_channels': 64,
        'num_layers': 4,
        'dropout': 0.2,
    },
    **{  # Training configuration
        'lr': 0.001,
        'weight_decay': 5e-4,
        'epochs': 70,
        'batch_size': 32,
        'early_stop_patience': 20,
        'scheduler_type': 'onecycle',
    },
    **{  # WandB configuration
        'use_wandb': True,
        'project': 'gnn-planar-graphs_8',
        'experiment_name': 'basic_oneCyleLR_full_features',
    },
)

In [11]:
# OneCycle variant: 3-layer GIN, lr=0.003, 100 epochs.
# Unlike `config`, no early_stop_patience is set here.
# NOTE(review): the project name says "9_loops" while the data loaded in this
# notebook is loop=8 - confirm the intended W&B project.
onecycle_config = SimpleNamespace(**{
    # Model configuration
    'model_name': 'gin',
    'hidden_channels': 64,
    'num_layers': 3,
    'dropout': 0.2,

    # Training configuration
    'lr': 0.003,
    'weight_decay': 5e-4,
    'epochs': 100,
    'batch_size': 32,
    'scheduler_type': 'onecycle',

    # WandB configuration
    'use_wandb': True,
    'project': 'gnn-planar-graphs_9_loops',
    'experiment_name': 'gin_onecycle_fixed',
})

In [12]:
# Same setup as `onecycle_config` except for a 10x larger lr (0.03 vs 0.003).
# NOTE(review): experiment_name is identical to onecycle_config's, so the two
# runs cannot be told apart by name in W&B - consider a distinct name.
onecycle_config_2 = SimpleNamespace()

# Model configuration
onecycle_config_2.model_name = 'gin'
onecycle_config_2.hidden_channels = 64
onecycle_config_2.num_layers = 3
onecycle_config_2.dropout = 0.2

# Training configuration
onecycle_config_2.lr = 0.03
onecycle_config_2.weight_decay = 5e-4
onecycle_config_2.epochs = 100
onecycle_config_2.batch_size = 32
onecycle_config_2.scheduler_type = 'onecycle'

# WandB configuration
onecycle_config_2.use_wandb = True
onecycle_config_2.project = 'gnn-planar-graphs_9_loops'
onecycle_config_2.experiment_name = 'gin_onecycle_fixed'

In [13]:
# Conservative setup for small planar graphs: a narrower, shallower GIN with a
# larger batch and the 'plateau' scheduler.
# NOTE(review): no visible cell runs this configuration.
best_practices_config = SimpleNamespace(
    # Model: 48 hidden channels and 2 layers keep capacity modest for small graphs.
    model_name='gin', hidden_channels=48, num_layers=2, dropout=0.15,
    # Training: batch size 64 chosen for stability.
    lr=0.002, weight_decay=1e-4, epochs=150, batch_size=64, scheduler_type='plateau',
    # WandB logging target.
    use_wandb=True, project='gnn-planar-graphs', experiment_name='best_practices',
)

In [14]:
# Train with `config` on the current dataset; run_single_experiment also sets
# config.in_channels from the data before training.
results = run_single_experiment(config, dataset )

Dataset created with 1432 graphs
Feature dimensions: 18
Feature names: ['degree', 'num_faces', 'avg_face_size', 'max_face_size', 'face_size_variance', 'fiedler_vector', 'eigenvector_energy', 'third_eigenvector', 'dual_degree', 'dual_clustering', 'dual_degree_ratio', 'dual_betweenness', 'face_edge_ratio', 'betweenness_centrality', 'closeness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'pagerank']
Using device: cpu
Train size: 1145, Val size: 287


[34m[1mwandb[0m: Currently logged in as: [33maliajrigers[0m ([33maliajrigers-desydeutsches-elektronen-synchrotron[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



Starting training...
Model architecture: gin
Hidden dim: 64, Layers: 4
Initial LR: 0.00011999999999999988
Epoch   0/70: Train Loss=0.7312, Acc=0.5677, Val Loss=0.6306, Acc=0.6237, LR=0.000136
Epoch  10/70: Train Loss=0.5177, Acc=0.7319, Val Loss=0.5181, Acc=0.7422, LR=0.001671
Epoch  20/70: Train Loss=0.4700, Acc=0.7703, Val Loss=0.4506, Acc=0.7735, LR=0.003000
Epoch  30/70: Train Loss=0.4303, Acc=0.7790, Val Loss=0.4143, Acc=0.8049, LR=0.002701
Epoch  40/70: Train Loss=0.3242, Acc=0.8393, Val Loss=0.4066, Acc=0.8223, LR=0.001924
Epoch  50/70: Train Loss=0.2776, Acc=0.8795, Val Loss=0.3956, Acc=0.8571, LR=0.000980
Epoch  60/70: Train Loss=0.1706, Acc=0.9336, Val Loss=0.4565, Acc=0.8502, LR=0.000242


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch  69/70: Train Loss=0.1672, Acc=0.9293, Val Loss=0.4627, Acc=0.8432, LR=0.000000

Best validation accuracy: 0.8711 at epoch 46


0,1
current_lr,▁▁▂▃▃▅▅▆▆▇████████▇▇▇▆▆▆▆▅▅▅▄▄▃▃▂▂▂▂▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██
train_accuracy,▁▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇████████
train_f1,▁▃▃▃▄▄▅▄▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████████
train_loss,█▇▆▆▅▆▅▅▅▅▅▅▅▄▄▅▅▄▄▄▄▃▃▃▃▃▃▃▃▃▃▂▂▁▁▁▁▁▁▁
train_precision,▁▂▂▃▃▃▄▄▄▄▄▄▄▄▅▄▅▄▅▄▅▅▅▅▆▅▅▆▆▆▇▇▇███████
train_recall,▁▃▃▄▅▅▆▆▆▆▆▅▇▆▇▆▇▇▇█▇████▇▇▇▇▇▇▇▇███████
val_accuracy,▁▂▃▃▄▄▄▅▅▄▅▅▅▅▇▆▇▆▆▆▆▅▇▇▆▆▇█▇██▇▇███▇█▇▇
val_f1,▁▃▁▄▅▅▅▅▅▆▆▆▆▅▅▆▇▇▇▇▆▇█▇▆▇▇▆██▆▇███▇▇██▇
val_loss,██▇▇▇▅▆▆▅▅▄▄▅▃▃▃▃▂▃▂▄▃▅▄▂▂▂▁▁▂▂▂▃▂▄▃▄▃▃▄

0,1
current_lr,0.0
epoch,69.0
train_accuracy,0.92926
train_f1,0.93059
train_loss,0.16722
train_precision,0.93621
train_recall,0.92504
val_accuracy,0.84321
val_f1,0.83636
val_loss,0.46271


In [15]:
# Train again on the same dataset, this time with `onecycle_config`.
# NOTE(review): this overwrites `results` from the previous run; keep a
# separate name if both result sets are needed later.
results = run_single_experiment(onecycle_config, dataset )

Dataset created with 1432 graphs
Feature dimensions: 18
Feature names: ['degree', 'num_faces', 'avg_face_size', 'max_face_size', 'face_size_variance', 'fiedler_vector', 'eigenvector_energy', 'third_eigenvector', 'dual_degree', 'dual_clustering', 'dual_degree_ratio', 'dual_betweenness', 'face_edge_ratio', 'betweenness_centrality', 'closeness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'pagerank']
Using device: cpu
Train size: 1145, Val size: 287



Starting training...
Model architecture: gin
Hidden dim: 64, Layers: 3
Initial LR: 0.0003599999999999992
Epoch   0/100: Train Loss=0.6804, Acc=0.5913, Val Loss=0.6314, Acc=0.6411, LR=0.000384
Epoch  10/100: Train Loss=0.5071, Acc=0.7590, Val Loss=0.5194, Acc=0.7596, LR=0.002927
Epoch  20/100: Train Loss=0.4977, Acc=0.7712, Val Loss=0.5058, Acc=0.7422, LR=0.007226
Epoch  30/100: Train Loss=0.4819, Acc=0.7651, Val Loss=0.4926, Acc=0.7143, LR=0.008995
Epoch  40/100: Train Loss=0.4939, Acc=0.7633, Val Loss=0.4571, Acc=0.7875, LR=0.008460
Epoch  50/100: Train Loss=0.4031, Acc=0.8201, Val Loss=0.3909, Acc=0.8293, LR=0.007141
Epoch  60/100: Train Loss=0.3605, Acc=0.8428, Val Loss=0.4094, Acc=0.8049, LR=0.005298
Epoch  70/100: Train Loss=0.3056, Acc=0.8638, Val Loss=0.4697, Acc=0.8118, LR=0.003298
Epoch  80/100: Train Loss=0.2321, Acc=0.8952, Val Loss=0.4288, Acc=0.8397, LR=0.001535
Epoch  90/100: Train Loss=0.1787, Acc=0.9188, Val Loss=0.4378, Acc=0.8223, LR=0.000360


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch  99/100: Train Loss=0.1504, Acc=0.9293, Val Loss=0.4678, Acc=0.8293, LR=0.000000

Best validation accuracy: 0.8571 at epoch 61


0,1
current_lr,▂▃▃▄▄▅▅▆█████████▇▇▇▇▆▆▆▅▄▄▄▃▃▂▂▂▂▂▁▁▁▁▁
epoch,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇██
train_accuracy,▁▂▃▃▂▂▂▃▃▃▄▃▄▃▂▃▃▄▄▄▄▅▅▅▆▆▆▆▅▆▆▆▇▇▇▇█▇██
train_f1,▁▂▄▅▄▄▄▄▅▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▆▇▇▇▇▇████
train_loss,█▇▆▆▆▆▆▅▆▆▅▅▅▆▆▆▅▅▅▅▄▅▄▅▅▄▄▄▄▄▄▃▃▃▂▂▂▂▁▁
train_precision,▂▂▂▃▃▃▂▁▂▂▂▂▂▃▂▂▂▂▄▄▄▄▃▃▄▅▅▅▅▅▅▆▆▆▇▇▇▇██
train_recall,▁▁▃▃▁▄▆▆▅▇▅▅▆▅▆▅▆▆▇▅▇▇▆▆▆▇▇▆▇▇▇█▇▇▇█▇███
val_accuracy,▁▃▃▆▄▄▃▃▃▆▅▂▅▄▂▇▇█▆▇▆██▇▄▇▆▇▇▇█▆▇▇▇█▇▇▇▇
val_f1,▁▁▄▄▅▅▆▄▅▆▅▆▃▆▄▅▆▇▇█▇▇██▅▇▆▇▆▆▇▇▆▇▇▇▆▇▆▇
val_loss,▆▆▅▄▄▄▆█▆▄▄▄▃▆▆▆▃▃▃▃▃▁▂▃▂▂▁▂▅▂▂▁▂▂▂▂▂▂▃▃

0,1
current_lr,0.0
epoch,99.0
train_accuracy,0.92926
train_f1,0.93256
train_loss,0.15043
train_precision,0.91205
train_recall,0.954
val_accuracy,0.82927
val_f1,0.81919
val_loss,0.46784


# 2 layers, 32 channels, different features

In [8]:
# Load the (edges, coefficient) pairs from the loop-8 CSV file for the feature
# study (same call as the earlier load cell).
graphs_data = load_graph_data(loop=8)

In [19]:
# Position of the preset to run, in the insertion order of get_feature_configs()
# (0 selects the first preset, 'minimal').
feature_conf_number = 0
# Iterating a dict yields its keys, so this is the preset's name.
experiment = list(get_feature_configs())[feature_conf_number]

In [None]:
# Look up the preset named by `experiment`.
# NOTE(review): despite its name, `feat_conf_full` holds whichever preset was
# selected (the recorded output shows the 'basic' + 'face' groups only).
feat_conf_full = get_feature_configs()[experiment]
# Bare expression so the notebook displays the chosen preset.
feat_conf_full

{'selected_features': ['basic', 'face'], 'laplacian_pe_k': 0}

In [22]:
# Copy the two feature-extraction settings out of the selected preset.
dataset_config = SimpleNamespace(
    **{key: feat_conf_full[key] for key in ('selected_features', 'laplacian_pe_k')}
)

# Rebuild the dataset with those settings; the builder is given a plain dict
# and returns a (dataset, scaler) pair.
dataset, scaler = create_graph_dataset(
    graphs_data,
    {
        'selected_features': dataset_config.selected_features,
        'laplacian_pe_k': dataset_config.laplacian_pe_k,
    },
)

Extracting features...
Normalizing features...
Created dataset with 1432 graphs
Feature dimensions: 5
Feature names: ['degree', 'num_faces', 'avg_face_size', 'max_face_size', 'face_size_variance']


In [23]:
# Feature-study configuration: a smaller GIN (2 layers, 32 hidden channels);
# the W&B run is named after the selected feature preset.
config = SimpleNamespace(**{
    # Model configuration
    'model_name': 'gin',
    'hidden_channels': 32,
    'num_layers': 2,
    'dropout': 0.2,

    # Training configuration
    'lr': 0.001,
    'weight_decay': 5e-4,
    'epochs': 70,
    'batch_size': 32,
    'early_stop_patience': 20,
    'scheduler_type': 'onecycle',

    # WandB configuration
    'use_wandb': True,
    'project': 'feature study test',
    'experiment_name': experiment,
})

In [None]:
# Train on the dataset built from the selected feature preset. The return
# value is not assigned; as the cell's last expression it is displayed instead.
run_single_experiment(config, dataset )