# Hybrid LogicNet + Graph Spectral Reasoner: Complete Project Runner

This notebook runs the full pipeline: data generation followed by training (Phase 1 baseline).
It imports the project files as modules to keep the pipeline modular.

- Generates synthetic Horn clause dataset.
- Trains GNN model for proof-step prediction.
- Outputs metrics for paper Section 4.1.

Run cells sequentially. Adjust parameters as needed for ablations.

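**Note**: To avoid dependency issues between TensorBoard, SciPy, and NumPy (e.g., the ufunc error seen with SciPy 1.15+), this notebook is meant to be run with TensorBoard logging disabled during training. The training cell calls `train.main()` unmodified, so this only takes effect if `train.py` treats its TensorBoard writer as optional. For full logging, resolve the versions externally (e.g., downgrade SciPy to <1.15 or NumPy to <1.26 if possible).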

In [None]:
import sys
import os
import json
import argparse
from pathlib import Path

# Add the project root (and its src/ subdirectory, if the files live there) to
# the import path. The membership check keeps this cell idempotent: re-running
# it does not pile up duplicate sys.path entries.
for _project_path in (os.getcwd(), os.path.join(os.getcwd(), 'src')):
    if _project_path not in sys.path:
        sys.path.append(_project_path)

# Import project modules
import data_generator  # For generate_dataset, Difficulty
import losses  # For loss functions
import model  # For GNN models
import dataset  # For StepPredictionDataset, create_split
import train  # For training components; we'll patch main to avoid TB

import torch
import time  # For manual timing if needed

In [None]:
# Step 1: Generate Synthetic Data (Phase 1 Curriculum)
# Sample counts per difficulty tier follow the parameters used in
# data_generator.py main (per the original note on this cell).
output_dir = 'data/horn'

# Ordered (difficulty, sample count) pairs; dict() keeps this insertion order.
n_per_difficulty = dict([
    (data_generator.Difficulty.EASY, 200),
    (data_generator.Difficulty.MEDIUM, 200),
    (data_generator.Difficulty.HARD, 150),
    (data_generator.Difficulty.VERY_HARD, 150),
    (data_generator.Difficulty.EXTREME_HARD, 100),
])

# Fixed seed so the dataset is reproducible across runs.
stats = data_generator.generate_dataset(
    output_dir,
    n_per_difficulty,
    seed=42,
)
print('Data Generation Stats:', stats)

In [None]:
# Step 2: Train Baseline Model (Phase 1)
# train.main() is driven through its command-line interface: we temporarily
# swap sys.argv for the argument list below, call main(), and then restore the
# kernel's own argv so later cells (and re-runs) never see the fake CLI.

# Directory name = current folder name + kernel PID. It is stable within one
# kernel session, so re-running this cell reuses the same directory.
exp_dir = f'experiments/run_{Path().resolve().name}_{os.getpid()}'

train_argv = [
    'train.py',  # Script name
    '--data-dir', 'data/horn',
    '--exp-dir', exp_dir,
    '--epochs', '50',
    '--batch-size', '128',
    '--lr', '3e-4',
    '--hidden-dim', '256',
    '--num-layers', '3',
    '--device', 'cpu',  # 'mps' or 'cuda' if available
    '--seed', '42',
    # '--debug'  # Uncomment if needed
]

# NOTE: nothing in this cell disables TensorBoard; train.py runs as-is.
# If its TensorBoard import fails, make the writer optional in train.py
# (the original note points at lines 245-246 and the writer uses there).

original_argv = sys.argv
sys.argv = train_argv
try:
    train.main()
finally:
    # Always put the real argv back, even if training raises or exits.
    sys.argv = original_argv

# Load and display results
results_path = f'{exp_dir}/results.json'
if os.path.exists(results_path):
    with open(results_path, 'r') as f:
        results = json.load(f)
    print('Phase 1 Results:', json.dumps(results, indent=2))
else:
    print('No results.json found; check training logs.')

In [None]:
# Step 3: Optional - Preview Model and Losses
# Load the best checkpoint from the Step 2 experiment directory for inspection.
checkpoint_path = f'{exp_dir}/best.pt'
if os.path.exists(checkpoint_path):
    # map_location='cpu' lets a checkpoint saved from a 'cuda' or 'mps' run
    # load on a machine without that device; the model below is built on CPU.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')

    # hidden_dim / num_layers must match the --hidden-dim / --num-layers values
    # passed to training in Step 2. in_dim=24 is assumed to be the input
    # feature size the checkpoint was trained with -- TODO confirm.
    gnn_model = model.ImprovedGNN(in_dim=24, hidden_dim=256, num_layers=3)
    gnn_model.load_state_dict(checkpoint['model_state_dict'])
    print('Loaded model parameters:', sum(p.numel() for p in gnn_model.parameters()))

    # Preview loss
    criterion = losses.get_recommended_loss()
    print('Recommended Loss:', criterion)
else:
    print('No checkpoint found; run training first.')

In [None]:
# Step 4: Optional - Run Shell Script (run.sh) for End-to-End
# The leading `!` hands the command to the system shell, so this needs bash
# available and run.sh present in the kernel's current working directory.
!bash run.sh

# After the run, check the latest experiments/run_*/results.json