In [1]:
### SETUP ###
import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent))
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import pickle
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix, classification_report
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

from SCRIPTS.config import *
from SCRIPTS.dataprep import prepare_interval_data, TaskIntervalDataset
from SCRIPTS.topological_model import TopologicalModel
from SCRIPTS.topological_training import train_topological_model
from SCRIPTS.cross_validation_experiments import run_topological_cross_validation

# Reproducibility: fix the NumPy and PyTorch RNG seeds to the same value
np.random.seed(42)
torch.manual_seed(42)

# Plot appearance: apply the matplotlib style sheet first, then the seaborn palette
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

# Compute device: use CUDA when PyTorch reports it is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Echo the data/results locations pulled in from SCRIPTS.config
print(f"Data path: {COMBINED_SCATTERING}")
print(f"Results path: {TOPOLOGICAL_RESULTS_DIR}")

Data path: /Users/judesack/Neurospectrum_Creativity/DATA/SCATTERING_COEFFICIENTS/combined_scattering_data.csv
Results path: /Users/judesack/Neurospectrum_Creativity/RESULTS/cross_validation_results/topological_results


In [2]:
### CONFIGURATION ###
# Run-time switches for this notebook; edit here, then re-run the cells below.

# Gate for the "TRAIN TOPOLOGICAL MODELS" cell: True trains one model per
# latent dimension and split and pickles the results; False skips training.
TRAIN_TOPOLOGICAL = False

# Gate for the "CROSS-VALIDATION" cell: True runs the experiments and
# (over)writes topological_cv_results.pkl; False reloads that file if it exists.
RUN_CROSS_VALIDATION = True

# Passed as `num_trials` to run_topological_cross_validation for every
# (latent dimension, split type) combination.
CV_TRIALS = 5

# Latent-space sizes iterated over by both the training and the CV cells.
LATENT_DIMS = [8, 48]

In [3]:
### LOAD INTERVAL DATA ###

print("\n=== LOADING INTERVAL DATA ===")

# Arguments common to both splits; only `split_type` differs between the calls.
_interval_kwargs = dict(
    scattering_data_path=COMBINED_SCATTERING,
    batch_size=16,
    random_state=42,
)

# Subject-based split
train_loader_subj, test_loader_subj, info_subj = prepare_interval_data(
    split_type='subject', **_interval_kwargs
)

# Time-based split
train_loader_time, test_loader_time, info_time = prepare_interval_data(
    split_type='time', **_interval_kwargs
)

# Report the interval counts of each split
print(f"\nSubject split: {info_subj['n_train']} train, {info_subj['n_test']} test")
print(f"Time split: {info_time['n_train']} train, {info_time['n_test']} test")



=== LOADING INTERVAL DATA ===
Found 17 valid subjects

Total intervals extracted: 306 (expected: 306)

Subject split:
  Train subjects (14): ['15040901', '14091701', '15111101', '15052902', '16100101', '14091102', '16100801', '16101401', '16100601', '16102002', '14092201', '15081202sub2', '15053001sub2', '15080601']
  Test subjects (3): ['15053001sub1', '15012001', '14101601']

Split results:
  Train: 252 intervals
  Test: 54 intervals
Found 17 valid subjects

Total intervals extracted: 306 (expected: 306)

Split results:
  Train: 238 intervals
  Test: 68 intervals

Subject split: 252 train, 54 test
Time split: 238 train, 68 test


In [4]:
### TRAIN TOPOLOGICAL MODELS ###
# When TRAIN_TOPOLOGICAL is True: train one TopologicalModel per latent
# dimension and per withholding scheme, then pickle the collected results.
# Otherwise: skip training and reload previously saved results when present,
# so `results` is defined either way (same convention as the CV cell).

# Single location used both for saving fresh results and reloading earlier ones.
training_results_file = TOPOLOGICAL_RESULTS_DIR / 'topological_training_results.pkl'

if TRAIN_TOPOLOGICAL:
    print("\n=== TRAINING TOPOLOGICAL MODELS ===")

    # One (train, test) loader pair per withholding scheme; the keys double as
    # the keys of `results[latent_dim]`.
    split_loaders = {
        'subject': (train_loader_subj, test_loader_subj),
        'time': (train_loader_time, test_loader_time),
    }

    results = {}

    for latent_dim in LATENT_DIMS:
        print(f"\n--- Latent Dimension: {latent_dim} ---")
        results[latent_dim] = {}

        for split_name, (split_train_loader, split_test_loader) in split_loaders.items():
            print(f"\n{split_name.upper()} WITHHOLDING:")

            # A fresh model for every run so no weights carry over between
            # splits or latent dimensions.
            model = TopologicalModel(
                input_dim=768,
                latent_dim=latent_dim,
                num_classes=3
            )

            model, history, val_preds, val_labels = train_topological_model(
                model=model,
                train_loader=split_train_loader,
                test_loader=split_test_loader,
                num_epochs=DEFAULT_EPOCHS,
                lr=DEFAULT_LEARNING_RATE,
                device=device
            )

            # Percentage of held-out predictions that match their labels.
            val_acc = 100 * np.mean(np.array(val_preds) == np.array(val_labels))
            results[latent_dim][split_name] = {
                'accuracy': val_acc,
                'predictions': val_preds,
                'labels': val_labels,
                'history': history
            }

    save_path = training_results_file
    # Make sure the output directory exists so the finished training run is
    # not lost to a FileNotFoundError at dump time.
    save_path.parent.mkdir(parents=True, exist_ok=True)
    with open(save_path, 'wb') as f:
        pickle.dump(results, f)
    print(f"\n✓ Results saved to {save_path}")

    print("\n=== TOPOLOGICAL MODEL RESULTS ===")
    for latent_dim in LATENT_DIMS:
        print(f"\n{latent_dim}D Latent Space:")
        print(f"  Subject: {results[latent_dim]['subject']['accuracy']:.1f}%")
        print(f"  Time: {results[latent_dim]['time']['accuracy']:.1f}%")

else:
    print("✓ Skipping topological model training")

    if training_results_file.exists():
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # result files that this notebook itself produced.
        with open(training_results_file, 'rb') as f:
            results = pickle.load(f)
        print("✓ Loaded existing topological training results")
    else:
        print("✓ No existing topological training results found")

✓ Skipping topological model training


In [5]:
### CROSS-VALIDATION ###
# When RUN_CROSS_VALIDATION is True: run CV for every (latent dimension,
# split type) pair and pickle the results. Otherwise: reload the previously
# saved results if the file exists. A summary is printed in both cases.

# Single location used both for saving fresh results and reloading earlier ones.
cv_file = TOPOLOGICAL_RESULTS_DIR / 'topological_cv_results.pkl'

# Set only once `cv_results` has been produced or loaded by this cell, so the
# summary below never reports stale kernel state.
cv_results_ready = False

if RUN_CROSS_VALIDATION:
    print(f"\n=== CROSS-VALIDATION EXPERIMENTS ({CV_TRIALS} trials) ===")

    save_path = cv_file
    # Create the output directory *before* the long-running experiments so a
    # missing directory cannot discard the results at dump time.
    save_path.parent.mkdir(parents=True, exist_ok=True)

    cv_results = {}

    for latent_dim in LATENT_DIMS:
        print(f"\n--- LATENT DIMENSION: {latent_dim} ---")
        cv_results[latent_dim] = {}

        for split_type in ['subject', 'time']:
            print(f"\n{split_type.upper()} SPLIT:")
            cv_results[latent_dim][split_type] = run_topological_cross_validation(
                data_path=COMBINED_SCATTERING,
                split_type=split_type,
                latent_dim=latent_dim,
                num_trials=CV_TRIALS,
                num_epochs=DEFAULT_EPOCHS
            )

    with open(save_path, 'wb') as f:
        pickle.dump(cv_results, f)
    print(f"\n✓ Results saved to {save_path}")
    cv_results_ready = True

elif cv_file.exists():
    # NOTE(review): pickle.load can execute arbitrary code; only load result
    # files that this notebook itself produced.
    with open(cv_file, 'rb') as f:
        cv_results = pickle.load(f)

    print("✓ Loaded existing cross-validation results")
    cv_results_ready = True

else:
    print("✓ No existing cross-validation results found")

if cv_results_ready:
    print("\n=== CROSS-VALIDATION SUMMARY ===")
    for latent_dim in LATENT_DIMS:
        # Loaded files may predate a change to LATENT_DIMS; skip missing entries.
        if latent_dim not in cv_results:
            continue
        print(f"\n{latent_dim}D Latent Space:")
        for split_type in ['subject', 'time']:
            split_summary = cv_results[latent_dim][split_type]
            print(f"  {split_type.capitalize()}: {split_summary['mean_accuracy']:.1f}% ± {split_summary['std_accuracy']:.1f}%")


=== CROSS-VALIDATION EXPERIMENTS (5 trials) ===

--- LATENT DIMENSION: 8 ---

SUBJECT SPLIT:

=== Topological Model Trial 1/5 (subject split, 8D) ===
Found 17 valid subjects

Total intervals extracted: 306 (expected: 306)

Subject split:
  Train subjects (14): ['15040901', '14091701', '15111101', '15052902', '16100101', '14091102', '16100801', '16101401', '16100601', '16102002', '14092201', '15081202sub2', '15053001sub2', '15080601']
  Test subjects (3): ['15053001sub1', '15012001', '14101601']

Split results:
  Train: 252 intervals
  Test: 54 intervals
Epoch 10/100:
  Train Acc: 40.9%, Val Acc: 55.6%
  Recon Loss: 0.2253, Class Loss: 1.4591
  Predicting classes: [0]
Epoch 20/100:
  Train Acc: 55.6%, Val Acc: 55.6%
  Recon Loss: 0.1343, Class Loss: 1.0188
  Predicting classes: [0]
Epoch 30/100:
  Train Acc: 55.2%, Val Acc: 55.6%
  Recon Loss: 0.1113, Class Loss: 0.9937
  Predicting classes: [0]
Epoch 40/100:
  Train Acc: 55.6%, Val Acc: 55.6%
  Recon Loss: 0.1282, Class Loss: 1.0148
 