# Emergenics: Phase 1 Notebook: Universality & Scaling

# 

Copyright 2025 Michael Gerald Young II, Emergenics Foundation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

In [1]:
# Cell 0: Initial Setup & Imports (Emergenics Phase 1 - GPU)
# Description: Basic imports, setup, device check (prioritizing GPU).

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import torch # Import PyTorch
import requests
import io
import gzip
import shutil
import copy
import math
import json
import time
import pickle
import warnings
import itertools
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm.auto import tqdm
from scipy.stats import entropy as calculate_scipy_entropy
from scipy.optimize import curve_fit, minimize # Keep scipy optimize for fitting
import multiprocessing as mp # Explicitly import for setting start method
import ast # For literal_eval parsing if needed

# --- Try importing optional Phase 2 dependencies ---
try:
    import umap
    print("✅ UMAP imported successfully.")
    UMAP_AVAILABLE = True
except ImportError:
    print("⚠️ UMAP not found. Install with 'pip install umap-learn'. UMAP analysis will be skipped.")
    UMAP_AVAILABLE = False
# Add other optional imports here (e.g., for specific information metrics)


# Import display tools if needed (less relevant for non-interactive phase 1 runs)
# from IPython.display import display, Image

# Ignore common warnings for cleaner output (optional)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
warnings.filterwarnings("ignore", category=RuntimeWarning)

print(f"--- Cell 0: Initial Setup (Emergenics - Phase 2 Ready) ({time.strftime('%Y-%m-%d %H:%M:%S')}) ---")

# --- Device Check ---
if torch.cuda.is_available():
    # Get the number of available GPUs
    num_gpus = torch.cuda.device_count()
    print(f"✅ CUDA available with {num_gpus} GPU(s).")
    # Simple strategy: use GPU 0 if available, otherwise CPU
    # More complex strategies could involve load balancing or specific GPU selection
    if num_gpus > 0:
        device = torch.device('cuda:0') # Use the first available CUDA device
        try:
            dev_name = torch.cuda.get_device_name(0)
            print(f"✅ Using GPU 0: {dev_name}")
        except Exception as e:
            print(f"✅ Using GPU 0, but couldn't get device name: {e}")
    else:
        # This case should ideally not happen if torch.cuda.is_available() is true
        print("⚠️ CUDA reported available, but no devices found. Using CPU.")
        device = torch.device('cpu')
else:
    device = torch.device('cpu')
    print("⚠️ CUDA not available, using CPU.")

# Make device globally accessible
global_device = device
print(f"PyTorch Device set to: {global_device}")

# --- Base Directories (Ensure they exist) ---
DATA_ROOT_DIR = "/tmp/cakg_data" # Or a more persistent location
# Define separate base directories for Phase 1 and Phase 2 results
OUTPUT_DIR_BASE_PHASE1 = "emergenics_phase1_results"
OUTPUT_DIR_BASE_PHASE2 = "emergenics_phase2_results"
os.makedirs(DATA_ROOT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR_BASE_PHASE1, exist_ok=True)
os.makedirs(OUTPUT_DIR_BASE_PHASE2, exist_ok=True)
print(f"Checked/created base directories (Phase 1: {OUTPUT_DIR_BASE_PHASE1}, Phase 2: {OUTPUT_DIR_BASE_PHASE2}).")

# --- Multiprocessing Start Method ---
# Set start method to 'spawn' early, essential for CUDA with multiprocessing
try:
    current_start_method = mp.get_start_method(allow_none=True)
    if current_start_method != 'spawn':
         mp.set_start_method('spawn', force=True)
         print("✅ Set multiprocessing start method to 'spawn'.")
    else:
         print(f"✅ Multiprocessing start method already '{current_start_method}'.")
except Exception as e_spawn:
    print(f"⚠️ Warning: Could not set multiprocessing start method to 'spawn'. Error: {e_spawn}")
    print("   CUDA GPU usage in parallel workers might fail.")


print("Cell 0 execution complete.")

✅ UMAP imported successfully.
--- Cell 0: Initial Setup (Emergenics - Phase 2 Ready) (2025-04-15 15:48:33) ---
✅ CUDA available with 1 GPU(s).
✅ Using GPU 0: NVIDIA GeForce RTX 2060
PyTorch Device set to: cuda:0
Checked/created base directories (Phase 1: emergenics_phase1_results, Phase 2: emergenics_phase2_results).
✅ Set multiprocessing start method to 'spawn'.
Cell 0 execution complete.


In [2]:
# Cell 1: Configuration (Phase 2 - Updates - Corrected Metrics Loading)
# Description: Adds Phase 2 configuration parameters. Correctly loads Phase 1
#              key metrics (pc values) to define targeted sweep ranges.

import numpy as np
import os
import json
import time
import traceback
import copy
import warnings # Import warnings

print(f"\n--- Cell 1: Configuration (Phase 2 - Updates - Corrected Metrics Loading) ---")

# --- Experiment Setup ---
# Load existing Phase 1 config if possible, otherwise define defaults
config_save_path_phase1 = None
phase1_output_dir = None # Store Phase 1 output directory
output_dir_base_phase1 = "emergenics_phase1_results"
exp_pattern_phase1 = "Emergenics_Phase1_5D_HDC_RSV_N357" # Match the pattern used in Phase 1
phase1_config = {} # Initialize empty config dict

try:
    # Check if a config dict already exists globally (e.g., from previous run)
    config_loaded_from_global = False
    if 'config' in globals() and isinstance(globals()['config'], dict) and 'OUTPUT_DIR' in globals()['config']:
        # Use already loaded config if it seems valid (has OUTPUT_DIR)
        loaded_config_check = globals()['config']
        # Heuristic check: if it contains Phase 2 keys, it might be the wrong one.
        # If it lacks Phase 2 keys, assume it's the intended Phase 1 config.
        if 'GRAPH_MODEL_PARAMS_PHASE2' not in loaded_config_check:
             phase1_config = loaded_config_check
             phase1_output_dir = phase1_config.get('OUTPUT_DIR')
             if phase1_output_dir:
                  config_save_path_phase1 = os.path.join(phase1_output_dir, "run_config_phase1.json")
                  print(f"  Using Phase 1 config loaded previously from: {phase1_output_dir}")
                  config_loaded_from_global = True
             else:
                  print("  Previously loaded config lacks OUTPUT_DIR. Searching for latest Phase 1 run.")
        else:
            print("  Global 'config' seems to be from Phase 2 already. Searching for latest Phase 1 run.")


    # If not loaded from global, search filesystem
    if not config_loaded_from_global:
        if os.path.isdir(output_dir_base_phase1):
            all_subdirs = [d for d in os.listdir(output_dir_base_phase1) if os.path.isdir(os.path.join(output_dir_base_phase1, d)) and d.startswith(exp_pattern_phase1)]
            if all_subdirs:
                latest_run_dir = max([os.path.join(output_dir_base_phase1, d) for d in all_subdirs], key=os.path.getmtime)
                config_save_path_phase1 = os.path.join(latest_run_dir, "run_config_phase1.json")
                if os.path.exists(config_save_path_phase1):
                    with open(config_save_path_phase1, 'r') as f:
                        phase1_config = json.load(f)
                    phase1_output_dir = phase1_config.get('OUTPUT_DIR', latest_run_dir) # Get output dir from config or use found dir
                    print(f"  Loaded Phase 1 config from latest run: {latest_run_dir}")
                else:
                    print(f"  Config file not found in latest Phase 1 dir: {latest_run_dir}. Using defaults.")
                    phase1_config = {} # Fallback
            else:
                 print(f"  No Phase 1 experiment directories found matching pattern '{exp_pattern_phase1}'. Using defaults.")
                 phase1_config = {} # Fallback
        else:
            print(f"  Phase 1 base directory '{output_dir_base_phase1}' not found. Using defaults.")
            phase1_config = {} # Fallback

except Exception as e_load_cfg:
    print(f"  Error loading Phase 1 config: {e_load_cfg}. Using defaults.")
    traceback.print_exc(limit=1)
    phase1_config = {} # Fallback

# --- Load Phase 1 Key Metrics (pc values) ---
phase1_key_metrics = {} # Initialize empty metrics dict
phase1_key_metrics_path = None
if phase1_output_dir: # Check if we identified the Phase 1 output directory
    phase1_exp_name = phase1_config.get('EXPERIMENT_NAME', os.path.basename(phase1_output_dir)) # Get exp name from config or dir name
    phase1_key_metrics_path = os.path.join(phase1_output_dir, f"{phase1_exp_name}_key_metrics.json")
    if os.path.exists(phase1_key_metrics_path):
        try:
            with open(phase1_key_metrics_path, 'r') as f_metrics:
                phase1_key_metrics = json.load(f_metrics)
            print(f"  Successfully loaded Phase 1 key metrics from: {phase1_key_metrics_path}")
        except Exception as e_load_metrics:
            print(f"  Warning: Failed to load or parse Phase 1 key metrics file '{phase1_key_metrics_path}': {e_load_metrics}")
            phase1_key_metrics = {} # Reset on failure
    else:
        print(f"  Warning: Phase 1 key metrics file not found at: {phase1_key_metrics_path}")
        phase1_key_metrics = {} # Set empty if file not found
else:
    print("  Warning: Could not determine Phase 1 output directory. Unable to load key metrics.")
    phase1_key_metrics = {} # Set empty if dir unknown


# --- Experiment Naming for Phase 2 ---
EXPERIMENT_BASE_NAME_PH2 = phase1_config.get("EXPERIMENT_BASE_NAME", "Emergenics") + "_Phase2" # Append Phase2 marker
EXPERIMENT_NAME = f"{EXPERIMENT_BASE_NAME_PH2}_{time.strftime('%Y%m%d_%H%M%S')}"
print(f"🧪 Phase 2 Experiment Name: {EXPERIMENT_NAME}")

# --- Inherit or Define Core Model & Simulation Parameters ---
# Use .get() with defaults for robustness
STATE_DIM = phase1_config.get('STATE_DIM', 5)
MAX_SIMULATION_STEPS = phase1_config.get('MAX_SIMULATION_STEPS', 200)
CONVERGENCE_THRESHOLD = phase1_config.get('CONVERGENCE_THRESHOLD', 1e-4)
RULE_PARAMS = phase1_config.get('RULE_PARAMS', {
    'activation_threshold': 0.5, 'activation_increase_rate': 0.15, 'activation_decay_rate': 0.05,
    'inhibition_threshold': 0.5, 'inhibition_increase_rate': 0.1, 'inhibition_decay_rate': 0.1,
    'inhibition_feedback_threshold': 0.6, 'inhibition_feedback_strength': 0.3,
    'diffusion_factor': 0.05, 'noise_level': 0.001, 'harmonic_factor': 0.05,
    'w_decay_rate': 0.05, 'x_decay_rate': 0.05, 'y_decay_rate': 0.05
}) # Simplified default if needed
SYSTEM_SIZES = phase1_config.get('SYSTEM_SIZES', [300, 500, 700])
PRIMARY_ORDER_PARAMETER = phase1_config.get('PRIMARY_ORDER_PARAMETER', 'variance_norm')
GRAPH_MODEL_PARAMS = phase1_config.get('GRAPH_MODEL_PARAMS', {
    'WS': { 'k_neighbors': 4, 'p_values': np.logspace(-5, 0, 20) },
    'SBM': { 'n_communities': 2, 'p_inter': 0.01, 'p_intra_values': np.linspace(0.01, 0.5, 20) },
    'RGG': { 'radius_values': np.linspace(0.05, 0.5, 20) }
})
# Ensure parameter values are lists/arrays, not strings from JSON load
for model, params in GRAPH_MODEL_PARAMS.items():
     for key, value in params.items():
          if key.endswith('_values') and isinstance(value, list):
               params[key] = np.array(value)


NUM_INSTANCES_PER_PARAM = phase1_config.get('NUM_INSTANCES_PER_PARAM', 10) # Can reduce for Phase 2 focused runs if needed
NUM_TRIALS_PER_INSTANCE = phase1_config.get('NUM_TRIALS_PER_INSTANCE', 3) # Can reduce
PARALLEL_WORKERS = phase1_config.get('PARALLEL_WORKERS', os.cpu_count()) # Use CPU count as fallback

print(f"🧬 Core Params: State Dim={STATE_DIM}, Max Steps={MAX_SIMULATION_STEPS}")
print(f"📐 Baseline Rule Params Loaded/Defined.")
print(f"🔢 System Sizes (N) for Analysis: {SYSTEM_SIZES}")
print(f"🕸️ Graph Model Params Defined: {list(GRAPH_MODEL_PARAMS.keys())}")

# --- Phase 2 Specific Parameters ---

# 2.1 Information Processing
STORE_STATE_HISTORY = True # Set to True to enable history-dependent metrics
STATE_HISTORY_INTERVAL = 10 # Store state every N steps (reduces memory vs storing every step)
INFO_METRICS_TO_CALC = ['mean_final_state_entropy'] # Add metrics like 'mean_step_entropy' later if needed
print(f"📊 Info Processing: Store History={STORE_STATE_HISTORY} (Interval: {STATE_HISTORY_INTERVAL}), Metrics={INFO_METRICS_TO_CALC}")

# 2.2 Attractor Landscape
RUN_PCA_ANALYSIS = True # Flag to control running the potentially slow PCA cell
RUN_UMAP_ANALYSIS = True # Flag to control running UMAP analysis
UMAP_PARAMS = {'n_neighbors': 15, 'min_dist': 0.1, 'n_components': 2, 'metric': 'euclidean'}
NUM_SAMPLES_FOR_LANDSCAPE = 500 # Number of final state vectors to collect per parameter setting for UMAP/PCA
print(f"🗺️ Attractor Landscape: Run PCA={RUN_PCA_ANALYSIS}, Run UMAP={RUN_UMAP_ANALYSIS} (Samples per param: {NUM_SAMPLES_FOR_LANDSCAPE})")
print(f"   UMAP Params: {UMAP_PARAMS}")

# 2.3 Perturbation Response
RUN_PERTURBATION_ANALYSIS = True # Flag to control perturbation sweeps
PERTURBATION_CONFIG = {
    'apply_at_step': 50,        # When to apply the perturbation
    'duration_steps': 5,        # How long the perturbation lasts
    'target_node_fraction': 0.01, # Fraction of nodes to perturb
    'target_dimension': 0,      # Which state dimension (e.g., 'u')
    'perturbation_value': 1.0   # Value to clamp the state dimension to
}
PERTURBATION_METRICS_TO_CALC = ['relaxation_time', 'perturbation_spread'] # Metrics calculated by modified worker
print(f"⚡ Perturbation: Run Analysis={RUN_PERTURBATION_ANALYSIS}")
print(f"   Perturbation Config: {PERTURBATION_CONFIG}")
print(f"   Perturbation Metrics: {PERTURBATION_METRICS_TO_CALC}")

# --- Phase 2 Targeted Sweep Parameters ---
# Define parameter ranges focused around the critical points found in Phase 1
# Use .get() on the loaded phase1_key_metrics dictionary with defaults
# ** CORRECTED AREA **
pc_ws = phase1_key_metrics.get('final_pc_ws_chi', 0.001) # Use loaded metric or default guess
pc_sbm = phase1_key_metrics.get('final_pc_sbm_chi', 0.1)
pc_rgg = phase1_key_metrics.get('final_pc_rgg_chi', 0.28)
print(f"  Using Phase 1 critical points for targeting: pc(WS)≈{pc_ws:.4g}, pc(SBM)≈{pc_sbm:.4g}, rc(RGG)≈{pc_rgg:.4g}")

# Example: Define focused ranges (adjust density/width as needed)
FOCUS_FACTOR = 5 # How many times wider than pc to scan (linear scale relative to pc)
NUM_POINTS_FOCUS = 15 # Number of points in the focused scan

# Define GRAPH_MODEL_PARAMS_PHASE2 dictionary
GRAPH_MODEL_PARAMS_PHASE2 = {}

# WS Targeted Range
ws_base_params = GRAPH_MODEL_PARAMS.get('WS', {})
p_start_ws = max(1e-6, pc_ws / (FOCUS_FACTOR * 2)) # Ensure positive start, maybe geometric mean?
p_end_ws = pc_ws * (FOCUS_FACTOR * 2) # Extend further above
# Use logspace for WS 'p' parameter as it spans orders of magnitude
ws_p_values_focus = np.logspace(np.log10(p_start_ws), np.log10(p_end_ws), NUM_POINTS_FOCUS)
GRAPH_MODEL_PARAMS_PHASE2['WS'] = {
    'k_neighbors': ws_base_params.get('k_neighbors', 4),
    'p_values': np.unique(ws_p_values_focus) # Ensure unique values
}

# SBM Targeted Range
sbm_base_params = GRAPH_MODEL_PARAMS.get('SBM', {})
p_intra_start_sbm = max(sbm_base_params.get('p_inter', 0.01) * 1.01, pc_sbm / FOCUS_FACTOR) # Ensure > p_inter and positive
p_intra_end_sbm = pc_sbm * FOCUS_FACTOR
sbm_p_intra_values_focus = np.linspace(p_intra_start_sbm, p_intra_end_sbm, NUM_POINTS_FOCUS)
GRAPH_MODEL_PARAMS_PHASE2['SBM'] = {
    'n_communities': sbm_base_params.get('n_communities', 2),
    'p_inter': sbm_base_params.get('p_inter', 0.01),
    'p_intra_values': np.unique(sbm_p_intra_values_focus) # Ensure unique
}

# RGG Targeted Range
rgg_base_params = GRAPH_MODEL_PARAMS.get('RGG', {})
radius_start_rgg = max(0.01, pc_rgg / FOCUS_FACTOR) # Ensure positive start
radius_end_rgg = pc_rgg * FOCUS_FACTOR
rgg_radius_values_focus = np.linspace(radius_start_rgg, radius_end_rgg, NUM_POINTS_FOCUS)
GRAPH_MODEL_PARAMS_PHASE2['RGG'] = {
    'radius_values': np.unique(rgg_radius_values_focus) # Ensure unique
}

# Ensure no negative values resulted from calculations near zero (redundant but safe)
if 'p_values' in GRAPH_MODEL_PARAMS_PHASE2.get('WS',{}):
     GRAPH_MODEL_PARAMS_PHASE2['WS']['p_values'] = GRAPH_MODEL_PARAMS_PHASE2['WS']['p_values'][GRAPH_MODEL_PARAMS_PHASE2['WS']['p_values'] > 0]
if 'p_intra_values' in GRAPH_MODEL_PARAMS_PHASE2.get('SBM',{}):
     p_inter_sbm = GRAPH_MODEL_PARAMS_PHASE2['SBM'].get('p_inter', 0.01)
     GRAPH_MODEL_PARAMS_PHASE2['SBM']['p_intra_values'] = np.maximum(GRAPH_MODEL_PARAMS_PHASE2['SBM']['p_intra_values'], p_inter_sbm * 1.01)


print(f"🎯 Phase 2 Targeted Graph Params:")
if 'WS' in GRAPH_MODEL_PARAMS_PHASE2: print(f"   WS p_values range: {GRAPH_MODEL_PARAMS_PHASE2['WS'].get('p_values', np.array([])).min():.1e} to {GRAPH_MODEL_PARAMS_PHASE2['WS'].get('p_values', np.array([])).max():.1e} ({len(GRAPH_MODEL_PARAMS_PHASE2['WS'].get('p_values', []))} points around {pc_ws:.1e})")
if 'SBM' in GRAPH_MODEL_PARAMS_PHASE2: print(f"   SBM p_intra_values range: {GRAPH_MODEL_PARAMS_PHASE2['SBM'].get('p_intra_values', np.array([])).min():.3f} to {GRAPH_MODEL_PARAMS_PHASE2['SBM'].get('p_intra_values', np.array([])).max():.3f} ({len(GRAPH_MODEL_PARAMS_PHASE2['SBM'].get('p_intra_values', []))} points around {pc_sbm:.3f})")
if 'RGG' in GRAPH_MODEL_PARAMS_PHASE2: print(f"   RGG radius_values range: {GRAPH_MODEL_PARAMS_PHASE2['RGG'].get('radius_values', np.array([])).min():.3f} to {GRAPH_MODEL_PARAMS_PHASE2['RGG'].get('radius_values', np.array([])).max():.3f} ({len(GRAPH_MODEL_PARAMS_PHASE2['RGG'].get('radius_values', []))} points around {pc_rgg:.3f})")


# --- Output Directory ---
OUTPUT_DIR_BASE = "emergenics_phase2_results" # New base directory for Phase 2
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, EXPERIMENT_NAME)
try:
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    print(f"➡️ Phase 2 Results will be saved in: {OUTPUT_DIR}")
except OSError as e_mkdir:
    print(f"❌ Error creating Phase 2 output directory '{OUTPUT_DIR}': {e_mkdir}")
    OUTPUT_DIR = "." # Fallback to current directory


# --- Save Configuration ---
config_save_path = os.path.join(OUTPUT_DIR, "run_config_phase2.json")
try:
    # Select uppercase variables and specific lowercase ones relevant to Phase 2
    config_to_save = {k: v for k, v in locals().items() if k.isupper()}
    # Add phase 1 inherited variables explicitly if needed for reproducibility
    config_to_save['STATE_DIM'] = STATE_DIM
    config_to_save['MAX_SIMULATION_STEPS'] = MAX_SIMULATION_STEPS
    config_to_save['CONVERGENCE_THRESHOLD'] = CONVERGENCE_THRESHOLD
    config_to_save['RULE_PARAMS'] = RULE_PARAMS
    config_to_save['SYSTEM_SIZES'] = SYSTEM_SIZES
    config_to_save['PRIMARY_ORDER_PARAMETER'] = PRIMARY_ORDER_PARAMETER
    config_to_save['GRAPH_MODEL_PARAMS'] = GRAPH_MODEL_PARAMS # Keep original Phase 1 ranges
    config_to_save['NUM_INSTANCES_PER_PARAM'] = NUM_INSTANCES_PER_PARAM
    config_to_save['NUM_TRIALS_PER_INSTANCE'] = NUM_TRIALS_PER_INSTANCE
    config_to_save['PARALLEL_WORKERS'] = PARALLEL_WORKERS
    # Add Phase 1 metrics file path for reference
    config_to_save['phase1_key_metrics_path'] = phase1_key_metrics_path # Store path used
    # Add Phase 2 specific configs
    config_to_save['STORE_STATE_HISTORY'] = STORE_STATE_HISTORY
    config_to_save['STATE_HISTORY_INTERVAL'] = STATE_HISTORY_INTERVAL
    config_to_save['INFO_METRICS_TO_CALC'] = INFO_METRICS_TO_CALC
    config_to_save['RUN_PCA_ANALYSIS'] = RUN_PCA_ANALYSIS
    config_to_save['RUN_UMAP_ANALYSIS'] = RUN_UMAP_ANALYSIS
    config_to_save['UMAP_PARAMS'] = UMAP_PARAMS
    config_to_save['NUM_SAMPLES_FOR_LANDSCAPE'] = NUM_SAMPLES_FOR_LANDSCAPE
    config_to_save['RUN_PERTURBATION_ANALYSIS'] = RUN_PERTURBATION_ANALYSIS
    config_to_save['PERTURBATION_CONFIG'] = PERTURBATION_CONFIG
    config_to_save['PERTURBATION_METRICS_TO_CALC'] = PERTURBATION_METRICS_TO_CALC
    config_to_save['GRAPH_MODEL_PARAMS_PHASE2'] = GRAPH_MODEL_PARAMS_PHASE2 # Save targeted ranges
    config_to_save['OUTPUT_DIR'] = OUTPUT_DIR # Save the *Phase 2* output dir

    # Make serializable
    def default_serializer(obj):
        if isinstance(obj, np.ndarray): return obj.tolist()
        if isinstance(obj, (np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64)): return int(obj)
        elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)): return float(obj)
        elif isinstance(obj, (np.complex_, np.complex64, np.complex128)): return {'real': obj.real, 'imag': obj.imag}
        elif isinstance(obj, (np.bool_)): return bool(obj)
        elif isinstance(obj, (np.void)): return None # Or some other representation
        try: return str(obj) # Fallback
        except: return '<not serializable>'

    with open(config_save_path, 'w') as f:
        json.dump(config_to_save, f, indent=4, default=default_serializer)
    print(f"   ✅ Saved Phase 2 configuration to {config_save_path}")
except Exception as e_save_cfg:
    print(f"   ⚠️ Warning: Could not save Phase 2 configuration. Error: {e_save_cfg}")
    traceback.print_exc(limit=1)

# Make config dictionary globally accessible for Phase 2
# Check if 'config_to_save' exists before assignment
if 'config_to_save' in locals():
    config = config_to_save
    print("   Phase 2 configuration assigned to global 'config'.")
else:
    print("   ⚠️ Could not assign config_to_save to global 'config'.")
    # Fallback: assign the potentially incomplete phase1_config
    config = phase1_config
    warnings.warn("Global 'config' assigned potentially incomplete Phase 1 config due to saving errors.", RuntimeWarning)


print("\nCell 1 execution complete.")


--- Cell 1: Configuration (Phase 2 - Updates - Corrected Metrics Loading) ---
  Loaded Phase 1 config from latest run: emergenics_phase1_results/Emergenics_Phase1_5D_HDC_RSV_N357_20250415_133241
  Successfully loaded Phase 1 key metrics from: emergenics_phase1_results/Emergenics_Phase1_5D_HDC_RSV_N357_20250415_133241/Emergenics_Phase1_5D_HDC_RSV_N357_20250415_133241_key_metrics.json
🧪 Phase 2 Experiment Name: Emergenics_Phase1_5D_HDC_RSV_N357_Phase2_20250415_154833
🧬 Core Params: State Dim=5, Max Steps=200
📐 Baseline Rule Params Loaded/Defined.
🔢 System Sizes (N) for Analysis: [300, 500, 700]
🕸️ Graph Model Params Defined: ['WS', 'SBM', 'RGG']
📊 Info Processing: Store History=True (Interval: 10), Metrics=['mean_final_state_entropy']
🗺️ Attractor Landscape: Run PCA=True, Run UMAP=True (Samples per param: 500)
   UMAP Params: {'n_neighbors': 15, 'min_dist': 0.1, 'n_components': 2, 'metric': 'euclidean'}
⚡ Perturbation: Run Analysis=True
   Perturbation Config: {'apply_at_step': 50, 'dur

In [3]:
# Cell 2: Helper Function Definitions (Phase 2 Implementation - Updated Worker - JIT Fix)
# Description: Defines helper functions. Includes get_sweep_parameters, generate_graph,
#              metric calculations, the JIT-compiled GPU step function (with JIT dim fix).
#              Defines `run_single_instance_phase2` incorporating state history,
#              perturbation logic, and Phase 2 metrics. Adds UMAP/info metric helpers.

import numpy as np
import pandas as pd
import networkx as nx
import itertools
import warnings
import time
from scipy.stats import entropy as calculate_scipy_entropy
from scipy.sparse import coo_matrix
import traceback
import torch
import copy
import os # For UMAP saving check
import pickle # For UMAP saving check

# --- Try importing optional Phase 2 dependencies ---
try:
    import umap
    print("✅ UMAP imported successfully.")
    UMAP_AVAILABLE = True
except ImportError:
    print("⚠️ UMAP not found. Install with 'pip install umap-learn'. UMAP analysis will be skipped.")
    UMAP_AVAILABLE = False
# Add other optional imports here (e.g., for specific information metrics)

print("\n--- Cell 2: Helper Function Definitions (Phase 2 Implementation - Updated Worker - JIT Fix) ---")

# --- 1. Parameter Generation (Copied from Phase 1 Cell 2) ---
def get_sweep_parameters(graph_model_name, model_params, system_sizes, instances, trials, sensitivity_param=None, sensitivity_values=None):
    """Generates parameter dictionaries for simulation tasks, ensuring primary sweep param is always included."""
    all_task_params = []
    base_seed = int(time.time()) % 10000
    param_counter = 0
    primary_param_key = None
    primary_param_name = None
    primary_param_values = None
    fixed_params = {}

    # Identify primary sweep parameter (e.g., p_values) and fixed params
    if not isinstance(model_params, dict): model_params = {} # Handle None case
    key_iterator = iter(model_params.keys())
    current_key = next(key_iterator, None)
    while current_key is not None:
        values = model_params[current_key]
        # Check if it's a list-like structure intended for sweeping
        is_sweep_param = isinstance(values, (list, np.ndarray))
        if is_sweep_param:
            primary_param_key = current_key
            primary_param_name = current_key.replace('_values', '')
            primary_param_values = values
            # Stop after finding the first sweep parameter if desired, or continue to find last? Assume first.
            # break
        else:
            fixed_params[current_key] = values
        current_key = next(key_iterator, None)


    # Handle cases where primary sweep param might not be explicitly a list/array
    if primary_param_key is None:
        if graph_model_name == 'RGG' and 'radius_values' in model_params:
            primary_param_key = 'radius_values'
            primary_param_name = 'radius'
            primary_param_values = model_params['radius_values']
        elif graph_model_name == 'SBM' and 'p_intra_values' in model_params:
            primary_param_key = 'p_intra_values'
            primary_param_name = 'p_intra'
            primary_param_values = model_params['p_intra_values']
        elif graph_model_name == 'WS' and 'p_values' in model_params:
            primary_param_key = 'p_values'
            primary_param_name = 'p'
            primary_param_values = model_params['p_values']
        else:
            # Fallback if no sweep parameter identified
            primary_param_name = 'param'
            primary_param_values = [0] # Dummy sweep value
            warnings.warn(f"Sweep param not found for {graph_model_name}. Using dummy 'param'.")

    # Ensure primary_param_values is iterable
    if not hasattr(primary_param_values, '__iter__'):
        primary_param_values = [primary_param_values] # Make it a list if it's a single value

    # Determine the actual column name for the primary sweep parameter
    primary_param_col_name = primary_param_name + '_value'

    # Determine sensitivity loop values ([None] if not a sensitivity sweep)
    use_sensitivity = isinstance(sensitivity_param, str) and sensitivity_param != "" and isinstance(sensitivity_values, (list, np.ndarray)) and len(sensitivity_values) > 0
    if use_sensitivity:
        sens_loop_values = sensitivity_values
    else:
        sens_loop_values = [None]

    # Main parameter generation loops using while loops
    n_idx = 0
    while n_idx < len(system_sizes):
        N = system_sizes[n_idx]
        p_idx = 0
        while p_idx < len(primary_param_values):
            p_val = primary_param_values[p_idx]
            sens_idx = 0
            while sens_idx < len(sens_loop_values):
                sens_val = sens_loop_values[sens_idx]
                inst_idx = 0
                while inst_idx < instances:
                    graph_seed = base_seed + param_counter + inst_idx * 13
                    trial_idx = 0
                    while trial_idx < trials:
                        sim_seed = base_seed + param_counter + inst_idx * 101 + trial_idx * 7
                        task = {
                            'model': graph_model_name, 'N': N,
                            'fixed_params': fixed_params.copy(),
                            # Explicitly include primary sweep param name/value
                            primary_param_col_name: p_val,
                            'instance': inst_idx, 'trial': trial_idx,
                            'graph_seed': graph_seed, 'sim_seed': sim_seed,
                            'rule_param_name': sensitivity_param if use_sensitivity else None, # Only add if doing sensitivity
                            'rule_param_value': sens_val if use_sensitivity else None
                        }
                        all_task_params.append(task)
                        param_counter += 1
                        trial_idx += 1
                    inst_idx += 1
                sens_idx += 1
            p_idx += 1
        n_idx += 1

    return all_task_params


# --- 2. Graph Generation (Copied from Phase 1 Cell 2) ---
def generate_graph(model_name, params, N, seed):
    """Generates a graph using NetworkX."""
    np.random.seed(seed)
    G = nx.Graph()
    try:
        # Prepare parameters for NetworkX functions
        gen_params = params.copy()
        # Find the parameter key ending with '_value' (e.g., 'p_value')
        base_param_name = None
        param_key_iterator = iter(gen_params.keys())
        current_key = next(param_key_iterator, None)
        while current_key is not None:
            if isinstance(current_key, str) and current_key.endswith('_value'):
                base_param_name = current_key.replace('_value', '')
                break
            current_key = next(param_key_iterator, None)

        # Rename key if generate_graph expects base name (e.g., 'p' instead of 'p_value')
        if base_param_name is not None:
            value_key = base_param_name + '_value'
            if value_key in gen_params:
                 gen_params[base_param_name] = gen_params.pop(value_key)


        # Generate graph based on model name
        if model_name == 'WS':
            k = gen_params.get('k_neighbors', 4)
            p_rewire = gen_params.get('p', 0.1)  # Expects 'p' key now
            k = int(k)
            # Ensure k is even and >= 2
            if k < 2: k = 2
            if k % 2 != 0: k = k - 1
            k = min(k, N - 1) # Ensure k < N
            if N > k:
                G = nx.watts_strogatz_graph(n=N, k=k, p=p_rewire, seed=seed)
            else:
                # Fallback for small N relative to k (or if k calculation fails)
                warnings.warn(f"WS N={N} <= k={k}. Generating complete graph instead.", RuntimeWarning)
                G = nx.complete_graph(N)
        elif model_name == 'SBM':
            n_communities = gen_params.get('n_communities', 2)
            p_intra = gen_params.get('p_intra', 0.2) # Expects 'p_intra'
            p_inter = gen_params.get('p_inter', 0.01)
            if N < n_communities:
                n_communities = N # Cannot have more communities than nodes
                warnings.warn(f"SBM N={N} < n_communities={gen_params.get('n_communities')}. Setting n_communities=N.", RuntimeWarning)
            if n_communities <= 0:
                 raise ValueError("Number of communities must be positive.")
            # Calculate community sizes as evenly as possible
            sizes = [] # Initialize empty list
            base_size = N // n_communities
            remainder = N % n_communities
            i = 0
            while i < n_communities:
                 current_size = base_size
                 if i < remainder:
                      current_size = current_size + 1
                 sizes.append(current_size)
                 i = i + 1

            # Check for zero-sized communities which SBM doesn't allow
            if 0 in sizes:
                 raise ValueError(f"SBM calculation resulted in zero-sized community for N={N}, communities={n_communities}")

            # Create probability matrix
            probs = []
            row_idx = 0
            while row_idx < n_communities:
                 row = []
                 col_idx = 0
                 while col_idx < n_communities:
                      if row_idx == col_idx:
                           row.append(p_intra) # Intra-community probability
                      else:
                           row.append(p_inter) # Inter-community probability
                      col_idx = col_idx + 1
                 probs.append(row)
                 row_idx = row_idx + 1
            G = nx.stochastic_block_model(sizes=sizes, p=probs, seed=seed)
        elif model_name == 'RGG':
            radius = gen_params.get('radius', 0.1) # Expects 'radius'
            G = nx.random_geometric_graph(n=N, radius=radius, seed=seed)
        else:
            raise ValueError(f"Unknown graph model: {model_name}")

    except Exception as e:
        G = nx.Graph() # Return empty graph on failure
        warnings.warn(f"Graph generation failed for {model_name} N={N} with params {params}: {e}", RuntimeWarning)
        # Optionally print traceback for debugging
        traceback.print_exc(limit=1)


    # Relabel nodes to strings if needed
    num_nodes_generated = G.number_of_nodes()
    if num_nodes_generated > 0:
         needs_relabel = False
         node_iter = iter(G.nodes())
         try:
              first_node = next(node_iter)
              if not isinstance(first_node, str):
                   needs_relabel = True
              # Check remaining only if first wasn't a string (optimization)
              if needs_relabel is False: # Only check others if the first was okay
                   while True:
                       node = next(node_iter)
                       if not isinstance(node, str):
                           needs_relabel = True
                           break # Found one, no need to check more
         except StopIteration:
              pass # Finished iterating

         if needs_relabel:
              node_mapping = {}
              node_idx = 0
              original_nodes = list(G.nodes()) # Create a list to iterate over original nodes
              while node_idx < len(original_nodes):
                   node = original_nodes[node_idx]
                   node_mapping[node] = str(node) # Use string representation
                   node_idx = node_idx + 1
              G = nx.relabel_nodes(G, node_mapping, copy=False) # Use copy=False for efficiency
    return G

# --- 3. Metrics Calculation Helpers (Copied from Phase 1 Cell 2) ---
def calculate_variance_norm(final_states_array):
    """Calculates variance across nodes, averaged across dimensions."""
    if final_states_array is None: return np.nan
    # Ensure it's a numpy array
    if not isinstance(final_states_array, np.ndarray):
        try: final_states_array = np.array(final_states_array)
        except Exception: return np.nan

    num_nodes = final_states_array.shape[0]
    if num_nodes == 0: return 0.0 # Variance is 0 for no nodes
    # Check for sufficient dimensions
    if final_states_array.ndim < 2: return np.var(final_states_array) if num_nodes > 0 else 0.0 # Handle 1D case

    try:
        variance_per_dim = np.var(final_states_array, axis=0)
        mean_variance = np.mean(variance_per_dim)
        # Check for NaN/Inf in result
        if np.isnan(mean_variance) or np.isinf(mean_variance):
            return np.nan
        else:
            return mean_variance
    except Exception as e_var:
        warnings.warn(f"Variance norm calculation failed: {e_var}", RuntimeWarning)
        return np.nan

def calculate_entropy_binned(data_vector, bins=10, range_lims=(-1.5, 1.5)):
    """Calculates Shannon entropy for a single dimension using numpy histogram."""
    if data_vector is None: return np.nan
    # Ensure data_vector is numpy array
    if not isinstance(data_vector, np.ndarray):
        try: data_vector = np.array(data_vector)
        except Exception: return np.nan

    if data_vector.size <= 1: return 0.0 # Entropy is 0 for single point or empty
    try:
        valid_data = data_vector[~np.isnan(data_vector)] # Filter NaNs
        if valid_data.size <= 1: return 0.0
        # Check range validity
        use_dynamic_range = False
        if range_lims is None or len(range_lims) != 2 or range_lims[0] >= range_lims[1]:
            warnings.warn(f"Invalid/missing range_lims for entropy: {range_lims}. Using data min/max.", RuntimeWarning)
            use_dynamic_range = True

        if use_dynamic_range:
            min_val = np.min(valid_data)
            max_val = np.max(valid_data)
            # If all values are the same, entropy is 0
            if abs(min_val - max_val) < 1e-9: return 0.0
            hist_range = (min_val, max_val)
        else:
            hist_range = range_lims

        # Check number of bins
        if not isinstance(bins, int) or bins <= 0:
            warnings.warn(f"Invalid bins value: {bins}. Using default 10.", RuntimeWarning)
            bins = 10

        counts, bin_edges = np.histogram(valid_data, bins=bins, range=hist_range)
        # Filter out zero counts before calculating probabilities
        non_zero_counts = counts[counts > 0]
        if non_zero_counts.size == 0: return 0.0 # No counts in any bin within the range

        # Calculate entropy using scipy's entropy function (base=None means natural log)
        entropy_value = calculate_scipy_entropy(non_zero_counts, base=None)
        # Normalize by log(number of bins) if desired (optional, commented out)
        # max_entropy = np.log(bins)
        # normalized_entropy = entropy_value / max_entropy if max_entropy > 0 else 0.0
        return entropy_value
    except Exception as e_ent:
        warnings.warn(f"Entropy calculation failed: {e_ent}", RuntimeWarning)
        return np.nan

def calculate_pairwise_dot_energy(final_states_array, adj_matrix_coo):
    """Calculates E = -0.5 * sum_{i<j} A[i,j] * dot(Si, Sj) using numpy and sparse COO"""
    total_energy = 0.0
    if final_states_array is None: return np.nan
    # Ensure it's a numpy array
    if not isinstance(final_states_array, np.ndarray):
         try: final_states_array = np.array(final_states_array)
         except Exception: return np.nan

    num_nodes = final_states_array.shape[0]
    state_dim = final_states_array.shape[1] if final_states_array.ndim > 1 else 1
    if num_nodes == 0: return 0.0
    if adj_matrix_coo is None: return 0.0

    try:
        # Ensure COO format
        if not isinstance(adj_matrix_coo, coo_matrix):
             try: adj_matrix_coo = coo_matrix(adj_matrix_coo)
             except Exception: warnings.warn("Failed to convert adj matrix to COO for energy calc.", RuntimeWarning); return np.nan

        # Pre-extract COO data
        adj_row = adj_matrix_coo.row
        adj_col = adj_matrix_coo.col
        adj_data = adj_matrix_coo.data
        num_edges = len(adj_data)
        edge_idx = 0

        # Iterate through sparse matrix non-zero elements using while loop
        while edge_idx < num_edges:
            i = adj_row[edge_idx]
            j = adj_col[edge_idx]
            weight = adj_data[edge_idx]

            # Process only upper triangle (i < j) to avoid double counting for undirected graphs
            # Also ensure indices are within bounds
            if i < j and i < num_nodes and j < num_nodes:
                state_i = final_states_array[i, :]
                state_j = final_states_array[j, :]
                # Check if states are valid before dot product
                if not np.isnan(state_i).any() and not np.isnan(state_j).any():
                     dot_product = np.dot(state_i, state_j)
                     if not np.isnan(dot_product): # Check dot product result
                         total_energy = total_energy + (weight * dot_product)
                     # else: warnings.warn(f"NaN dot product encountered for edge ({i},{j}). Skipping.", RuntimeWarning) # Reduce verbosity
                # else: warnings.warn(f"NaN state vector encountered for node {i} or {j}. Skipping edge.", RuntimeWarning) # Reduce verbosity
            # else: # Debugging index issues if needed
            #     if not (i < num_nodes and j < num_nodes): warnings.warn(f"Index out of bounds ({i},{j} vs N={num_nodes}). Skipping edge.", RuntimeWarning)

            edge_idx = edge_idx + 1 # Increment loop counter

        # Apply the -0.5 factor
        final_energy = -0.5 * total_energy
        return final_energy
    except Exception as e_en:
        warnings.warn(f"Energy calculation failed: {e_en}", RuntimeWarning)
        traceback.print_exc(limit=1)
        return np.nan

# --- 3.5 NEW: Phase 2 Metrics Calculation Helpers ---
def calculate_mean_final_state_entropy(final_states_array, bins=10, range_lims=(-1.5, 1.5)):
    """Calculates entropy for each dimension and averages."""
    if final_states_array is None: return np.nan
    # Ensure numpy array
    if not isinstance(final_states_array, np.ndarray):
        try: final_states_array = np.array(final_states_array)
        except Exception: return np.nan

    if final_states_array.ndim < 2: return np.nan # Need at least 2 dimensions (node, state_dim)
    num_dims = final_states_array.shape[1]
    if num_dims == 0: return 0.0 # No dimensions, entropy is 0

    entropies = []
    dim_idx = 0
    while dim_idx < num_dims:
        entropy_val = calculate_entropy_binned(final_states_array[:, dim_idx], bins=bins, range_lims=range_lims)
        # Only append valid entropy values
        if not np.isnan(entropy_val):
            entropies.append(entropy_val)
        dim_idx = dim_idx + 1

    # Calculate mean only if we have valid entropies
    if len(entropies) > 0:
        mean_entropy = np.mean(entropies)
        return mean_entropy
    else:
        # Return NaN if no valid entropies were calculated (e.g., all input arrays were constant)
        return np.nan

def calculate_relaxation_time(avg_change_history, conv_thresh, perturbation_end_step):
    """Estimates steps to reconverge after perturbation end."""
    # Check if history is valid and long enough
    if avg_change_history is None: return np.nan
    if not isinstance(avg_change_history, (list, np.ndarray)): return np.nan
    history_len = len(avg_change_history)
    # Check if perturbation end step is valid within the history
    if not isinstance(perturbation_end_step, int) or perturbation_end_step < 0 or perturbation_end_step >= history_len:
        return np.nan # Invalid end step

    steps_after_perturb = 0
    index = perturbation_end_step # Start checking from the step AFTER perturbation ends
    converged_after_perturb = False
    while index < history_len:
        current_avg_change = avg_change_history[index]
        # Check if change is below threshold
        if current_avg_change < conv_thresh:
            converged_after_perturb = True
            break # Stop counting at first convergence
        steps_after_perturb += 1
        index += 1

    # Return steps if converged, otherwise return NaN (or max steps checked?)
    # Returning NaN indicates it *never* reached threshold within the observed history
    if converged_after_perturb:
        return steps_after_perturb
    else:
        return np.nan # Indicate failure to converge after perturbation within max_steps

def calculate_perturbation_spread(final_states_perturbed, final_states_baseline, threshold=0.1):
    """Calculates fraction of nodes significantly changed by perturbation (Euclidean distance)."""
    # Basic validation
    if final_states_perturbed is None or final_states_baseline is None: return np.nan
    if not isinstance(final_states_perturbed, np.ndarray) or not isinstance(final_states_baseline, np.ndarray): return np.nan
    if final_states_perturbed.shape != final_states_baseline.shape: return np.nan

    num_nodes = final_states_perturbed.shape[0]
    if num_nodes == 0: return 0.0 # No nodes, no spread

    try:
        # Calculate Euclidean distance squared for efficiency, then take sqrt
        diff_sq = (final_states_perturbed - final_states_baseline)**2
        # Sum squared differences across state dimensions
        dist_sq_per_node = np.sum(diff_sq, axis=1)
        # Take square root to get Euclidean distance
        distances = np.sqrt(dist_sq_per_node)

        # Handle potential NaNs in distances if states had NaNs (should be filtered earlier ideally)
        valid_distances = distances[~np.isnan(distances)]
        if valid_distances.size == 0: return 0.0 # No valid distances to compare

        # Count nodes where distance exceeds threshold
        nodes_affected = np.sum(valid_distances > threshold)
        # Calculate fraction relative to the number of nodes with valid distances
        spread_fraction = float(nodes_affected) / float(valid_distances.size)
        return spread_fraction
    except Exception as e_spread:
        warnings.warn(f"Perturbation spread calculation failed: {e_spread}", RuntimeWarning)
        return np.nan

# --- 4. Core PyTorch Step Function (Corrected JIT dim argument) ---
# JIT requires type hints for rule params
@torch.jit.script
def hdc_5d_step_vectorized_torch(adj_sparse_tensor, current_states_tensor,
                                 rule_params_activation_threshold: float, rule_params_activation_increase_rate: float,
                                 rule_params_activation_decay_rate: float, rule_params_inhibition_threshold: float, # Unused but kept for signature
                                 rule_params_inhibition_increase_rate: float, # Unused
                                 rule_params_inhibition_decay_rate: float,
                                 rule_params_inhibition_feedback_threshold: float, rule_params_inhibition_feedback_strength: float,
                                 rule_params_diffusion_factor: float, rule_params_noise_level: float,
                                 rule_params_harmonic_factor: float, rule_params_w_decay_rate: float,
                                 rule_params_x_decay_rate: float, rule_params_y_decay_rate: float,
                                 device: torch.device):
    """ PyTorch implementation of the 5D HDC step function for GPU (JIT Compatible). """
    num_nodes = current_states_tensor.shape[0]
    state_dim = current_states_tensor.shape[1] # Should be 5, but check if possible
    if num_nodes == 0:
        return current_states_tensor, torch.tensor(0.0, device=device)

    # Extract states - Ensure slicing works if state_dim < 5
    current_u = current_states_tensor[:, 0]
    current_v = current_states_tensor[:, 1] if state_dim > 1 else torch.zeros_like(current_u)
    current_w = current_states_tensor[:, 2] if state_dim > 2 else torch.zeros_like(current_u)
    current_x = current_states_tensor[:, 3] if state_dim > 3 else torch.zeros_like(current_u)
    current_y = current_states_tensor[:, 4] if state_dim > 4 else torch.zeros_like(current_u)

    # Neighbor aggregation
    adj_float = adj_sparse_tensor.float()
    sum_neighbor_states = torch.sparse.mm(adj_float, current_states_tensor)
    # Calculate degrees robustly
    # *** CORRECTED LINE: Use dim=(1,) ***
    degrees = torch.sparse.sum(adj_float, dim=(1,)).to_dense() # Convert degrees to dense tensor
    degrees = degrees.unsqueeze(1) # Reshape to [N, 1] for broadcasting
    # Avoid division by zero for isolated nodes
    degrees = torch.max(degrees, torch.tensor(1.0, device=device))
    mean_neighbor_states = sum_neighbor_states / degrees
    # Use only 'u' dimension for activation influence
    neighbor_u_sum = sum_neighbor_states[:, 0]
    activation_influences = neighbor_u_sum # Using sum for activation influence

    # Initialize Deltas
    delta_u = torch.zeros_like(current_u)
    delta_v = torch.zeros_like(current_v) # Need even if state_dim < 2 for stacking later
    delta_w = torch.zeros_like(current_w) # Need even if state_dim < 3
    delta_x = torch.zeros_like(current_x) # Need even if state_dim < 4
    delta_y = torch.zeros_like(current_y) # Need even if state_dim < 5

    # Apply Activation rules (u dimension)
    act_increase_mask = activation_influences > rule_params_activation_threshold
    increase_u_val = rule_params_activation_increase_rate * (1.0 - current_u) # Base increase towards 1
    # Apply increase only where mask is True
    delta_u = torch.where(act_increase_mask, delta_u + increase_u_val, delta_u)
    # Apply decay universally
    delta_u = delta_u - (rule_params_activation_decay_rate * current_u)

    # Apply Inhibition rules (v dimension) - Only if state_dim > 1
    if state_dim > 1:
        inh_fb_mask = current_u > rule_params_inhibition_feedback_threshold
        increase_v_val = rule_params_inhibition_feedback_strength * (1.0 - current_v) # Base increase towards 1
        # Apply increase only where mask is True
        delta_v = torch.where(inh_fb_mask, delta_v + increase_v_val, delta_v)
        # Apply decay universally
        delta_v = delta_v - (rule_params_inhibition_decay_rate * current_v)

    # Apply Other decays (w, x, y dimensions) - Only if state_dim > 2, 3, 4 respectively
    if state_dim > 2:
        delta_w = delta_w - (rule_params_w_decay_rate * current_w)
    if state_dim > 3:
        delta_x = delta_x - (rule_params_x_decay_rate * current_x)
    if state_dim > 4:
        delta_y = delta_y - (rule_params_y_decay_rate * current_y)

    # Combine deltas - Stack only the dimensions that exist
    delta_list = [delta_u]
    if state_dim > 1: delta_list.append(delta_v)
    if state_dim > 2: delta_list.append(delta_w)
    if state_dim > 3: delta_list.append(delta_x)
    if state_dim > 4: delta_list.append(delta_y)
    delta_states = torch.stack(delta_list, dim=1)

    next_states_intermediate = current_states_tensor + delta_states

    # Diffusion
    diffusion_change = rule_params_diffusion_factor * (mean_neighbor_states - current_states_tensor)
    next_states_intermediate = next_states_intermediate + diffusion_change

    # Harmonic Term (acts on u dimension)
    # Check harmonic factor is non-zero using a tolerance for floating point comparison
    if abs(rule_params_harmonic_factor) > 1e-9:
        # Ensure degrees has shape [N] for multiplication with sin output
        harmonic_effect = rule_params_harmonic_factor * degrees.squeeze(-1) * torch.sin(neighbor_u_sum)
        # Add effect only to the 'u' dimension (index 0)
        next_states_intermediate[:, 0] = next_states_intermediate[:, 0] + harmonic_effect

    # Noise
    # Apply noise with the same shape as the current state tensor
    noise = torch.rand_like(current_states_tensor).uniform_(-rule_params_noise_level, rule_params_noise_level)
    next_states_noisy = next_states_intermediate + noise

    # Clip
    next_states_clipped = torch.clamp(next_states_noisy, min=-1.5, max=1.5)

    # Change metric (average absolute change across all nodes and dimensions)
    avg_state_change = torch.mean(torch.abs(next_states_clipped - current_states_tensor))

    return next_states_clipped, avg_state_change


# --- 5. MODIFIED Simulation Instance Runner for Phase 2 ---
def run_single_instance_phase2(
    graph, N, instance_params, trial_seed, rule_params_in,
    max_steps, conv_thresh, state_dim,
    calculate_energy=False, store_energy_history=False, # Inherited from Phase 1
    energy_type='pairwise_dot', metrics_to_calc=None, device=None, # Inherited
    # --- Phase 2 Additions ---
    store_state_history=False, # Flag to store history
    state_history_interval=1,  # Interval for storing history
    perturbation_params=None, # Dictionary with perturbation details
    phase2_metrics_to_calc=None # List of Phase 2 specific metrics
    ):
    """
    Runs one NA simulation, MODIFIED for Phase 2.
    Includes state history storage, perturbation application, and calculation
    of Phase 2 metrics (e.g., relaxation time, spread, state entropy).
    """
    # --- Combine metric lists ---
    all_metrics_requested = (metrics_to_calc or []) + (phase2_metrics_to_calc or [])
    all_metrics_requested = sorted(list(set(all_metrics_requested))) # Unique sorted list

    # --- Default Error Result ---
    nan_results = {metric: np.nan for metric in all_metrics_requested}
    # Add standard outputs expected from Phase 1 worker
    nan_results.update({
        'convergence_time': 0, 'termination_reason': 'error_before_start',
        'final_state_vector': None, 'final_energy': np.nan, 'energy_monotonic': False,
        'error_message': 'Initialization failed'
    })
    # Add Phase 2 specific outputs that might be expected
    nan_results.update({
        'state_history': None, 'avg_change_history': None,
        'relaxation_time': np.nan, 'perturbation_spread': np.nan,
        'mean_final_state_entropy': np.nan,
        'baseline_state_for_spread': None
    })
    # Identify primary sweep param for error reporting
    primary_metric_name_default = instance_params.get('primary_metric', 'variance_norm')
    nan_results['order_parameter'] = np.nan
    nan_results['metric_name'] = primary_metric_name_default
    nan_results['sensitivity_param_name'] = instance_params.get('rule_param_name')
    nan_results['sensitivity_param_value'] = instance_params.get('rule_param_value')
    # Find the sweep parameter key dynamically
    param_key_nan = 'unknown_sweep_param' # Default
    param_key_iter = iter(instance_params.keys())
    current_key = next(param_key_iter, None)
    while current_key is not None:
         if isinstance(current_key, str) and current_key.endswith('_value'):
              param_key_nan = current_key
              break
         current_key = next(param_key_iter, None)
    nan_results[param_key_nan] = instance_params.get(param_key_nan, np.nan)

    # --- Main Try-Except Block ---
    try:
        # --- Setup (Similar to Phase 1, ensure device handling) ---
        if graph is None or graph.number_of_nodes() == 0:
             nan_results['termination_reason']='empty_graph'; nan_results['error_message']='Received empty graph'; return nan_results

        # Handle device specification (string or torch.device)
        local_device = None
        if isinstance(device, torch.device):
            local_device = device
        elif isinstance(device, str):
            try:
                local_device = torch.device(device)
            except Exception as e_dev:
                nan_results['termination_reason'] = 'device_error'; nan_results['error_message'] = f'Invalid device string: {device}, Error: {e_dev}'; return nan_results
        else:
            # Default to CPU if device argument is invalid or missing
            local_device = torch.device('cpu')

        # Seeding
        np.random.seed(trial_seed); torch.manual_seed(trial_seed)
        if local_device.type == 'cuda':
            if torch.cuda.is_available():
                torch.cuda.manual_seed_all(trial_seed)
            else:
                # Handle case where CUDA is requested but not available
                nan_results['termination_reason'] = 'cuda_error'; nan_results['error_message'] = f'CUDA specified ({device}) but unavailable on worker.'; return nan_results

        # Graph Processing (Adjacency Matrix)
        node_list = sorted(list(graph.nodes())); num_nodes = len(node_list); adj_scipy_coo = None; adj_sparse_tensor = None
        try:
             # Get COO sparse matrix using NetworkX
             adj_scipy_coo = nx.adjacency_matrix(graph, nodelist=node_list, weight=None).tocoo() # Use weight=None for unweighted adjacency
             adj_indices = torch.LongTensor(np.vstack((adj_scipy_coo.row, adj_scipy_coo.col)))
             adj_values = torch.ones(len(adj_scipy_coo.data), dtype=torch.float32) # Use 1 for unweighted graph
             adj_shape = adj_scipy_coo.shape
             # Create sparse tensor on the target device
             adj_sparse_tensor = torch.sparse_coo_tensor(adj_indices, adj_values, adj_shape, device=local_device)
        except Exception as adj_e:
             nan_results['termination_reason'] = 'adj_error'; nan_results['error_message'] = f'Adj matrix creation failed: {adj_e}'; return nan_results

        # Rule Parameters
        rule_params = rule_params_in.copy()
        if instance_params.get('rule_param_name') and instance_params.get('rule_param_value') is not None:
             rule_params[instance_params['rule_param_name']] = instance_params['rule_param_value']
        # Extract individual params for JIT function, with defaults
        rp_act_thresh=float(rule_params.get('activation_threshold', 0.5)); rp_act_inc=float(rule_params.get('activation_increase_rate', 0.15)); rp_act_dec=float(rule_params.get('activation_decay_rate', 0.05))
        rp_inh_thresh=float(rule_params.get('inhibition_threshold', 0.5)); rp_inh_inc=float(rule_params.get('inhibition_increase_rate', 0.1)); rp_inh_dec=float(rule_params.get('inhibition_decay_rate', 0.1))
        rp_inh_fb_thresh=float(rule_params.get('inhibition_feedback_threshold', 0.6)); rp_inh_fb_str=float(rule_params.get('inhibition_feedback_strength', 0.3))
        rp_diff=float(rule_params.get('diffusion_factor', 0.05)); rp_noise=float(rule_params.get('noise_level', 0.001)); rp_harm=float(rule_params.get('harmonic_factor', 0.05))
        rp_w_dec=float(rule_params.get('w_decay_rate', 0.05)); rp_x_dec=float(rule_params.get('x_decay_rate', 0.05)); rp_y_dec=float(rule_params.get('y_decay_rate', 0.05))

        # --- Initialization ---
        # Initial state tensor on the target device
        initial_states_tensor = torch.FloatTensor(num_nodes, state_dim).uniform_(-0.1, 0.1).to(local_device)
        current_states_tensor = initial_states_tensor

        # History Storage Initialization
        state_history_list = [] # Stores state numpy arrays (on CPU)
        avg_change_history_list = [] # Stores scalar avg change values
        energy_history_np = [] # Stores scalar energy values (if calculated)

        if store_state_history and state_history_interval > 0:
             # Ensure interval is at least 1
             current_state_history_interval = max(1, int(state_history_interval))
             # Store initial state (step 0)
             state_history_list.append(current_states_tensor.cpu().numpy().copy())

        if calculate_energy and store_energy_history:
            try:
                 initial_energy = calculate_pairwise_dot_energy(current_states_tensor.cpu().numpy(), adj_scipy_coo)
                 energy_history_np.append(initial_energy)
            except Exception: energy_history_np.append(np.nan) # Handle potential error

        # Perturbation Setup
        is_perturbation_run = False # Default
        perturb_start = -1; perturb_end = -1; perturb_nodes_indices = []; perturb_dim = -1; perturb_val = 0.0
        baseline_final_state_for_spread = None # Store unperturbed state if needed

        if perturbation_params is not None and isinstance(perturbation_params, dict):
            perturb_start = perturbation_params.get('apply_at_step', -1)
            perturb_duration = perturbation_params.get('duration_steps', 0)
            perturb_node_frac = perturbation_params.get('target_node_fraction', 0)
            perturb_dim = perturbation_params.get('target_dimension', -1)
            perturb_val = perturbation_params.get('perturbation_value', 0.0)

            # Validate perturbation params
            valid_perturb_params = True
            if not isinstance(perturb_start, int) or perturb_start < 0 or perturb_start >= max_steps: valid_perturb_params = False
            if not isinstance(perturb_duration, int) or perturb_duration <= 0: valid_perturb_params = False
            if not isinstance(perturb_node_frac, (float, int)) or not (0 < perturb_node_frac <= 1.0): valid_perturb_params = False
            if not isinstance(perturb_dim, int) or not (0 <= perturb_dim < state_dim): valid_perturb_params = False
            # Allow perturbation value to be any float

            if valid_perturb_params:
                 is_perturbation_run = True
                 perturb_end = perturb_start + perturb_duration # End step is exclusive
                 # Select nodes to perturb
                 num_perturb_nodes = max(1, int(num_nodes * perturb_node_frac))
                 # Ensure we don't select more nodes than available
                 num_perturb_nodes = min(num_perturb_nodes, num_nodes)
                 perturb_nodes_indices = np.random.choice(num_nodes, num_perturb_nodes, replace=False).tolist()
                 # print(f"Worker {trial_seed}: Perturbation enabled. Steps {perturb_start}-{perturb_end}, Nodes: {len(perturb_nodes_indices)}, Dim: {perturb_dim}, Val: {perturb_val}") # Debug print
            else:
                 warnings.warn(f"Invalid perturbation parameters: {perturbation_params}. Disabling perturbation.", RuntimeWarning)
                 is_perturbation_run = False


        # --- Simulation Loop ---
        termination_reason = "max_steps_reached"
        steps_run = 0
        avg_change_cpu = torch.inf # Initialize with infinity
        next_states_tensor = None # Define before loop

        step = 0
        while step < max_steps:
            steps_run = step + 1

            # --- Apply Perturbation ---
            perturbation_active_this_step = False
            input_state_tensor = current_states_tensor # Default input is current state
            if is_perturbation_run:
                 is_within_perturb_window = step >= perturb_start and step < perturb_end
                 if is_within_perturb_window:
                      perturbation_active_this_step = True
                      # Create a copy to modify for perturbation step
                      perturbed_state_tensor = current_states_tensor.clone()
                      # Clamp the target dimension for the selected nodes
                      perturbed_state_tensor[perturb_nodes_indices, perturb_dim] = perturb_val
                      # Use the perturbed state as input for this step's calculation
                      input_state_tensor = perturbed_state_tensor
                      # Note: input_state_tensor now references perturbed_state_tensor
                 # else: input_state_tensor remains current_states_tensor

            # --- Execute GPU Step ---
            try:
                next_states_tensor, avg_change_tensor = hdc_5d_step_vectorized_torch(
                    adj_sparse_tensor, input_state_tensor, # Use potentially perturbed input
                    rp_act_thresh, rp_act_inc, rp_act_dec, rp_inh_thresh, rp_inh_inc, rp_inh_dec,
                    rp_inh_fb_thresh, rp_inh_fb_str, rp_diff, rp_noise, rp_harm,
                    rp_w_dec, rp_x_dec, rp_y_dec, local_device
                )
            except Exception as step_e:
                 termination_reason = "error_in_gpu_step"; nan_results['termination_reason'] = termination_reason; nan_results['convergence_time'] = steps_run
                 nan_results['error_message'] = f"GPU step {steps_run} fail: {type(step_e).__name__}: {step_e}|TB:{traceback.format_exc(limit=1)}"
                 # Try to salvage last valid state
                 try: final_states_np_err = current_states_tensor.cpu().numpy(); nan_results['final_state_vector'] = final_states_np_err.flatten()
                 except Exception: pass
                 # Clean up GPU memory before returning error
                 # Use 'del ...' with checks for existence
                 if 'adj_sparse_tensor' in locals(): del adj_sparse_tensor
                 if 'current_states_tensor' in locals(): del current_states_tensor
                 if 'initial_states_tensor' in locals(): del initial_states_tensor
                 if 'next_states_tensor' in locals() and next_states_tensor is not None: del next_states_tensor
                 if 'input_state_tensor' in locals() and input_state_tensor is not None: del input_state_tensor
                 if 'perturbed_state_tensor' in locals() and perturbed_state_tensor is not None: del perturbed_state_tensor
                 if local_device.type == 'cuda': torch.cuda.empty_cache()
                 return nan_results # Return error dict

            # --- Store History ---
            avg_change_cpu = avg_change_tensor.item() # Get Python float
            avg_change_history_list.append(avg_change_cpu)

            # Store state history at specified intervals
            if store_state_history and state_history_interval > 0:
                # Check if current step is a multiple of interval OR the very last step
                is_storage_step = (step % current_state_history_interval == 0)
                is_last_step = (step == max_steps - 1)
                if is_storage_step or is_last_step:
                     state_history_list.append(next_states_tensor.cpu().numpy().copy())

            # Store energy history if requested
            if calculate_energy and store_energy_history:
                 try:
                     current_energy = calculate_pairwise_dot_energy(next_states_tensor.cpu().numpy(), adj_scipy_coo)
                     energy_history_np.append(current_energy)
                 except Exception: energy_history_np.append(np.nan) # Handle potential error

            # --- Check Convergence (unless perturbation is active) ---
            converged = False
            # Only check for convergence if perturbation is NOT currently active
            if not perturbation_active_this_step:
                 if avg_change_cpu < conv_thresh:
                      converged = True
                      termination_reason = f"convergence_at_step_{step+1}"
                      # If this is a perturbation run, store the baseline state IF convergence happened *before* perturbation start
                      if is_perturbation_run and step < perturb_start and baseline_final_state_for_spread is None:
                           baseline_final_state_for_spread = next_states_tensor.cpu().numpy().copy()

            # Update state for next iteration
            current_states_tensor = next_states_tensor

            # Break loop if converged (and not during perturbation phase)
            # We need to ensure the simulation runs AT LEAST until perturb_end if perturbing
            should_break = False
            if converged:
                 if is_perturbation_run:
                      # If perturbing, only break if convergence happens *after* perturbation window ends
                      if step >= perturb_end:
                           should_break = True
                 else:
                      # If not perturbing, break immediately upon convergence
                      should_break = True

            if should_break:
                 break # Exit loop

            # Increment step counter
            step = step + 1
        # --- End Simulation Loop ---

        # If loop finished due to max_steps, capture baseline state if perturbing
        if is_perturbation_run and termination_reason == "max_steps_reached" and baseline_final_state_for_spread is None:
             # Find the state just before perturbation began, if history allows
             if store_state_history and perturb_start > 0:
                  history_index_before_perturb = perturb_start // current_state_history_interval # Index in history list
                  if history_index_before_perturb < len(state_history_list):
                       baseline_final_state_for_spread = state_history_list[history_index_before_perturb]
                  # else: Could not find pre-perturbation state in history
             # else: Cannot determine baseline if history not stored

        # --- Final State & Metrics ---
        final_states_np = current_states_tensor.cpu().numpy() # Get final state to CPU

        # --- Create Results Dictionary ---
        results = {
            'convergence_time': steps_run,
            'termination_reason': termination_reason,
            'final_state_vector': final_states_np.flatten(), # Flatten for easy saving/PCA later
            'error_message': None
        }

        # --- Add Sweep Parameters to Results ---
        # Find sweep key dynamically
        param_key = 'unknown_sweep_param' # Default
        param_key_iter_res = iter(instance_params.keys())
        current_key_res = next(param_key_iter_res, None)
        while current_key_res is not None:
             if isinstance(current_key_res, str) and current_key_res.endswith('_value'):
                  param_key = current_key_res
                  break
             current_key_res = next(param_key_iter_res, None)

        if param_key != 'unknown_sweep_param':
            results[param_key] = instance_params[param_key]
        else:
            results['unknown_sweep_param'] = np.nan # Fallback
        results['sensitivity_param_name'] = instance_params.get('rule_param_name')
        results['sensitivity_param_value'] = instance_params.get('rule_param_value')

        # --- Calculate Standard Metrics (Phase 1 Style) ---
        metric_idx = 0
        if metrics_to_calc is None: metrics_to_calc = [] # Ensure it's a list
        while metric_idx < len(metrics_to_calc):
             metric = metrics_to_calc[metric_idx]
             if metric == 'variance_norm':
                  results[metric] = calculate_variance_norm(final_states_np)
             elif metric == 'entropy_dim_0': # Example specific dim entropy
                  if state_dim > 0:
                      # Check if final_states_np has enough columns
                      if final_states_np.shape[1] > 0:
                           results[metric] = calculate_entropy_binned(final_states_np[:, 0])
                      else: results[metric] = np.nan
                  else: results[metric] = np.nan
             # Add other standard metrics here if needed
             # else:
             #     # Ensure not overwriting calculated metrics like final_energy
             #     if metric not in results:
             #          results[metric] = np.nan
             metric_idx = metric_idx + 1

        # Energy Calculation (Final state)
        is_monotonic_result = np.nan # Use NaN for unknown/not calculated
        if calculate_energy:
            results['final_energy'] = calculate_pairwise_dot_energy(final_states_np, adj_scipy_coo)
            # Monotonicity check (if history stored)
            if store_energy_history and len(energy_history_np) > 1:
                 valid_energy_hist = np.array(energy_history_np)
                 valid_energy_hist = valid_energy_hist[~np.isnan(valid_energy_hist)] # Remove NaNs
                 if len(valid_energy_hist) > 1:
                      diffs = np.diff(valid_energy_hist)
                      # Check if all differences are non-positive (allowing for small numerical errors)
                      is_monotonic_result = bool(np.all(diffs <= 1e-6))
                 else: is_monotonic_result = True # Considered monotonic if only 0/1 valid points
            else: is_monotonic_result = np.nan # Cannot determine without history
            results['energy_monotonic'] = is_monotonic_result
        else:
            results['final_energy'] = np.nan
            results['energy_monotonic'] = np.nan # Can't determine without calculation

        # Primary Order Parameter
        primary_metric_name = instance_params.get('primary_metric', 'variance_norm')
        # Use .get() on results dict to fetch the calculated value
        results['order_parameter'] = results.get(primary_metric_name, np.nan)
        results['metric_name'] = primary_metric_name

        # --- Calculate Phase 2 Metrics ---
        # Store avg change sequence (can be useful for debugging convergence)
        results['avg_change_history'] = avg_change_history_list if store_state_history else None

        # Store state history if requested (potentially large!)
        # Consider only storing if specifically needed downstream
        results['state_history'] = state_history_list if store_state_history else None

        # Mean Final State Entropy (Example Info Metric)
        if 'mean_final_state_entropy' in all_metrics_requested:
             results['mean_final_state_entropy'] = calculate_mean_final_state_entropy(final_states_np)
        else: # Ensure key exists if not requested
            results['mean_final_state_entropy'] = np.nan


        # Perturbation Metrics
        if is_perturbation_run:
             # Relaxation Time
             if 'relaxation_time' in all_metrics_requested:
                   # Ensure perturb_end is valid before calculating
                   pert_end_idx = perturb_end if perturb_end is not None and perturb_end >=0 else -1
                   results['relaxation_time'] = calculate_relaxation_time(avg_change_history_list, conv_thresh, pert_end_idx)
             else: results['relaxation_time'] = np.nan

             # Perturbation Spread (Requires baseline run result - comparison done *after* collecting results)
             # Store the baseline state captured before/during loop
             results['baseline_state_for_spread'] = baseline_final_state_for_spread
             # Placeholder for spread - calculated later
             results['perturbation_spread'] = np.nan

        else: # Ensure keys exist even if perturbation didn't run
             results['relaxation_time'] = np.nan
             results['perturbation_spread'] = np.nan
             results['baseline_state_for_spread'] = None


        # --- Final Cleanup ---
        # Use 'del ...' with checks for existence more robustly
        if 'adj_sparse_tensor' in locals(): del adj_sparse_tensor
        if 'current_states_tensor' in locals(): del current_states_tensor
        if 'initial_states_tensor' in locals(): del initial_states_tensor
        if 'next_states_tensor' in locals() and next_states_tensor is not None: del next_states_tensor
        if 'input_state_tensor' in locals() and input_state_tensor is not None: del input_state_tensor
        if 'perturbed_state_tensor' in locals() and perturbed_state_tensor is not None: del perturbed_state_tensor
        if local_device.type == 'cuda': torch.cuda.empty_cache()

        return results # Return success results

    # --- Top-Level Exception Handling ---
    except Exception as worker_e:
         tb_str = traceback.format_exc(limit=2) # Get more traceback info
         nan_results['termination_reason'] = 'unhandled_worker_error'
         nan_results['error_message'] = f"Unhandled Worker Error: {type(worker_e).__name__}: {worker_e} | TB: {tb_str}"
         # Try to capture final state if possible
         try:
             if 'current_states_tensor' in locals() and current_states_tensor is not None:
                 nan_results['final_state_vector'] = current_states_tensor.cpu().numpy().flatten()
         except Exception: pass # Ignore errors during error handling
         # Attempt cleanup even during error
         try:
             if 'adj_sparse_tensor' in locals(): del adj_sparse_tensor
             if 'current_states_tensor' in locals(): del current_states_tensor
             if 'initial_states_tensor' in locals(): del initial_states_tensor
             if 'next_states_tensor' in locals() and next_states_tensor is not None: del next_states_tensor
             if 'input_state_tensor' in locals() and input_state_tensor is not None: del input_state_tensor
             if 'perturbed_state_tensor' in locals() and perturbed_state_tensor is not None: del perturbed_state_tensor
             if 'local_device' in locals() and local_device is not None and local_device.type == 'cuda':
                 torch.cuda.empty_cache()
         except NameError: pass # Variables might not be defined if error happened early
         return nan_results

# --- 6. Fitting Function (Copied from Phase 1 Cell 2) ---
def reversed_sigmoid_func(x, A, x0, k, C):
    """Reversed sigmoid function (decreasing S-shape). Includes numerical stability."""
    try:
        # Ensure input is a numpy array
        x = np.asarray(x, dtype=float)
        # Calculate the exponent term, clipping to prevent overflow/underflow
        exp_term = k * (x - x0)
        # Clip range determined empirically or theoretically; -700 to 700 covers most double precision
        exp_term = np.clip(exp_term, -700, 700)
        # Calculate denominator, adding small epsilon if needed, though clip often suffices
        denominator = 1.0 + np.exp(exp_term)
        # Avoid division by zero, although np.exp result should be positive
        denominator = np.where(denominator == 0, 1e-300, denominator) # Use a very small number
        # Calculate the final result
        result = A / denominator + C
        # Handle potential NaNs or Infs resulting from extreme values (though clip helps)
        result = np.nan_to_num(result, nan=np.nan, posinf=np.nan, neginf=np.nan)
        return result
    except Exception as e_sig:
        warnings.warn(f"Sigmoid calculation failed: {e_sig}", RuntimeWarning)
        # Return an array of NaNs with the same shape as input x on error
        return np.full_like(x, np.nan, dtype=float)


# --- 7. UMAP Helper Function ---
def run_umap_analysis(data_matrix, umap_params, random_state=42):
    """ Performs UMAP dimensionality reduction. """
    if not UMAP_AVAILABLE:
        print("UMAP not available. Skipping UMAP analysis.")
        return None, None

    # Check if data_matrix is valid
    if data_matrix is None:
         print("⚠️ UMAP input data_matrix is None. Skipping.")
         return None, None
    if not isinstance(data_matrix, np.ndarray):
         print("⚠️ UMAP input data_matrix is not a NumPy array. Skipping.")
         return None, None

    # Check minimum number of samples required by UMAP (n_neighbors)
    n_neighbors_umap = umap_params.get('n_neighbors', 5) # Default UMAP n_neighbors is 15, but use 5 as a safe minimum check
    if data_matrix.shape[0] < n_neighbors_umap:
        print(f"⚠️ Not enough data points ({data_matrix.shape[0]}) for UMAP with n_neighbors={n_neighbors_umap}. Skipping.")
        return None, None

    print(f"  Running UMAP with params: {umap_params} on data shape {data_matrix.shape}")
    try:
        # Initialize UMAP reducer
        reducer = umap.UMAP(
            n_neighbors=umap_params.get('n_neighbors', 15),
            min_dist=umap_params.get('min_dist', 0.1),
            n_components=umap_params.get('n_components', 2),
            metric=umap_params.get('metric', 'euclidean'),
            random_state=random_state,
            verbose=False # Keep console clean
        )
        # Fit and transform the data
        embedding = reducer.fit_transform(data_matrix)
        print(f"  UMAP embedding generated with shape: {embedding.shape}")
        return embedding, reducer # Return both embedding and the fitted reducer model
    except Exception as e_umap:
        print(f"❌ Error during UMAP analysis: {e_umap}")
        traceback.print_exc(limit=2)
        return None, None


print("✅ Phase 2 Helper functions defined (incl. modified worker `run_single_instance_phase2`, UMAP helper, JIT fix).")
print("\nCell 2 execution complete.")

✅ UMAP imported successfully.

--- Cell 2: Helper Function Definitions (Phase 2 Implementation - Updated Worker - JIT Fix) ---
✅ Phase 2 Helper functions defined (incl. modified worker `run_single_instance_phase2`, UMAP helper, JIT fix).

Cell 2 execution complete.


In [4]:
# Cell 4: Order Parameter Function Definitions (Emergenics - Full)
# Description: Defines functions to compute order parameters from 5D simulation states.
# Includes calculation of flattened state vector.
# Adheres strictly to one statement per line after colons.

import numpy as np
from scipy.stats import entropy as scipy_entropy
import pandas as pd
import warnings

print("\n--- Cell 4: Order Parameter Function Definitions (Emergenics - Full) ---")

# --- Helper: Convert State Dictionary to Numpy Array ---
# This function was part of an older design using state dictionaries.
# The current worker `run_single_instance_phase2` returns numpy arrays directly.
# Keeping it here for potential backward compatibility or alternative use cases.
def state_dict_to_array(state_dict, node_list_local, state_dim):
    """Converts a state dictionary {node_id: state_vector} to a NumPy array [N, D]."""
    num_nodes = 0
    if isinstance(node_list_local, list):
        num_nodes = len(node_list_local)

    # Initialize array with NaNs
    state_array = np.full((num_nodes, state_dim), np.nan, dtype=float)

    # Check if input is a valid dictionary
    if not isinstance(state_dict, dict):
        warnings.warn("state_dict_to_array received non-dict input.", RuntimeWarning)
        return state_array # Return NaN array

    # Check if node list is valid
    if num_nodes == 0:
         warnings.warn("state_dict_to_array received empty node list.", RuntimeWarning)
         return state_array # Return NaN array

    # Define a default state vector (e.g., NaNs or zeros) for missing nodes
    default_state_vec = np.full(state_dim, np.nan, dtype=float)

    # Iterate through the provided node list to ensure correct order
    node_idx = 0
    while node_idx < num_nodes:
        node_id = node_list_local[node_idx]
        # Get the state vector from the dictionary, use default if missing
        state_vec = state_dict.get(node_id, default_state_vec)

        # Validate the retrieved state vector
        vector_is_valid = False
        if isinstance(state_vec, np.ndarray):
             if state_vec.shape == (state_dim,):
                  vector_is_valid = True

        # Assign the vector to the array if valid, otherwise NaNs remain
        if vector_is_valid:
            state_array[node_idx, :] = state_vec
        # else: state_array remains NaN at this row

        node_idx = node_idx + 1 # Increment loop counter

    return state_array

# --- Helper: Get state values for a specific dimension ---
# Also part of the older dictionary-based design.
def get_state_dimension_values(state_dict, node_list_local, dim_index, state_dim):
    """Extracts values for a single state dimension from a state dictionary."""
    # Input validation
    if not isinstance(state_dict, dict) or not state_dict:
        warnings.warn("get_state_dimension_values: Invalid state_dict.", RuntimeWarning)
        return np.array([], dtype=float) # Return empty array
    if not isinstance(node_list_local, list) or not node_list_local:
        warnings.warn("get_state_dimension_values: Invalid node_list_local.", RuntimeWarning)
        return np.array([], dtype=float)
    if not isinstance(dim_index, int) or not (0 <= dim_index < state_dim):
        warnings.warn(f"get_state_dimension_values: Invalid dim_index {dim_index} for state_dim {state_dim}.", RuntimeWarning)
        return np.array([], dtype=float)

    # Extract values
    values_list = []
    default_val = np.nan # Value to use if node or state is invalid

    node_idx = 0
    while node_idx < len(node_list_local):
        node_id = node_list_local[node_idx]
        state_vec = state_dict.get(node_id) # Get state vector or None

        # Check if the retrieved state vector is valid
        value_to_append = default_val # Assume invalid initially
        if isinstance(state_vec, np.ndarray):
             if state_vec.shape == (state_dim,):
                  # Vector is valid shape, extract the value at the specified dimension
                  value_at_dim = state_vec[dim_index]
                  # Check if the extracted value is a valid number (not NaN/Inf)
                  if np.isfinite(value_at_dim):
                       value_to_append = value_at_dim

        values_list.append(value_to_append) # Append the value (or default NaN)
        node_idx = node_idx + 1 # Increment loop counter

    # Convert the list of values to a NumPy array
    return np.array(values_list, dtype=float)

# --- Order Parameter Functions (using state dictionaries - adapt if using arrays directly) ---
# These functions assume the older state_dict format. They would need modification
# or replacement if operating directly on the final_state_np array from the worker.

def compute_variance_norm(state_dict, node_list_local, state_dim):
    """Calculates variance of the L2 norm of state vectors across nodes."""
    norms = []
    # Input validation
    dict_is_valid = isinstance(state_dict, dict)
    node_list_valid = isinstance(node_list_local, list) and len(node_list_local) > 0

    if dict_is_valid and node_list_valid:
        node_idx = 0
        while node_idx < len(node_list_local): # Iterate through nodes
            node = node_list_local[node_idx]
            vec = state_dict.get(node) # Get state vector for the node

            # Check if the vector is valid
            vec_is_valid_type = isinstance(vec, np.ndarray)
            vec_is_valid_shape = False
            if vec_is_valid_type:
                 if vec.shape == (state_dim,):
                      vec_is_valid_shape = True

            if vec_is_valid_shape:
                 # Calculate L2 norm (Euclidean distance from origin)
                 try:
                      norm_val = np.linalg.norm(vec)
                      # Check if norm is a valid finite number
                      norm_is_valid_number = False
                      if not (np.isnan(norm_val) or np.isinf(norm_val)):
                           norm_is_valid_number = True

                      if norm_is_valid_number:
                           norms.append(norm_val) # Add valid norm to list
                 except Exception as e_norm:
                      warnings.warn(f"Norm calculation failed for node {node}: {e_norm}", RuntimeWarning)
                      # Do not append norm if calculation fails

            node_idx = node_idx + 1 # Increment loop counter

    # Calculate variance if we have at least two valid norms
    variance_value = np.nan # Default to NaN
    have_enough_valid_norms = len(norms) >= 2 # Need at least 2 points for variance
    if have_enough_valid_norms:
         try:
             variance_value = np.var(norms) # Calculate variance of the collected norms
         except Exception as e_var:
              warnings.warn(f"Variance calculation failed: {e_var}", RuntimeWarning)
              variance_value = np.nan # Set to NaN on error

    return variance_value


def compute_variance_dim_N(state_dict, node_list_local, dim_index, state_dim):
    """Calculates variance of state values for a specific dimension across nodes."""
    # Get state values for the specified dimension using helper function
    state_values = get_state_dimension_values(state_dict, node_list_local, dim_index, state_dim)

    # Filter out NaN values
    valid_values = state_values[~np.isnan(state_values)]

    # Calculate variance if enough valid values exist
    variance_value = np.nan # Default to NaN
    have_enough_valid_values = valid_values.size >= 2 # Need at least 2 points for variance
    if have_enough_valid_values:
         try:
             variance_value = np.var(valid_values) # Calculate variance
         except Exception as e_var:
             warnings.warn(f"Variance calculation failed for dim {dim_index}: {e_var}", RuntimeWarning)
             variance_value = np.nan # Set to NaN on error

    return variance_value


def compute_shannon_entropy_dim_N(state_dict, node_list_local, dim_index, state_dim, num_bins=10, state_range=(-1.0, 1.0)):
    """Calculates Shannon entropy for a specific dimension using histogram binning."""
    # Get state values for the specified dimension
    state_values = get_state_dimension_values(state_dict, node_list_local, dim_index, state_dim)

    # Filter out NaN values
    valid_values = state_values[~np.isnan(state_values)]

    # Proceed if there are valid values
    shannon_entropy_value = np.nan # Default to NaN
    have_valid_values = valid_values.size > 0
    if have_valid_values:
        try:
             # Use numpy histogram to get counts in bins
             counts, bin_edges = np.histogram(valid_values, bins=num_bins, range=state_range)
             # Calculate total number of valid counts
             total_counts = counts.sum()

             # Calculate entropy only if there are counts
             if total_counts > 0:
                 # Calculate probabilities by dividing counts by total counts
                 probabilities = counts / total_counts
                 # Filter out probabilities equal to zero (log(0) is undefined)
                 non_zero_probabilities = probabilities[probabilities > 0]

                 # Calculate Shannon entropy using scipy if there are non-zero probabilities
                 if non_zero_probabilities.size > 0:
                     shannon_entropy_value = scipy_entropy(non_zero_probabilities, base=None) # Use natural log
                 else:
                     # If all probabilities are zero (can happen with weird binning/data)
                     shannon_entropy_value = 0.0
             else:
                 # If no counts in any bin (e.g., all data outside range)
                 shannon_entropy_value = 0.0

        except Exception as e:
             warnings.warn(f"Shannon entropy calculation failed for dim {dim_index}: {e}", RuntimeWarning)
             shannon_entropy_value = np.nan # Set to NaN on error
    else:
        # If no valid (non-NaN) values were found for the dimension
        shannon_entropy_value = 0.0 # Or np.nan depending on desired behavior for empty input

    return shannon_entropy_value

# --- Functions below are less relevant with the new worker but kept for context ---

def count_attractors_5d(final_states_dict_list, node_list_local, state_dim, tolerance=1e-3):
    """Counts unique final states (attractors) across multiple simulation trials."""
    # Input validation
    list_is_valid = isinstance(final_states_dict_list, list) and len(final_states_dict_list) > 0
    node_list_is_valid = isinstance(node_list_local, list) and len(node_list_local) > 0
    if not list_is_valid or not node_list_is_valid:
        warnings.warn("count_attractors: Invalid input list or node list.", RuntimeWarning)
        return 0 # Return 0 attractors for invalid input

    num_trials = len(final_states_dict_list)
    num_nodes = len(node_list_local)
    # Create a 3D numpy array to store all final states [trial, node, dim]
    final_states_array_3d = np.full((num_trials, num_nodes, state_dim), np.nan, dtype=float)

    # Convert each state dictionary in the list to an array and store it
    trial_idx = 0
    while trial_idx < num_trials:
        state_dict = final_states_dict_list[trial_idx]
        if isinstance(state_dict, dict):
            # Use helper function to convert dict to array
            state_array = state_dict_to_array(state_dict, node_list_local, state_dim)
            final_states_array_3d[trial_idx, :, :] = state_array
        # else: keep NaNs if input wasn't a dict
        trial_idx = trial_idx + 1

    # --- Identify and Filter Valid Trials ---
    # A trial is valid if not all its state values are NaN
    valid_trials_mask = ~np.isnan(final_states_array_3d).all(axis=(1, 2))
    num_valid_trials = np.sum(valid_trials_mask)

    if num_valid_trials == 0:
        return 0 # No valid final states found

    # Select only the valid trials
    final_states_array_valid = final_states_array_3d[valid_trials_mask, :, :]

    # --- Process Valid States for Uniqueness ---
    # Reshape the valid states into a 2D array [valid_trial, flattened_state_vector]
    # Each row represents the complete state of the network for one trial
    num_features = num_nodes * state_dim
    final_states_reshaped = final_states_array_valid.reshape(num_valid_trials, num_features)

    # Round the states to the specified tolerance to group similar attractors
    # Determine number of decimal places based on tolerance
    rounding_decimals = 3 # Default
    if tolerance > 0:
        # Calculate decimals needed, handling potential log10(0)
        try:
             rounding_decimals = int(-np.log10(tolerance))
             if rounding_decimals < 0 : rounding_decimals = 0 # Ensure non-negative
        except ValueError:
             rounding_decimals = 15 # Use high precision if tolerance is extremely small
    # Apply rounding
    rounded_states = np.round(final_states_reshaped, decimals=rounding_decimals)

    # --- Count Unique Attractors ---
    num_attractors = -1 # Default error value
    try:
        # Use np.unique along axis=0 to find unique rows (unique flattened states)
        unique_attractor_rows = np.unique(rounded_states, axis=0)
        # The number of unique rows is the number of distinct attractors found
        num_attractors = unique_attractor_rows.shape[0]
    except MemoryError:
        warnings.warn("MemoryError during attractor counting (np.unique). Large state space.", RuntimeWarning)
        num_attractors = -1 # Indicate memory error
    except Exception as e_uniq:
        warnings.warn(f"Error during np.unique for attractor counting: {e_uniq}.", RuntimeWarning)
        num_attractors = -1 # Indicate other error

    return num_attractors


def convergence_time_metric_5d(state_history_dict_list, node_list_local, state_dim, tolerance=1e-3):
    """Calculates convergence time based on average state change between steps."""
    # Input validation
    history_is_valid = isinstance(state_history_dict_list, list)
    history_is_long_enough = history_is_valid and len(state_history_dict_list) >= 2
    node_list_is_valid = isinstance(node_list_local, list) and len(node_list_local) > 0

    if not history_is_long_enough or not node_list_is_valid:
        return np.nan # Cannot determine convergence without sufficient history or nodes

    history_length = len(state_history_dict_list)
    convergence_step = -1 # Initialize: -1 means not converged yet
    previous_state_array = None # Store the state from the previous step

    # Iterate through the history starting from the first step (index 0)
    step_index = 0
    while step_index < history_length:
        current_state_dict = state_history_dict_list[step_index]
        # Convert current state dict to array
        is_valid_dict = isinstance(current_state_dict, dict)
        if not is_valid_dict:
            warnings.warn(f"Non-dict state found at step {step_index}. Cannot calculate convergence.", RuntimeWarning)
            return np.nan # Cannot proceed with invalid state format

        current_state_array = state_dict_to_array(current_state_dict, node_list_local, state_dim)

        # Check if the conversion was successful (no all-NaN array)
        current_state_is_valid = not np.isnan(current_state_array).all()

        # --- Compare with previous step (if available and valid) ---
        is_after_first_step = step_index > 0
        previous_state_is_available = previous_state_array is not None

        if is_after_first_step and previous_state_is_available and current_state_is_valid:
            # Calculate absolute difference between current and previous state arrays
            abs_difference = np.abs(current_state_array - previous_state_array)

            # --- Calculate Mean Absolute Change ---
            # Consider only elements where both current and previous states are valid (not NaN)
            valid_mask = ~np.isnan(current_state_array) & ~np.isnan(previous_state_array)
            can_compare = np.any(valid_mask) # Check if there's anything to compare

            mean_absolute_change = 0.0 # Default if no comparison possible
            if can_compare:
                # Calculate mean change only over valid elements
                mean_absolute_change = np.mean(abs_difference[valid_mask])

            # Check if change is below the tolerance threshold
            change_below_threshold = mean_absolute_change < tolerance
            if change_below_threshold:
                convergence_step = step_index # Record the step *at which convergence was detected*
                break # Exit the loop once convergence is detected

        # Update previous state for the next iteration, only if current state was valid
        if current_state_is_valid:
             previous_state_array = current_state_array
        else:
             # If current state is invalid, we can't use it as previous for next step
             previous_state_array = None
             warnings.warn(f"Invalid state array encountered at step {step_index}. Resetting comparison.", RuntimeWarning)


        step_index = step_index + 1 # Increment loop counter

    # --- Determine final return value ---
    convergence_detected = convergence_step != -1
    result_convergence_time = np.nan # Default if never converged

    if convergence_detected:
        result_convergence_time = convergence_step # Return the step index
    else:
        # If loop finished without convergence, return total steps run (or NaN)
        # Using history_length - 1 might be confusing, NaN indicates never converged within history
        result_convergence_time = np.nan

    return result_convergence_time


# Primary function called by worker - calculates metrics AND returns flattened state
# MODIFIED to use numpy array input directly, reflecting run_single_instance_phase2 output
def calculate_metrics_and_state_from_array(final_state_array, config_local):
    """Calculates order parameters from a final state NumPy array."""
    results = {}
    if final_state_array is None or not isinstance(final_state_array, np.ndarray) or final_state_array.ndim != 2:
         warnings.warn("calculate_metrics_and_state_from_array: Invalid final_state_array input.")
         results['variance_norm'] = np.nan
         results[f'variance_dim_0'] = np.nan # Assuming dim 0 is default analysis dim
         results[f'entropy_dim_0'] = np.nan
         results['final_state_flat'] = None
         return results

    # Get params safely
    state_dim = config_local.get('STATE_DIM', 5)
    analysis_dim = config_local.get("ANALYSIS_STATE_DIM", 0) # Which dim to analyze specifically
    # Ensure analysis_dim is valid
    if not (0 <= analysis_dim < state_dim):
         analysis_dim = 0 # Default to 0 if invalid
         warnings.warn(f"Invalid ANALYSIS_STATE_DIM in config. Defaulting to 0.", RuntimeWarning)

    bins = config_local.get("ORDER_PARAM_BINS", 10)
    s_range = tuple(config_local.get("STATE_RANGE", (-1.5, 1.5))) # Use wider range consistent with clipping

    # Calculate metrics directly from the array
    # Variance Norm (Average variance across dimensions)
    results['variance_norm'] = calculate_variance_norm(final_state_array)

    # Variance of a specific dimension
    variance_dim_key = f'variance_dim_{analysis_dim}'
    results[variance_dim_key] = np.nan # Default
    if state_dim > analysis_dim:
        try: results[variance_dim_key] = np.var(final_state_array[:, analysis_dim])
        except Exception: pass # Keep NaN on error

    # Entropy of a specific dimension
    entropy_dim_key = f'entropy_dim_{analysis_dim}'
    results[entropy_dim_key] = np.nan # Default
    if state_dim > analysis_dim:
        results[entropy_dim_key] = calculate_entropy_binned(final_state_array[:, analysis_dim], bins=bins, range_lims=s_range)

    # Get flattened state for PCA/UMAP (handle potential errors)
    final_state_flat_list = None
    try:
        # Check if array contains NaNs/Infs before flattening
        array_is_valid = not (np.isnan(final_state_array).any() or np.isinf(final_state_array).any())
        if array_is_valid:
            # Flatten the 2D array [N, D] into a 1D vector
            final_state_flat_list = final_state_array.flatten().tolist() # Convert to list for JSON possibly
        else:
            # Set to None if the array contains invalid numbers
            final_state_flat_list = None
            warnings.warn("Final state array contains NaN/Inf, cannot flatten.", RuntimeWarning)
    except Exception as e_flat:
        warnings.warn(f"Could not flatten state: {e_flat}")
        final_state_flat_list = None # Indicate failure

    results['final_state_flat'] = final_state_flat_list
    return results


print("✅ Cell 4: Order parameter functions defined (some assume older dict format, added array-based calculator).")


--- Cell 4: Order Parameter Function Definitions (Emergenics - Full) ---
✅ Cell 4: Order parameter functions defined (some assume older dict format, added array-based calculator).


In [5]:
# Cell 5: Define Graph Automaton Update Rule (5D HDC / RSV) - Emergenics
# Description: Implements the 5D HDC / RSV update rule function `simulation_step_5D_HDC_RSV`.
#              This is the conceptual CPU-based implementation. The actual sweeps use
#              the GPU version `hdc_5d_step_vectorized_torch` from Cell 2/worker_utils.py.
# Adheres strictly to one statement per line after colons.

import numpy as np
import networkx as nx
import warnings
import traceback

print("\n--- Cell 5: Rule Definition (Conceptual CPU 5D HDC / RSV Update Step) ---")
print("⚠️ Note: This CPU implementation is for conceptual understanding/testing.")
print("   Actual sweeps use the GPU version (`hdc_5d_step_vectorized_torch`).")


# Helper function for element-wise clipping (CPU version)
def clip_vector_cpu(vec, clip_range):
    """Clips a NumPy vector element-wise."""
    # Check if input is a numpy array
    if not isinstance(vec, np.ndarray):
        return vec # Return input as is if not an array
    # Check clip range format
    if not isinstance(clip_range, (list, tuple)) or len(clip_range) != 2:
        warnings.warn("Invalid clip_range format. Clipping skipped.", RuntimeWarning)
        return vec
    min_val = clip_range[0]
    max_val = clip_range[1]
    if min_val >= max_val:
         warnings.warn("Invalid clip_range: min >= max. Clipping skipped.", RuntimeWarning)
         return vec
    # Apply clipping using numpy.clip
    clipped_vec = np.clip(vec, min_val, max_val)
    return clipped_vec

# Main 5D HDC / RSV Simulation Step Function (CPU Conceptual Implementation)
def simulation_step_5D_HDC_RSV(
    graph, current_states_dict,
    node_list_local, node_to_int_local, # Mappings assumed provided if using dicts
    rule_params_local):
    """
    Performs one step of the 5D HDC/RSV simulation using CPU and state dictionaries.
    This is a conceptual implementation; GPU version is used for performance.
    """
    # --- Input Validation & Setup ---
    if not isinstance(graph, nx.Graph):
         warnings.warn("simulation_step: graph is not a NetworkX graph.", RuntimeWarning)
         return None, None, -1.0 # Indicate error
    if not isinstance(current_states_dict, dict):
         warnings.warn("simulation_step: current_states_dict is not a dictionary.", RuntimeWarning)
         return None, None, -1.0
    if not isinstance(node_list_local, list) or not node_list_local:
          warnings.warn("simulation_step: node_list_local is invalid.", RuntimeWarning)
          return None, None, -1.0
    if not isinstance(node_to_int_local, dict):
           warnings.warn("simulation_step: node_to_int_local is invalid.", RuntimeWarning)
           return None, None, -1.0
    if not isinstance(rule_params_local, dict):
         warnings.warn("simulation_step: rule_params_local is not a dictionary.", RuntimeWarning)
         return None, None, -1.0

    num_nodes = len(node_list_local)
    state_dim = 5 # Hardcoded for this specific model

    # Handle empty graph case
    if num_nodes == 0:
        return {}, None, 0.0 # Return empty dict, None pheromones, 0 change

    # --- Get Rule Parameters ---
    # Use .get() for safe access with defaults
    alpha = rule_params_local.get('hcd_alpha', 0.1) # Learning rate / step size
    clip_range = rule_params_local.get('hcd_clip_range', [-1.0, 1.0])
    use_bundling = rule_params_local.get('use_neighbor_bundling', True) # HDC bundling concept
    use_weights = rule_params_local.get('use_graph_weights', False) # Currently unused, assumes unweighted
    noise_level = rule_params_local.get('noise_level', 0.001)
    # Define default state for nodes missing from dict (shouldn't happen if initialized properly)
    default_state = np.array([0.0] * state_dim, dtype=float)

    # --- Prepare Data Structures ---
    # Find the data type of the first valid state vector for consistency
    first_valid_state = default_state # Initialize with default
    dtype_found = False
    iter_node_list = iter(node_list_local)
    while not dtype_found:
        try:
             node_id = next(iter_node_list)
             state = current_states_dict.get(node_id)
             if state is not None and isinstance(state, np.ndarray) and state.shape == (state_dim,):
                  first_valid_state = state
                  dtype_found = True
        except StopIteration:
             break # No valid states found
    state_dtype = first_valid_state.dtype # Get dtype (e.g., float64)

    # Convert state dictionary to NumPy array for efficient access
    # Using a loop for clarity as requested
    current_states_array = np.empty((num_nodes, state_dim), dtype=state_dtype)
    idx = 0
    while idx < num_nodes:
         node_id_current = node_list_local[idx]
         state_vector = current_states_dict.get(node_id_current, default_state)
         # Ensure vector shape matches state_dim, otherwise use default
         if isinstance(state_vector, np.ndarray) and state_vector.shape == (state_dim,):
              current_states_array[idx, :] = state_vector
         else:
              current_states_array[idx, :] = default_state # Fallback to default
         idx = idx + 1

    # Create array for next states, initialized with current states
    next_states_array = current_states_array.copy()

    # --- Calculate Updates Node by Node ---
    avg_change_accumulator = 0.0 # Accumulate total change
    nodes_updated_count = 0 # Count nodes processed successfully

    # Get adjacency view for efficient neighbor access
    adj = graph.adj

    # Iterate through each node using the node list
    node_idx = 0
    while node_idx < num_nodes:
        node_id = node_list_local[node_idx]
        current_node_state = current_states_array[node_idx, :]

        # 1. Bundle Neighbors (HDC concept)
        bundled_neighbor_vector = np.zeros(state_dim, dtype=state_dtype)
        if use_bundling:
            # Get neighbors for the current node
            neighbors_dict = adj.get(node_id, {}) # Returns empty dict if node has no neighbors
            valid_neighbors = []
            neighbor_iter = iter(neighbors_dict.keys())
            stop_neighbor_iter = False
            while not stop_neighbor_iter:
                try:
                     neighbor_id = next(neighbor_iter)
                     # Check if neighbor is in our known node mapping (important for consistency)
                     if neighbor_id in node_to_int_local:
                          valid_neighbors.append(neighbor_id)
                except StopIteration:
                     stop_neighbor_iter = True

            # If there are valid neighbors, calculate bundled vector
            if len(valid_neighbors) > 0:
                 # Get integer indices of neighbors
                 neighbor_indices = []
                 neighbor_idx = 0
                 while neighbor_idx < len(valid_neighbors):
                      neighbor_id = valid_neighbors[neighbor_idx]
                      int_index = node_to_int_local.get(neighbor_id, -1) # Get index from mapping
                      # Ensure index is valid
                      if 0 <= int_index < num_nodes:
                           neighbor_indices.append(int_index)
                      neighbor_idx = neighbor_idx + 1

                 # Sum the state vectors of valid neighbors
                 if len(neighbor_indices) > 0:
                      # Indexing numpy array with list of indices selects rows
                      bundled_vector_sum = np.sum(current_states_array[neighbor_indices, :], axis=0)
                      # Apply clipping to the bundled sum
                      bundled_neighbor_vector = clip_vector_cpu(bundled_vector_sum, clip_range)


        # 2. Calculate RSV scalar (deviation norm)
        # Deviation vector: difference between node's state and bundled neighbors
        deviation_vector = current_node_state - bundled_neighbor_vector
        rsv_scalar = 0.0 # Default value
        try:
            # Calculate L2 norm of the deviation vector
            norm_val = np.linalg.norm(deviation_vector)
            # Assign norm to scalar if it's a valid finite number
            if not (np.isnan(norm_val) or np.isinf(norm_val)):
                rsv_scalar = norm_val
        except Exception as e_norm_calc:
            warnings.warn(f"RSV scalar norm calculation failed for node {node_id}: {e_norm_calc}", RuntimeWarning)
            # Keep rsv_scalar as 0.0 on error

        # 3. Apply Update Rule
        # Calculate update term based on deviation and scalar
        # Update moves state away from neighbors based on deviation magnitude
        update_term = alpha * rsv_scalar * (-deviation_vector)
        # Calculate potential next state before noise/clipping
        potential_next_state = current_node_state + update_term

        # 4. Add Noise
        # Generate noise vector with same dimension as state
        noise_vector = np.random.uniform(-noise_level, noise_level, size=state_dim).astype(state_dtype)
        state_after_noise = potential_next_state + noise_vector

        # 5. Apply Clipping
        # Ensure final state stays within bounds
        final_next_state = clip_vector_cpu(state_after_noise, clip_range)

        # Store the calculated next state in the array
        next_states_array[node_idx, :] = final_next_state

        # --- Accumulate Change Metric ---
        try:
            # Calculate the magnitude of change for this node
            node_change = np.linalg.norm(final_next_state - current_node_state)
            # Add to accumulator if valid
            if not (np.isnan(node_change) or np.isinf(node_change)):
                avg_change_accumulator = avg_change_accumulator + node_change
                nodes_updated_count = nodes_updated_count + 1
        except Exception as e_change_calc:
             warnings.warn(f"Node change calculation failed for node {node_id}: {e_change_calc}", RuntimeWarning)

        # Increment node index for the main loop
        node_idx = node_idx + 1
    # --- End Node Loop ---

    # Calculate Average Change across all updated nodes
    average_change = 0.0
    if nodes_updated_count > 0:
        average_change = avg_change_accumulator / nodes_updated_count

    # Convert the NumPy array back to a state dictionary
    next_states_dict = {}
    idx = 0
    while idx < num_nodes:
        node_id_final = node_list_local[idx]
        next_states_dict[node_id_final] = next_states_array[idx, :]
        idx = idx + 1

    # Return the new state dictionary, None for pheromones (not used here), and average change
    return next_states_dict, None, average_change

    # Error Handling (Catch unexpected errors during the process)
    # Note: Specific errors handled within loops, this is a general fallback
    # except Exception as e:
    #      print(f"❌❌❌ Error in simulation_step_5D_HDC_RSV: {e}")
    #      traceback.print_exc()
    #      return None, None, -1.0 # Indicate error state


print("✅ Cell 5: Conceptual 5D HDC / RSV CPU simulation step function defined.")


--- Cell 5: Rule Definition (Conceptual CPU 5D HDC / RSV Update Step) ---
⚠️ Note: This CPU implementation is for conceptual understanding/testing.
   Actual sweeps use the GPU version (`hdc_5d_step_vectorized_torch`).
✅ Cell 5: Conceptual 5D HDC / RSV CPU simulation step function defined.


In [6]:
# Cell 6: Simulation Runner Function (Emergenics - Resumable)
# Description: Defines the simulation runner using the 5D HDC/RSV step function (Cell 5).
# Handles state dictionaries, manages checkpointing/resuming. Reduced verbosity.
# Adheres strictly to one statement per line after colons.
# Note: This runner uses the CPU implementation and state dictionaries.

import numpy as np
import networkx as nx
from tqdm.auto import tqdm
import time
import copy
import warnings
import pickle
import os
import traceback

print("\n--- Cell 6: Simulation Runner Definition (Emergenics - Resumable, CPU Dict-Based) ---")
print("⚠️ Note: This runner uses the CPU implementation from Cell 5.")

# --- State Initialization Function (5D HDC) ---
# Needs `clip_vector_cpu` defined (should be available from Cell 5)
def initialize_states_5D_HDC(node_list_local, config_local):
    """Initializes 5D HDC states based on config_local settings using CPU."""
    # Validate required config keys
    if 'INIT_MODE' not in config_local:
        raise ValueError("Missing INIT_MODE in config_local for initialization.")
    if 'STATE_DIM' not in config_local:
        raise ValueError("Missing STATE_DIM in config_local for initialization.")

    init_mode = config_local['INIT_MODE']
    state_dim = config_local['STATE_DIM']

    # Get default state, ensure it's a numpy array
    default_state_cfg = config_local.get('DEFAULT_INACTIVE_STATE', [0.0]*state_dim)
    try:
        default_state = np.array(default_state_cfg, dtype=float)
        if default_state.shape != (state_dim,): # Check shape
             raise ValueError(f"DEFAULT_INACTIVE_STATE shape mismatch: expected ({state_dim},), got {default_state.shape}")
    except Exception as e_def_state:
        raise ValueError(f"Invalid DEFAULT_INACTIVE_STATE in config: {e_def_state}")

    # Get parameters for 'random_normal' mode
    mean = config_local.get('INIT_NORMAL_MEAN', 0.0)
    stddev = config_local.get('INIT_NORMAL_STDDEV', 0.1)
    # Get clipping range from rule parameters if available
    rule_params_init = config_local.get('rule_params', {})
    clip_range = rule_params_init.get('hcd_clip_range', [-1.0, 1.0]) # Default clip range

    # Validate node list
    if not isinstance(node_list_local, list) or len(node_list_local) == 0:
         warnings.warn("initialize_states received empty or invalid node list.", RuntimeWarning)
         return {} # Return empty dictionary

    num_nodes = len(node_list_local)
    states = {} # Initialize empty dictionary to store states

    # --- Initialization Logic ---
    node_idx = 0
    while node_idx < num_nodes: # Iterate through nodes
        node_id = node_list_local[node_idx]
        state_vector = None # Initialize vector for this node

        if init_mode == 'random_normal':
            # Generate random state from normal distribution
            random_state = np.random.normal(loc=mean, scale=stddev, size=state_dim).astype(default_state.dtype)
            # Clip the generated random state
            state_vector = clip_vector_cpu(random_state, clip_range) # Use CPU clipping helper
        elif init_mode == 'zeros':
            # Use the default state (which should be zeros or configured value)
            state_vector = default_state.copy() # Use copy to avoid modifying the default
        else:
            # Handle unknown initialization mode
            warnings.warn(f"Unknown INIT_MODE '{init_mode}'. Using default state.", RuntimeWarning)
            state_vector = default_state.copy()

        # Assign the generated state vector to the node ID in the dictionary
        states[node_id] = state_vector
        node_idx = node_idx + 1 # Increment loop counter

    return states

# --- Main Simulation Runner (CPU Dict-Based) ---
def run_simulation_5D_HDC_RSV(
    graph_obj, initial_states_dict, config_local,
    max_steps=None, convergence_thresh=None,
    node_list_local=None, node_to_int_local=None, # Needed for CPU step func
    output_dir=None, checkpoint_interval=50,
    checkpoint_filename="sim_checkpoint.pkl",
    progress_desc="Simulating 5D (CPU)", leave_progress=True
    ):
    """
    Runs CA simulation with 5D HDC/RSV rule (CPU version from Cell 5),
    using state dictionaries, with checkpointing support.
    """
    # --- Prerequisite Checks ---
    args_valid = True
    missing_or_invalid = []
    # Check graph object
    if graph_obj is None or not isinstance(graph_obj, nx.Graph):
        args_valid = False; missing_or_invalid.append("graph_obj")
    # Check initial states dictionary
    if initial_states_dict is None or not isinstance(initial_states_dict, dict):
        args_valid = False; missing_or_invalid.append("initial_states_dict")
    # Check configuration dictionary
    if config_local is None or 'rule_params' not in config_local:
        args_valid = False; missing_or_invalid.append("config_local (with rule_params)")
    # Check simulation control parameters
    if max_steps is None or not isinstance(max_steps, int) or max_steps <= 0:
        args_valid = False; missing_or_invalid.append("max_steps (positive integer)")
    if convergence_thresh is None or not isinstance(convergence_thresh, (float, int)) or convergence_thresh < 0:
        args_valid = False; missing_or_invalid.append("convergence_thresh (non-negative number)")
    # Check node list and mapping needed by the CPU step function
    if node_list_local is None or not isinstance(node_list_local, list) or not node_list_local:
        args_valid = False; missing_or_invalid.append("node_list_local (non-empty list)")
    if node_to_int_local is None or not isinstance(node_to_int_local, dict):
        args_valid = False; missing_or_invalid.append("node_to_int_local (dict)")
    # Check checkpointing parameters if enabled
    checkpointing_enabled = False # Default to disabled
    if output_dir is not None and isinstance(checkpoint_interval, int) and checkpoint_interval > 0 and checkpoint_interval <= max_steps:
         if isinstance(output_dir, str) and isinstance(checkpoint_filename, str):
              checkpointing_enabled = True
         else:
              args_valid = False; missing_or_invalid.append("checkpoint output_dir/filename (strings)")
    elif output_dir is not None:
         # Checkpoint interval invalid or disabled
         warnings.warn("Checkpointing specified (output_dir provided) but interval is invalid or zero. Checkpointing disabled.", RuntimeWarning)


    # Raise error if any argument is invalid
    if not args_valid:
        raise ValueError(f"❌ Invalid/Missing arguments for simulation runner: {missing_or_invalid}")

    # --- Checkpoint Handling ---
    checkpoint_path = None
    if checkpointing_enabled:
        # Ensure output directory exists for checkpointing
        try:
            os.makedirs(output_dir, exist_ok=True)
            checkpoint_path = os.path.join(output_dir, checkpoint_filename)
        except OSError as e_dir:
            warnings.warn(f"Could not create output directory '{output_dir}' for checkpointing. Checkpointing disabled. Error: {e_dir}", RuntimeWarning)
            checkpointing_enabled = False # Disable if dir creation fails


    start_step = 0 # Step to start simulation from (0 for fresh start)
    current_states = {} # Holds the current state dictionary
    state_history = [] # List to store state dictionaries at each step (can consume memory)
    avg_change_history_runner = [] # Store avg change from simulation step

    # Try loading from checkpoint if path exists
    checkpoint_load_successful = False
    if checkpointing_enabled and checkpoint_path is not None and os.path.exists(checkpoint_path):
        print(f"  Attempting to load checkpoint from: {checkpoint_path}")
        try:
            with open(checkpoint_path, 'rb') as f:
                checkpoint_data = pickle.load(f)
            # Validate checkpoint data structure
            if isinstance(checkpoint_data, dict) and 'last_saved_step' in checkpoint_data and 'current_states_dict' in checkpoint_data:
                 start_step = checkpoint_data.get('last_saved_step', -1) + 1 # Start from step AFTER the saved one
                 saved_states = checkpoint_data.get('current_states_dict', {})
                 # Ensure loaded states are numpy arrays (pickle might handle this, but check)
                 current_states = {}
                 for node_id, state_vec in saved_states.items():
                     if isinstance(state_vec, np.ndarray):
                         current_states[node_id] = state_vec
                     else:
                         # Attempt conversion if not already array, fallback to default
                         try: current_states[node_id] = np.array(state_vec, dtype=float)
                         except Exception: current_states[node_id] = np.full(config_local['STATE_DIM'], np.nan, dtype=float)

                 # Add loaded state to history start
                 state_history = [copy.deepcopy(current_states)]
                 # Load previous avg change if available (optional)
                 last_avg_change_chkpt = checkpoint_data.get('last_avg_change', np.nan)
                 avg_change_history_runner = [last_avg_change_chkpt] # Start history with last known change

                 simulation_already_completed = start_step >= max_steps
                 if simulation_already_completed:
                     print(f"  Checkpoint indicates simulation already completed at step {start_step-1}. Returning.")
                     # Need to decide what state_history to return - potentially load full history if saved?
                     # For now, returning just the final state loaded.
                     termination_reason_chkpt = checkpoint_data.get('termination_reason', 'completed_via_checkpoint')
                     return [copy.deepcopy(current_states)], termination_reason_chkpt # Return list with one state dict
                 else:
                     print(f"  Resuming simulation from step {start_step}.")
                     checkpoint_load_successful = True # Mark successful load
            else:
                 warnings.warn("Checkpoint file invalid format. Starting fresh.", RuntimeWarning)

        except Exception as e:
            warnings.warn(f"Checkpoint load failed: {e}. Starting fresh.", RuntimeWarning)
            # Reset variables if load fails
            start_step = 0
            current_states = {}
            state_history = []
            avg_change_history_runner = []


    # --- Initialize if not resuming successfully ---
    if not checkpoint_load_successful:
        # Perform deep copy of initial states to avoid modifying the original dict
        current_states = copy.deepcopy(initial_states_dict)
        # Start history with the initial state
        state_history = [copy.deepcopy(current_states)]
        start_step = 0 # Ensure starting from step 0


    # --- Simulation Loop ---
    termination_reason = "unknown" # Initialize termination reason
    start_sim_time = time.time()
    last_avg_change = np.nan # Store the average change from the last step

    # Get rule parameters once before the loop for efficiency
    simulation_rule_parameters = config_local['rule_params']

    # Setup progress bar if leave_progress is True
    step_iterator = range(start_step, max_steps)
    use_tqdm = leave_progress # Control tqdm display
    if use_tqdm:
         pbar = tqdm(step_iterator, desc=progress_desc, leave=leave_progress, initial=start_step, total=max_steps)
    else:
         pbar = step_iterator # Use plain range if no progress bar


    # Main simulation loop using 'for' loop over range iterator
    step = start_step # Initialize step counter for logic inside loop
    for current_step_index in pbar: # current_step_index goes from start_step to max_steps-1
        step = current_step_index # Use 'step' for consistency with internal logic

        # --- Execute one simulation step using the CPU function ---
        next_states, _, avg_change = simulation_step_5D_HDC_RSV(
            graph_obj, current_states,
            node_list_local, node_to_int_local, # Pass mappings
            simulation_rule_parameters
        )

        # --- Handle Simulation Step Failure ---
        simulation_step_failed = next_states is None or avg_change < 0 # avg_change < 0 indicates error in step func
        if simulation_step_failed:
            print(f"\n❌ Error occurred during simulation step {step+1}. Halting.")
            termination_reason = f"error_at_step_{step+1}"
            if use_tqdm: pbar.close() # Close progress bar on error
            # Return history up to the point of failure
            return state_history, termination_reason

        # --- Store Results of Successful Step ---
        # Append the newly calculated state dictionary to the history (use deepcopy)
        state_history.append(copy.deepcopy(next_states))
        # Update the current state for the next iteration
        current_states = next_states
        # Store the average change from this step
        last_avg_change = avg_change
        avg_change_history_runner.append(last_avg_change)

        # Update progress bar description with average change
        if use_tqdm:
             pbar.set_postfix({'AvgChange': f"{avg_change:.6f}"})

        # --- Check for Convergence ---
        # Check if average change is below the threshold
        converged = False
        if last_avg_change < convergence_thresh:
             converged = True
             termination_reason = f"convergence_at_step_{step+1}"
             if use_tqdm: pbar.close() # Close progress bar on convergence
             break # Exit the simulation loop

        # --- Save Checkpoint Periodically ---
        is_last_iteration = step == max_steps - 1 # Check if it's the final iteration
        is_checkpoint_step = False
        if checkpointing_enabled and checkpoint_interval > 0:
             # Checkpoint at steps 49, 99, 149 etc. if interval is 50
             if (step + 1) % checkpoint_interval == 0:
                  is_checkpoint_step = True

        # Save checkpoint if it's a checkpoint step AND not the very last iteration
        should_save_checkpoint = checkpointing_enabled and is_checkpoint_step and not is_last_iteration
        if should_save_checkpoint:
            # Prepare data to save in checkpoint file
            checkpoint_data_to_save = {
                'last_saved_step': step, # Save the index of the step just completed
                'current_states_dict': current_states, # Save the state *after* the completed step
                'termination_reason': termination_reason, # Store current reason (might be 'unknown')
                'last_avg_change': last_avg_change # Store avg change from this step
                # Optionally save full state history if needed, but increases file size significantly
                # 'full_state_history': state_history
            }
            # Save checkpoint atomically (write to temp file, then replace)
            try:
                temp_path = checkpoint_path + ".tmp" # Temporary file path
                with open(temp_path, 'wb') as f_tmp:
                    pickle.dump(checkpoint_data_to_save, f_tmp)
                # Replace the old checkpoint file with the new temporary file
                os.replace(temp_path, checkpoint_path)
                # Optional: print confirmation
                # if use_tqdm: pbar.set_description(f"{progress_desc} (Chkpt @{step+1})")
            except Exception as e:
                # Print warning if checkpoint saving fails but continue simulation
                print(f"\n⚠️ Checkpoint saving failed at step {step+1}: {e}")

    # --- After the Loop ---
    # This block executes if the loop finishes without breaking (i.e., max steps reached)
    # or if break happened (convergence)
    else: # This is the 'for...else' construct, runs if loop finished normally
        if use_tqdm and not converged: # Ensure progress bar is closed if loop finishes
            pbar.close()
        # If termination reason wasn't set by convergence, set it to max_steps_reached
        if termination_reason == "unknown":
             termination_reason = "max_steps_reached"


    end_sim_time = time.time()
    total_sim_time = end_sim_time - start_sim_time
    if use_tqdm: # Print final time if progress bar was used
        print(f"  Simulation loop finished. Reason: {termination_reason}. Time: {total_sim_time:.2f}s")


    # --- Final Cleanup: Remove Checkpoint File ---
    # Remove checkpoint file only if simulation completed successfully (not error)
    # and checkpointing was enabled and the file exists
    remove_checkpoint = False
    if checkpointing_enabled and checkpoint_path is not None:
        if not termination_reason.startswith("error"):
             if os.path.exists(checkpoint_path):
                 remove_checkpoint = True

    if remove_checkpoint:
        try:
            os.remove(checkpoint_path)
            print(f"  Removed final checkpoint file: {checkpoint_path}")
        except OSError as e_rem:
            warnings.warn(f"Could not remove checkpoint file '{checkpoint_path}': {e_rem}", RuntimeWarning)


    # Return the full state history and the final termination reason
    return state_history, termination_reason

print("✅ Cell 6: CPU 5D HDC State Initializer and Simulation Runner defined.")


--- Cell 6: Simulation Runner Definition (Emergenics - Resumable, CPU Dict-Based) ---
⚠️ Note: This runner uses the CPU implementation from Cell 5.
✅ Cell 6: CPU 5D HDC State Initializer and Simulation Runner defined.


In [7]:
# Cell 7: Graph Generation Functions (Emergenics)
# Description: Defines functions to generate networks (WS, SBM, RGG).
# Adheres strictly to one statement per line after colons.

import networkx as nx
import numpy as np
import random
import warnings

print("\n--- Cell 7: Graph Generation Functions ---")

def generate_ws_graph(n_nodes, k_neighbors, rewiring_prob, seed=None):
    """Generates a Watts-Strogatz small-world graph with input validation."""
    # --- Input Validation ---
    # Check n_nodes
    if not isinstance(n_nodes, int) or n_nodes <= 0:
         raise ValueError(f"n_nodes must be a positive integer, got {n_nodes}")
    # Check k_neighbors
    if not isinstance(k_neighbors, int) or k_neighbors <= 0:
         warnings.warn(f"WS k ({k_neighbors}) must be a positive integer. Setting k=2.", RuntimeWarning)
         k_neighbors = 2 # Default to minimal valid k

    # Ensure k is even
    if k_neighbors % 2 != 0:
        original_k = k_neighbors
        # Decrease k by 1 to make it even, ensuring it's at least 2
        k_neighbors = max(2, k_neighbors - 1)
        warnings.warn(f"WS k ({original_k}) must be even. Setting k={k_neighbors}.", RuntimeWarning)

    # Ensure k is less than n_nodes
    if k_neighbors >= n_nodes:
        original_k = k_neighbors
        # Set k to the largest possible even value less than n_nodes
        # Max k is n-1. If n-1 is odd, use n-2. If n-1 is even, use n-1 (if n>1)
        max_possible_k = n_nodes - 1
        if max_possible_k % 2 != 0:
             corrected_k = max(2, max_possible_k - 1) # Need at least k=2
        else:
             corrected_k = max(2, max_possible_k)

        warnings.warn(f"WS k ({original_k}) >= n ({n_nodes}). Setting k={corrected_k}.", RuntimeWarning)
        k_neighbors = corrected_k

    # Check rewiring probability
    if not isinstance(rewiring_prob, (float, int)) or not (0 <= rewiring_prob <= 1):
         raise ValueError(f"rewiring_prob must be between 0 and 1, got {rewiring_prob}")


    # --- Generate Graph ---
    ws_graph = None # Initialize to None
    try:
        # Call NetworkX function with validated parameters
        ws_graph = nx.watts_strogatz_graph(n=n_nodes, k=k_neighbors, p=rewiring_prob, seed=seed)
    except nx.NetworkXError as e:
        # Catch potential errors during graph generation itself
        print(f"❌ Error generating WS graph (n={n_nodes}, k={k_neighbors}, p={rewiring_prob}): {e}")
        ws_graph = None # Ensure None is returned on failure

    # Return the generated graph (or None if generation failed)
    return ws_graph


def generate_sbm_graph(n_nodes, block_sizes_list, p_intra_community, p_inter_community, seed=None):
    """Generates a Stochastic Block Model graph with input validation."""
    # --- Input Validation ---
    if not isinstance(n_nodes, int) or n_nodes <= 0:
         raise ValueError(f"n_nodes must be a positive integer, got {n_nodes}")
    if not isinstance(block_sizes_list, list) or not block_sizes_list:
         raise ValueError("block_sizes_list must be a non-empty list of integers.")
    # Check block sizes are positive integers
    block_idx = 0
    valid_blocks = True
    sum_block_sizes = 0
    while block_idx < len(block_sizes_list):
         size = block_sizes_list[block_idx]
         if not isinstance(size, int) or size <= 0:
              valid_blocks = False
              break
         sum_block_sizes = sum_block_sizes + size
         block_idx = block_idx + 1
    if not valid_blocks:
         raise ValueError("block_sizes_list must contain only positive integers.")
    # Check if block sizes sum to n_nodes
    if sum_block_sizes != n_nodes:
         warnings.warn(f"SBM block sizes sum ({sum_block_sizes}) != n_nodes ({n_nodes}). Check generation logic.", RuntimeWarning)
         # Do not raise error, NetworkX might handle this, but warn user.
    # Check probabilities
    if not (isinstance(p_intra_community, (float, int)) and 0 <= p_intra_community <= 1):
        raise ValueError(f"p_intra_community must be between 0 and 1, got {p_intra_community}")
    if not (isinstance(p_inter_community, (float, int)) and 0 <= p_inter_community <= 1):
        raise ValueError(f"p_inter_community must be between 0 and 1, got {p_inter_community}")

    num_blocks = len(block_sizes_list)

    # --- Construct Probability Matrix ---
    # Initialize empty list for the matrix
    probability_matrix = []
    # Outer loop for rows
    row_idx = 0
    while row_idx < num_blocks:
        # Initialize empty list for the current row
        current_row_probabilities = []
        # Inner loop for columns
        col_idx = 0
        while col_idx < num_blocks:
            # Check if it's a diagonal element (intra-community)
            if row_idx == col_idx:
                current_row_probabilities.append(p_intra_community)
            else: # Off-diagonal element (inter-community)
                current_row_probabilities.append(p_inter_community)
            col_idx = col_idx + 1 # Increment column index
        # Append the completed row to the matrix
        probability_matrix.append(current_row_probabilities)
        row_idx = row_idx + 1 # Increment row index


    # --- Generate Graph ---
    sbm_graph = None # Initialize to None
    try:
        # Call NetworkX SBM function
        sbm_graph = nx.stochastic_block_model(sizes=block_sizes_list, p=probability_matrix, seed=seed)
    except Exception as e:
        # Catch potential errors during graph generation
        print(f"❌ Error generating SBM graph (sizes={block_sizes_list}, p_in={p_intra_community}, p_out={p_inter_community}): {e}")
        sbm_graph = None # Ensure None is returned on failure

    return sbm_graph


def generate_rgg_graph(n_nodes, connection_radius, seed=None):
    """Generates a Random Geometric Graph with input validation."""
    # --- Input Validation ---
    if not isinstance(n_nodes, int) or n_nodes <= 0:
         raise ValueError(f"n_nodes must be a positive integer, got {n_nodes}")
    # Radius should typically be positive, NetworkX might handle 0 but warn.
    if not isinstance(connection_radius, (float, int)) or connection_radius < 0:
         raise ValueError(f"connection_radius must be non-negative, got {connection_radius}")
    if connection_radius == 0:
         warnings.warn("connection_radius is 0, graph will likely have no edges.", RuntimeWarning)

    # --- Seed Position Generation (if seed provided) ---
    # Note: NetworkX's random_geometric_graph uses np.random if seed is not None,
    # but we can also seed the standard `random` module if positions were generated manually.
    # Since NetworkX handles it, explicit seeding here might be redundant unless using manual pos.
    # if seed is not None:
    #     random.seed(seed) # Seed standard random module if needed elsewhere

    # --- Generate Graph ---
    # NetworkX's rgg handles position generation internally if 'pos' is not provided.
    rgg_graph = None # Initialize to None
    try:
        # Call NetworkX function, passing the seed directly
        rgg_graph = nx.random_geometric_graph(n=n_nodes, radius=connection_radius, seed=seed)
        # Optionally, store positions if needed later:
        # pos = nx.get_node_attributes(rgg_graph, 'pos')
    except Exception as e:
        # Catch potential errors during graph generation
        print(f"❌ Error generating RGG graph (n={n_nodes}, r={connection_radius}): {e}")
        rgg_graph = None # Ensure None is returned on failure

    return rgg_graph

print("✅ Cell 7: Graph generation functions defined.")


--- Cell 7: Graph Generation Functions ---
✅ Cell 7: Graph generation functions defined.


In [None]:
# Cell 8: Run Parametric Sweep (Phase 1 Baseline - WS Model - Corrected Emptiness Check)
# Description: Runs the primary WS sweep as a baseline using the original Phase 1
#              worker (`run_single_instance`) for direct comparison.
#              Corrects the check for empty system_sizes/param_values.

import pandas as pd
import numpy as np
import networkx as nx
import time
import os
import pickle
import itertools
import warnings
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm.auto import tqdm
import copy
import multiprocessing as mp
import torch
import traceback
import json # Import json for loading config

# *** Import ORIGINAL Worker Function from Phase 1 utils ***
# This cell runs the BASELINE Phase 1 sweep, so it needs the original worker
try:
    # Check if it was already imported, otherwise import
    if 'run_single_instance' not in globals():
        from worker_utils import run_single_instance
        print("✅ Imported ORIGINAL Phase 1 worker: run_single_instance from worker_utils.py")
    else:
        # Optional: Add check if the existing one is the Phase 2 version
        # This logic might be complex, maybe just re-import or trust context
        print("ℹ️ Using pre-existing 'run_single_instance' (ensure it's the Phase 1 version for this cell).")
except ImportError:
    raise ImportError("❌ ERROR: Cannot import Phase 1 worker 'run_single_instance' from worker_utils.py.")

# *** Ensure Helpers Defined (From Cell 2) ***
if 'generate_graph' not in globals(): raise NameError("generate_graph not defined. Run Cell 2.")
if 'get_sweep_parameters' not in globals(): raise NameError("get_sweep_parameters not defined. Run Cell 2.")


print("\n--- Cell 8: Run Parametric Sweep (Phase 1 Baseline - WS Model - Corrected Emptiness Check) ---")

# --- Configuration Loading ---
config = {}
analysis_error_sweep = False
try:
    # Use the config loaded in Cell 1
    if 'config' in globals() and isinstance(globals()['config'], dict) and 'OUTPUT_DIR' in globals()['config']:
        config = globals()['config']
        print(f"  Using configuration loaded in Cell 1 (Experiment: {config.get('EXPERIMENT_NAME', 'N/A')})")
    else:
        # Attempt to load Phase 2 config if global config is missing (less ideal)
        output_dir_base_ph2 = "emergenics_phase2_results"
        exp_pattern_ph2 = "Emergenics_Phase2" # Adjust pattern if needed
        if os.path.isdir(output_dir_base_ph2):
            all_subdirs_ph2 = [d for d in os.listdir(output_dir_base_ph2) if os.path.isdir(os.path.join(output_dir_base_ph2, d)) and d.startswith(exp_pattern_ph2)]
            if all_subdirs_ph2:
                latest_run_dir_ph2 = max([os.path.join(output_dir_base_ph2, d) for d in all_subdirs_ph2], key=os.path.getmtime)
                config_path_ph2 = os.path.join(latest_run_dir_ph2, "run_config_phase2.json")
                if os.path.exists(config_path_ph2):
                    with open(config_path_ph2, 'r') as f_ph2:
                        config = json.load(f_ph2)
                    print(f"  Loaded configuration from latest Phase 2 run: {latest_run_dir_ph2}")
                else:
                    raise FileNotFoundError("Config file not found in latest Phase 2 dir.")
            else:
                raise FileNotFoundError("No Phase 2 experiment directories found.")
        else:
            raise FileNotFoundError("Phase 2 base directory not found.")

    # Extract necessary parameters from the loaded config
    TARGET_MODEL = 'WS' # Hardcode to WS for this baseline run
    graph_params_all = config.get('GRAPH_MODEL_PARAMS', {}) # Use original Phase 1 ranges
    if not isinstance(graph_params_all, dict): graph_params_all = {} # Ensure dict
    graph_model_params = graph_params_all.get(TARGET_MODEL, {})
    if not isinstance(graph_model_params, dict): graph_model_params = {} # Ensure dict

    param_name = None; param_values = None; primary_param_key_found = False
    param_key_iter = iter(graph_model_params.keys())
    current_key = next(param_key_iter, None)
    while current_key is not None:
         values = graph_model_params[current_key]
         if isinstance(values, (list, np.ndarray)):
             param_name = current_key.replace('_values', '')
             param_values = values
             primary_param_key_found = True
             # break # Assuming only one sweep param per model in config
         current_key = next(param_key_iter, None)

    if not primary_param_key_found:
         # Handle cases like RGG if needed, though hardcoded to WS here
         if TARGET_MODEL=='RGG' and 'radius_values' in graph_model_params:
              param_name='radius'
              param_values=graph_model_params['radius_values']
         else:
              # Default if sweep param not found
              param_name = 'p' # Default for WS if not found
              param_values = np.logspace(-5, 0, 20) # Default values
              warnings.warn(f"Sweep param values not found for {TARGET_MODEL} in loaded config. Using default range.")

    system_sizes = config.get('SYSTEM_SIZES', [300, 500, 700])
    num_instances = config.get('NUM_INSTANCES_PER_PARAM', 10)
    num_trials = config.get('NUM_TRIALS_PER_INSTANCE', 3)
    rule_params_base = config.get('RULE_PARAMS', {})
    max_steps = config.get('MAX_SIMULATION_STEPS', 200)
    conv_thresh = config.get('CONVERGENCE_THRESHOLD', 1e-4)
    state_dim = config.get('STATE_DIM', 5)
    workers = config.get('PARALLEL_WORKERS', os.cpu_count())
    output_dir = config.get('OUTPUT_DIR', ".") # Use Phase 2 output dir for saving these baseline results
    exp_name = config.get('EXPERIMENT_NAME', "Phase2_BaselineRun")
    calculate_energy = config.get('CALCULATE_ENERGY', True)
    store_energy_history = config.get('STORE_ENERGY_HISTORY', False) # Phase 1 setting
    energy_type = config.get('ENERGY_FUNCTIONAL_TYPE', 'pairwise_dot')
    primary_metric = config.get('PRIMARY_ORDER_PARAMETER', 'variance_norm')
    all_metrics = config.get('ORDER_PARAMETERS_TO_ANALYZE', ['variance_norm', 'entropy_dim_0', 'final_energy'])

    # *** CORRECTED VALIDATION ***
    # Check if lists/arrays are empty using len() which works for both lists and numpy arrays
    if len(system_sizes) == 0 or (param_values is not None and len(param_values) == 0) or param_values is None:
        raise ValueError("SYSTEM_SIZES or parameter values list is empty or None in config.")

except (ValueError, KeyError) as e_key:
     raise KeyError(f"❌ FATAL: Missing essential key or invalid value ('{e_key}') in configuration dictionary. Run Cell 1.")
except Exception as e_conf:
     raise RuntimeError(f"❌ FATAL: Error loading configuration for sweep: {e_conf}")


print(f"Target Model: {TARGET_MODEL}")
print(f"Sweep Parameter: {param_name} (Values: {len(param_values)})")
print(f"System Sizes: {system_sizes}")
print(f"Using {workers} workers.")
print(f"Results will be saved relative to: {output_dir}")

# --- Device Check ---
if torch.cuda.is_available(): device = torch.device('cuda:0')
else: device = torch.device('cpu')
print(f"  Using device: {device}")

# --- Prepare Sweep Tasks ---
# Use Phase 1 style sweep generation (no perturbation etc.)
sweep_tasks = get_sweep_parameters(
    graph_model_name=TARGET_MODEL,
    model_params=graph_model_params,
    system_sizes=system_sizes,
    instances=num_instances,
    trials=num_trials
)
print(f"Prepared {len(sweep_tasks)} {TARGET_MODEL} baseline tasks across {len(system_sizes)} sizes.")

# --- Setup Logging & Partial Results ---
# Save baseline results with a distinct name in the *Phase 2* output directory
baseline_log_file = os.path.join(output_dir, f"{exp_name}_{TARGET_MODEL}_baseline_sweep.log")
baseline_partial_results_file = os.path.join(output_dir, f"{exp_name}_{TARGET_MODEL}_baseline_sweep_partial.pkl")
completed_tasks_signatures = set(); all_results_list = []

# Robust loading logic for baseline files
if os.path.exists(baseline_log_file):
    try:
        with open(baseline_log_file, 'r') as f: completed_tasks_signatures = set(line.strip() for line in f)
    except Exception as e_load_log: print(f" Warning: Could not load baseline log: {e_load_log}")
if os.path.exists(baseline_partial_results_file):
    try:
        with open(baseline_partial_results_file, 'rb') as f: all_results_list = pickle.load(f)
        # Rebuild signatures from loaded data if log was incomplete/corrupt
        if all_results_list and not completed_tasks_signatures:
             temp_df_signatures = pd.DataFrame(all_results_list)
             param_value_key_load = param_name + '_value'
             if all(k in temp_df_signatures.columns for k in ['N', param_value_key_load, 'instance', 'trial']):
                  completed_tasks_signatures = set( f"N={row['N']}_{param_name}={row[param_value_key_load]:.5f}_inst={row['instance']}_trial={row['trial']}" for _, row in temp_df_signatures.iterrows() )
                  print(f" Rebuilt {len(completed_tasks_signatures)} signatures from partial results.")
             del temp_df_signatures
    except Exception as e_load_pkl: print(f" Warning: Could not load baseline partial results: {e_load_pkl}"); all_results_list = []
print(f"Loaded {len(completed_tasks_signatures)} completed task signatures and {len(all_results_list)} previous baseline results.")

# Filter tasks
tasks_to_run = []; param_value_key_filter = param_name + '_value'
task_idx_filter = 0
while task_idx_filter < len(sweep_tasks):
    task_params = sweep_tasks[task_idx_filter]
    # Basic check for required keys before creating signature
    if param_value_key_filter not in task_params or 'N' not in task_params or 'instance' not in task_params or 'trial' not in task_params:
        warnings.warn(f"Skipping task due to missing keys: {task_params}", RuntimeWarning)
        task_idx_filter += 1
        continue
    # Format float for signature robustly
    try: p_val_fmt = f"{task_params[param_value_key_filter]:.5f}"
    except (TypeError, ValueError): p_val_fmt = str(task_params[param_value_key_filter]) # Fallback
    task_sig = f"N={task_params['N']}_{param_name}={p_val_fmt}_inst={task_params['instance']}_trial={task_params['trial']}"
    if task_sig not in completed_tasks_signatures:
        tasks_to_run.append(task_params)
    task_idx_filter += 1


# --- Execute Sweep in Parallel ---
if tasks_to_run:
    print(f"Executing {len(tasks_to_run)} new {TARGET_MODEL} baseline tasks (Device: {device}, Workers: {workers})...")
    try: # Set spawn method
        if mp.get_start_method(allow_none=True) != 'spawn': mp.set_start_method('spawn', force=True); print("  Set multiprocessing start method to 'spawn'.")
    except Exception as e_spawn: print(f" Warning: Could not force spawn method: {e_spawn}")

    start_time = time.time(); futures = []; pool_broken_flag = False
    # Define executor instance robustly
    actual_workers = min(workers, os.cpu_count() if os.cpu_count() else 1) # Limit workers if needed
    executor_instance = ProcessPoolExecutor(max_workers=actual_workers)
    try:
        # Submit tasks using a while loop
        task_idx_submit = 0
        while task_idx_submit < len(tasks_to_run):
            task_params = tasks_to_run[task_idx_submit]
            param_value_key_submit = param_name + '_value'
            if param_value_key_submit not in task_params:
                 warnings.warn(f"Skipping submit task missing key {param_value_key_submit}: {task_params}")
                 task_idx_submit += 1; continue

            # Combine fixed params and sweep param for graph generation
            graph_gen_params = task_params.get('fixed_params', {}).copy()
            graph_gen_params[param_name] = task_params[param_value_key_submit]

            G = generate_graph( task_params['model'], graph_gen_params, task_params['N'], task_params['graph_seed'] )

            if G is None or G.number_of_nodes() == 0:
                 warnings.warn(f"Skipping task due to failed graph generation: {task_params}")
                 task_idx_submit += 1; continue # Skip failed graph gen

            # Submit using the ORIGINAL run_single_instance
            future = executor_instance.submit(
                run_single_instance, # Calling the Phase 1 worker
                graph=G, N=task_params['N'], instance_params=task_params, trial_seed=task_params['sim_seed'],
                rule_params_in=rule_params_base, max_steps=max_steps, conv_thresh=conv_thresh, state_dim=state_dim,
                calculate_energy=calculate_energy, store_energy_history=store_energy_history,
                energy_type=energy_type, metrics_to_calc=all_metrics,
                device=str(device) # Pass device name as string
            )
            futures.append((future, task_params))
            task_idx_submit += 1

        # Collect results using a while loop and iterator
        pbar = tqdm(total=len(futures), desc=f"{TARGET_MODEL} Baseline Sweep", mininterval=2.0)
        log_frequency = max(1, len(futures) // 50); save_frequency = max(20, len(futures) // 10)
        tasks_processed_since_save = 0
        completed_futures_iterator = as_completed(dict(futures).keys()) # Create iterator from futures
        futures_processed_count = 0

        with open(baseline_log_file, 'a') as f_log:
             while futures_processed_count < len(futures):
                 if pool_broken_flag:
                      # If pool breaks, attempt to update progress bar for remaining tasks
                      remaining_updates = len(futures) - futures_processed_count
                      pbar.update(remaining_updates)
                      break # Exit the loop
                 try:
                      future = next(completed_futures_iterator) # Get the next completed future
                      task_params = dict(futures)[future] # Find corresponding task params

                      result_dict = future.result(timeout=1200) # Increased timeout
                      if result_dict:
                           full_result = copy.deepcopy(task_params)
                           full_result.update(result_dict)
                           all_results_list.append(full_result); tasks_processed_since_save += 1
                           # Log completion
                           param_value_key_log = param_name + '_value'
                           if result_dict.get('error_message') is None and param_value_key_log in task_params:
                                try: p_val_fmt_log = f"{task_params[param_value_key_log]:.5f}"
                                except (TypeError, ValueError): p_val_fmt_log = str(task_params[param_value_key_log])
                                task_sig = f"N={task_params['N']}_{param_name}={p_val_fmt_log}_inst={task_params['instance']}_trial={task_params['trial']}"
                                f_log.write(f"{task_sig}\n")
                                if futures_processed_count % log_frequency == 0: f_log.flush() # Flush periodically
                 except StopIteration: # Should not happen if loop condition is correct, but safe catch
                      break
                 except Exception as e_collect:
                      if "Broken" in str(e_collect) or "abruptly" in str(e_collect) or isinstance(e_collect, TypeError):
                           print(f"\n❌ ERROR: Pool broke during result collection. Exception: {type(e_collect).__name__}: {e_collect}")
                           pool_broken_flag = True
                           # Continue loop to update progress bar, but don't process more results
                      else:
                           # Log other errors if needed, but don't break pool flag
                           warnings.warn(f"Error processing future result: {type(e_collect).__name__}: {e_collect}", RuntimeWarning)
                           # traceback.print_exc(limit=1) # Optional detail
                 finally:
                      pbar.update(1)
                      futures_processed_count += 1
                      # Save partial results periodically
                      should_save_partial = tasks_processed_since_save >= save_frequency or futures_processed_count == len(futures)
                      if should_save_partial and tasks_processed_since_save > 0 :
                           try:
                               with open(baseline_partial_results_file, 'wb') as f_partial:
                                   pickle.dump(all_results_list, f_partial)
                               tasks_processed_since_save = 0 # Reset counter after successful save
                           except Exception as e_save_part: print(f" Warning: Failed to save partial results: {e_save_part}")

    except KeyboardInterrupt: print("\nExecution interrupted by user.")
    except Exception as main_e: print(f"\n❌ ERROR during parallel execution setup: {main_e}"); traceback.print_exc(limit=2)
    finally:
        pbar.close(); print("Shutting down executor..."); executor_instance.shutdown(wait=True, cancel_futures=False); print("Executor shut down.") # Allow completing futures on shutdown
        try: # Final save attempt
            with open(baseline_partial_results_file, 'wb') as f_partial: pickle.dump(all_results_list, f_partial)
        except Exception as e_final_save: print(f" Warning: Final save failed: {e_final_save}")
        end_time = time.time(); print(f"\n✅ Parallel execution block completed ({end_time - start_time:.1f}s).")
else: print(f"✅ No new baseline tasks to run for {TARGET_MODEL} sweep.")


# --- Process Final Results ---
print("\nProcessing final baseline results...")
# *** Initialize global variable to empty DataFrame ***
global_sweep_results_baseline = pd.DataFrame() # Use a distinct name for baseline results
# ****************************************************
if not all_results_list: print("⚠️ No baseline results collected.")
else:
    try: # Add try-except around DataFrame creation and processing
        final_results_df = pd.DataFrame(all_results_list)
        # --- Add Check after DataFrame creation ---
        print(f"  DEBUG: Baseline DataFrame created successfully? {'Yes' if not final_results_df.empty else 'NO - DataFrame is empty!'}")
        print(f"  DEBUG: Baseline DataFrame shape after creation: {final_results_df.shape}")
        # ------------------------------------------

        # Error checking from worker runs
        if 'error_message' in final_results_df.columns:
             failed_run_count = final_results_df['error_message'].notna().sum()
             if failed_run_count > 0: warnings.warn(f"{failed_run_count} baseline runs reported errors.")

        # Ensure primary order parameter exists
        if primary_metric not in final_results_df.columns and 'order_parameter' not in final_results_df.columns:
            warnings.warn(f"Primary metric '{primary_metric}' not found! Cannot set 'order_parameter'.")
        elif primary_metric in final_results_df.columns:
             # Create or overwrite 'order_parameter' column using the primary metric
             final_results_df['order_parameter'] = final_results_df[primary_metric]
             final_results_df['metric_name'] = primary_metric # Store which metric was used
        else:
             # If primary metric is missing but 'order_parameter' exists, trust it but warn
             warnings.warn(f"Primary metric '{primary_metric}' missing, using existing 'order_parameter'.")


        print(f"Collected baseline results from {final_results_df.shape[0]} total attempted runs.")
        final_csv_path = os.path.join(output_dir, f"{exp_name}_{TARGET_MODEL}_baseline_sweep_results.csv")
        try:
            final_results_df.to_csv(final_csv_path, index=False); print(f"✅ Final {TARGET_MODEL} baseline sweep results saved to CSV.")
            # *** Explicitly assign to global variable ***
            global_sweep_results_baseline = final_results_df
            print(f"  DEBUG: Assigned baseline results to global_sweep_results_baseline.")
            # *******************************************
        except Exception as e_save:
             print(f"❌ Error saving final baseline CSV: {e_save}")
             print("  DEBUG: Global variable 'global_sweep_results_baseline' might be empty due to save failure.")
    except Exception as e_proc:
        print(f"❌ ERROR during final baseline results processing: {e_proc}")
        traceback.print_exc(limit=2)
        print("  DEBUG: Global variable 'global_sweep_results_baseline' will be empty due to processing error.")


# *** Add Final Check at the very end of the cell ***
print("\n--- Final Check within Cell 8 ---")
if 'global_sweep_results_baseline' in globals() and isinstance(global_sweep_results_baseline, pd.DataFrame) and not global_sweep_results_baseline.empty:
    print(f"  ✅ global_sweep_results_baseline DataFrame exists and is not empty. Shape: {global_sweep_results_baseline.shape}")
    # print(global_sweep_results_baseline.head()) # Optional: print head to verify
else:
    print(f"  ❌ global_sweep_results_baseline DataFrame is MISSING or EMPTY at the end of Cell 8!")
    print(f"     Type: {type(globals().get('global_sweep_results_baseline'))}")
    if 'final_results_df' in locals():
         print(f"     (Local final_results_df existed with shape: {final_results_df.shape})")
    else:
         print("     (Local final_results_df did not exist)")
# *************************************************

print(f"\n✅ Cell 8: Parametric baseline sweep for {TARGET_MODEL} completed.")

✅ Imported Phase 1 worker function: run_single_instance

--- Cell 8: Run Parametric Sweep (GPU - Phase 1 WS Sweep - Completed State) ---


RuntimeError: ❌ FATAL: Error loading configuration for sweep: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
# Cell 9: Critical Point Analysis (FSS on Susceptibility with Optuna)
# Description: Calculates Susceptibility (Chi). Uses Optuna to find the best FSS parameters
#              (pc, gamma/nu, 1/nu) by minimizing collapse error for Chi. Plots the result.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit, minimize # Keep minimize for comparison if needed
import warnings
import os
import traceback
import json
import optuna # Import Optuna

# --- Suppress Optuna INFO messages for cleaner output ---
optuna.logging.set_verbosity(optuna.logging.WARNING)

print("\n--- Cell 9: Critical Point Analysis (FSS on Susceptibility with Optuna) ---")

# --- Explicitly Load Configuration ---
# Load config dictionary (should exist from Cell 1)
config = {}
analysis_error = False
if 'config' not in globals() or not isinstance(config, dict):
     print("❌ FATAL: Config dictionary missing. Run Cell 1 first.")
     analysis_error = True
else:
     config = globals()['config'] # Use the existing config
     print(f"✅ Configuration dictionary loaded.")

# Load necessary parameters from config if no error
if not analysis_error:
    try:
        output_dir = config['OUTPUT_DIR']
        exp_name = config['EXPERIMENT_NAME']
        primary_metric = config.get('PRIMARY_ORDER_PARAMETER', 'variance_norm') # Need M for moments
        system_sizes = config.get('SYSTEM_SIZES', [])
        # Determine parameter name for the primary sweep (WS model in this case)
        param_name = 'p_value' # Default for WS
        ws_params = config.get('GRAPH_MODEL_PARAMS', {}).get('WS', {})
        ws_param_key = next((k for k in ws_params if k.endswith('_values')), None)
        if ws_param_key:
            param_name = ws_param_key.replace('_values', '_value') # Get the column name format
        else:
             warnings.warn("Could not dynamically determine WS sweep parameter name from config, defaulting to 'p_value'.")


        num_trials = config.get('NUM_TRIALS_PER_INSTANCE', 1) # For variance calc accuracy check
    except KeyError as e_key:
        print(f"❌ FATAL: Missing key '{e_key}' in loaded configuration.")
        analysis_error = True
    except Exception as config_e:
        print(f"❌ FATAL: Failed to load necessary parameters from configuration: {config_e}")
        analysis_error = True

# --- Helper Function ---
def format_metric(value, fmt):
    """Safely formats a numerical value using a format string."""
    # Check if value is valid number (not None, NaN, Inf)
    is_valid_number = False
    if value is not None:
        if isinstance(value, (int, float)):
             if np.isfinite(value):
                  is_valid_number = True

    if is_valid_number:
        try:
            # Apply formatting
            formatted_string = fmt % value
            return formatted_string
        except (TypeError, ValueError):
            # Handle potential formatting errors
            return "Format Error"
    else:
        # Return N/A for invalid inputs
        return "N/A"

# --- Diagnostic Check ---
if not analysis_error:
    print("\n--- Step 9.1: Diagnosing Input Data (`global_sweep_results`) ---")
    # Check if the primary results DataFrame exists and is valid
    if 'global_sweep_results' not in globals():
        analysis_error = True; print("❌ FATAL: `global_sweep_results` DataFrame missing (Run Cell 8).")
    elif not isinstance(global_sweep_results, pd.DataFrame):
        analysis_error = True; print("❌ FATAL: `global_sweep_results` is not a Pandas DataFrame.")
    elif global_sweep_results.empty:
        analysis_error = True; print("❌ FATAL: `global_sweep_results` DataFrame is empty.")
    else:
        print(f"  DataFrame Shape: {global_sweep_results.shape}")
        # Check for required columns needed for susceptibility calculation
        required_cols = ['N', param_name, primary_metric, 'instance', 'trial']
        missing_cols = []
        col_idx = 0
        while col_idx < len(required_cols):
             col = required_cols[col_idx]
             if col not in global_sweep_results.columns:
                  missing_cols.append(col)
             col_idx += 1

        if len(missing_cols) > 0:
             analysis_error = True; print(f"❌ FATAL: Missing required columns in DataFrame: {missing_cols}.")
        else:
             print(f"  Required columns found: {required_cols}.")
             # Check unique system sizes (N)
             unique_N = global_sweep_results['N'].unique()
             print(f"  Unique 'N' values found: {sorted(unique_N)}")
             if len(unique_N) < 2: # Need at least two sizes for FSS
                  analysis_error = True; print(f"❌ FATAL: Need at least 2 unique 'N' values for FSS. Found {len(unique_N)}.")
             else:
                  print("  Sufficient unique 'N' values for FSS.")
                  # Check the primary metric column for NaNs
                  print(f"\n  Diagnostics for primary metric column ('{primary_metric}'):")
                  metric_col = global_sweep_results[primary_metric]
                  total_count = len(metric_col)
                  non_nan_count = metric_col.notna().sum()
                  nan_count = metric_col.isna().sum()
                  print(f"    Total entries: {total_count}")
                  print(f"    Non-NaN entries: {non_nan_count}")
                  print(f"    NaN entries: {nan_count}")

                  if non_nan_count == 0:
                       analysis_error = True; print(f"❌ FATAL: Primary metric column '{primary_metric}' contains only NaNs.")
                  else:
                       # Display basic statistics if data is valid
                       try:
                           print("    Basic Stats (non-NaN values):\n", metric_col.describe())
                           print("✅ Input data appears valid for susceptibility calculation.")
                       except Exception as desc_e:
                            analysis_error = True; print(f"❌ Error getting descriptive statistics: {desc_e}")

# --- Initialize results dictionary for this cell ---
global_optuna_fss_chi_results = {} # Store Optuna results specifically for Chi FSS

# --- Proceed only if diagnostics passed ---
if not analysis_error:
    print(f"\n--- Step 9.2: Aggregating Susceptibility (χ) ---")
    fss_chi_df = pd.DataFrame() # Initialize empty dataframe
    try:
        # Calculate variance of the primary order parameter (M) across trials/instances
        # Group by system size (N) and the sweep parameter (p)
        # Use observed=True to handle potential categorical nature of grouped columns
        # Convert primary metric to numeric first, coercing errors to NaN
        M_numeric = pd.to_numeric(global_sweep_results[primary_metric], errors='coerce')
        # Calculate variance, requires at least 2 data points per group
        var_M = global_sweep_results.assign(M_numeric=M_numeric).groupby(['N', param_name], observed=True)['M_numeric'].var()

        # Check if variance calculation produced NaNs (indicates insufficient data in some groups)
        if var_M.isna().any():
            nan_groups = var_M[var_M.isna()].index.tolist()
            warnings.warn(f"NaNs found in Var(M) calculation for {len(nan_groups)} groups, possibly due to insufficient trials/instances per group. These groups will be dropped.", RuntimeWarning)
            # print("Groups with NaN variance:", nan_groups[:5]) # Print first few examples

        # Calculate Susceptibility: χ = N * Var(M)
        # Multiply the Series var_M by the corresponding 'N' level of its MultiIndex
        susceptibility_chi_agg = var_M.index.get_level_values('N') * var_M

        # Combine into a DataFrame for FSS analysis
        fss_chi_df = pd.DataFrame({'susceptibility_chi': susceptibility_chi_agg}).reset_index()
        # Drop rows where susceptibility could not be calculated (resulted in NaN)
        fss_chi_df = fss_chi_df.dropna()

        # Check if the resulting DataFrame is usable for FSS
        if fss_chi_df.empty:
             raise ValueError("Susceptibility DataFrame is empty after aggregation/dropna.")
        if fss_chi_df['N'].nunique() < 2 :
            raise ValueError(f"Susceptibility DataFrame has < 2 unique sizes ({fss_chi_df['N'].unique()}) after aggregation/dropna.")

        print(f"  Aggregated Susceptibility (χ) ready for FSS (Entries: {len(fss_chi_df)}).")
        # print("  Sample aggregated data:\n", fss_chi_df.head())

    except KeyError as e_agg_key:
        print(f"❌ Error aggregating susceptibility: Missing column {e_agg_key}")
        traceback.print_exc(limit=1)
        analysis_error = True
    except Exception as agg_chi_e:
        print(f"❌ Error aggregating susceptibility: {agg_chi_e}")
        traceback.print_exc(limit=1)
        analysis_error = True


# --- FSS on Susceptibility using Optuna ---
if not analysis_error:
    print(f"\n--- Step 9.3: FSS on Susceptibility using Optuna ---")

    # --- Prepare Data for Optuna Objective ---
    # Extract columns needed for scaling into NumPy arrays
    # Ensure data types are appropriate for calculations (float)
    try:
        Ls_chi = fss_chi_df['N'].values.astype(np.float64) # System sizes (L)
        ps_chi = fss_chi_df[param_name].values.astype(np.float64) # Control parameter (p)
        Ms_chi = fss_chi_df['susceptibility_chi'].values.astype(np.float64) # Observable (M = Chi)
    except KeyError as e_fss_prep:
         print(f"❌ Error preparing FSS data: Missing column {e_fss_prep}.")
         analysis_error = True
    except Exception as e_fss_prep_other:
         print(f"❌ Error preparing FSS data: {e_fss_prep_other}.")
         analysis_error = True


    # --- Define Optuna Objective Function ---
    # This function calculates collapse error for given trial parameters (pc, gamma/nu, 1/nu)
    # It aims to minimize the variance of scaled data points within bins along the scaled x-axis.
    def objective_fss_chi(trial):
        # Suggest parameters within defined ranges using Optuna's trial object
        # Log scale for pc is often useful if pc is expected near zero
        pc = trial.suggest_float("pc", 1e-5, 0.1, log=True)
        # Range for exponent ratios - adjust based on expected physics or preliminary results
        gamma_nu = trial.suggest_float("gamma_over_nu", 0.1, 3.0) # gamma / nu
        one_nu = trial.suggest_float("one_over_nu", 0.1, 5.0)   # 1 / nu

        # --- Calculate scaled variables & collapse error ---
        # Scaling for Susceptibility: Y = Chi * L^(-gamma/nu), X = (p - pc) * L^(1/nu)
        # Use 'p' or 'r' depending on the model, pc is the critical value being tested
        scaled_x = (ps_chi - pc) * (Ls_chi ** one_nu)
        scaled_y = Ms_chi * (Ls_chi ** (-gamma_nu)) # Note the negative sign in exponent for Chi scaling

        # Sort data points based on the scaled X values for binning
        # This is crucial for calculating variance within defined x-bins
        sorted_indices = np.argsort(scaled_x)
        scaled_x_sorted = scaled_x[sorted_indices]
        scaled_y_sorted = scaled_y[sorted_indices]

        # --- Calculate Collapse Error using Binning ---
        total_error = 0.0 # Accumulator for variance within bins
        num_bins = 20 # Number of bins to divide the scaled x-axis into

        try:
            # Filter out potential Inf/-Inf values resulting from scaling (e.g., L^large_exponent)
            # Only consider points where both scaled_x and scaled_y are finite
            valid_indices = np.isfinite(scaled_x_sorted) & np.isfinite(scaled_y_sorted)
            if not np.any(valid_indices):
                # If no valid points remain after scaling, return infinity (worst possible collapse)
                return np.inf

            scaled_x_finite = scaled_x_sorted[valid_indices]
            scaled_y_finite = scaled_y_sorted[valid_indices]

            # Ensure there are enough points for binning
            num_valid_points = len(scaled_x_finite)
            if num_valid_points < num_bins:
                # Reduce bins if fewer points than requested bins
                num_bins = max(1, num_valid_points // 2) # Ensure at least one bin

            # Determine bin edges based on the range of finite scaled x values
            min_x = np.min(scaled_x_finite)
            max_x = np.max(scaled_x_finite)

            # Handle edge case where all scaled x values are identical
            if abs(min_x - max_x) < 1e-9:
                # If all x are same, collapse error is just the variance of y
                if num_valid_points > 1:
                     return np.var(scaled_y_finite)
                else:
                     return 0.0 # Zero error if only one point

            # Define bin edges using linspace
            bins = np.linspace(min_x, max_x, num_bins + 1)
            # Assign each data point to a bin based on its scaled x value
            bin_indices = np.digitize(scaled_x_finite, bins)

            # Calculate variance within each bin
            non_empty_bin_count = 0 # Count bins with enough data for variance calculation
            bin_idx = 1 # Bin indices from digitize start at 1
            while bin_idx <= num_bins:
                # Select scaled y values belonging to the current bin
                y_in_bin = scaled_y_finite[bin_indices == bin_idx]
                # Calculate variance only if there are at least 2 points in the bin
                if len(y_in_bin) > 1:
                    total_error = total_error + np.var(y_in_bin)
                    non_empty_bin_count = non_empty_bin_count + 1
                bin_idx = bin_idx + 1 # Increment bin index

            # Return average variance across non-empty bins (lower is better collapse)
            if non_empty_bin_count > 0:
                average_variance = total_error / non_empty_bin_count
                return average_variance
            else:
                # If no bins had enough data, return infinity (indicates poor parameter choice)
                return np.inf

        except Exception as e_obj:
            # Catch any unexpected errors during error calculation
            warnings.warn(f"Error in objective function: {e_obj}", RuntimeWarning)
            return np.inf # Return high error on any calculation failure


    # --- Run Optuna Study ---
    n_optuna_trials = 100 # Number of optimization trials (adjust as needed for convergence)
    print(f"  Running Optuna study ({n_optuna_trials} trials) to find best FSS parameters for Chi...")
    # Create study object, aiming to minimize the objective function (collapse error)
    study_chi = optuna.create_study(direction='minimize')
    optimization_success = False
    try:
        # Run the optimization process
        study_chi.optimize(
            objective_fss_chi, # The function to minimize
            n_trials=n_optuna_trials, # Number of trials to run
            show_progress_bar=True # Display progress bar
        )
        optimization_success = True # Mark as successful if optimize completes

    except Exception as optuna_err:
        print(f"❌ Error during Optuna optimization: {optuna_err}")
        traceback.print_exc(limit=2)
        global_optuna_fss_chi_results = {'success': False} # Store failure state
        analysis_error = True # Mark analysis as failed

    # --- Process and Store Best Results ---
    if optimization_success:
        if study_chi.best_trial:
            best_params = study_chi.best_params # Dictionary of best parameters found
            best_value = study_chi.best_value   # The minimum objective value achieved

            # Extract best parameters
            pc_opt = best_params['pc']
            gamma_nu_opt = best_params['gamma_over_nu']
            one_nu_opt = best_params['one_over_nu']

            # Calculate original exponents gamma and nu from the optimized ratios
            nu_opt = np.nan
            gamma_opt = np.nan
            # Avoid division by zero if 1/nu is very close to zero
            if abs(one_nu_opt) > 1e-6:
                 nu_opt = 1.0 / one_nu_opt
                 gamma_opt = gamma_nu_opt * nu_opt # gamma = (gamma/nu) * nu
            else:
                 warnings.warn("Optuna result 1/nu is too close to zero. Cannot calculate nu and gamma.", RuntimeWarning)


            # Store results in the global dictionary for this cell
            global_optuna_fss_chi_results = {
                'pc': pc_opt,
                'gamma': gamma_opt,
                'nu': nu_opt,
                'gamma_over_nu': gamma_nu_opt, # Store the ratio directly optimized
                'one_over_nu': one_nu_opt,     # Store the ratio directly optimized
                'success': True,
                'objective': best_value        # Store the best collapse error found
            }
            print("\n  ✅ Optuna FSS Optimization Successful for Chi:")
            print(f"     Best Objective Value (Avg Variance): {best_value:.4e}")
            print(f"     p_c (Optuna) ≈ {pc_opt:.6f}")
            print(f"     γ (Optuna)   ≈ {format_metric(gamma_opt, '%.4f')}")
            print(f"     ν (Optuna)   ≈ {format_metric(nu_opt, '%.4f')}")
            print(f"     (Optimized Ratios: γ/ν ≈ {gamma_nu_opt:.4f}, 1/ν ≈ {one_nu_opt:.4f})")
        else:
             print("  ❌ Optuna study completed but reported no best trial was found.")
             global_optuna_fss_chi_results = {'success': False}


    # --- Plot FSS Data Collapse using Optuna Results ---
    if global_optuna_fss_chi_results.get('success', False):
        print("  Generating FSS data collapse plot for Chi using Optuna parameters...")
        # Retrieve best parameters needed for plotting
        pc = global_optuna_fss_chi_results['pc']
        gamma_nu = global_optuna_fss_chi_results['gamma_over_nu']
        one_nu = global_optuna_fss_chi_results['one_over_nu']
        nu_val = global_optuna_fss_chi_results['nu'] # For label formatting

        # Recalculate scaled variables using the optimal parameters
        scaled_x = (ps_chi - pc) * (Ls_chi ** one_nu)
        scaled_y = Ms_chi * (Ls_chi ** (-gamma_nu)) # Y = Chi * L^(-gamma/nu)

        # Create plot
        fig_fss_chi, ax_fss_chi = plt.subplots(figsize=(8, 6))
        # Get unique system sizes for plotting legend and colors
        unique_Ls_plot = sorted(np.unique(Ls_chi))
        # Create color map for different system sizes
        colors = plt.cm.viridis(np.linspace(0, 1, len(unique_Ls_plot)))

        # Plot data for each system size using a loop
        l_idx = 0
        while l_idx < len(unique_Ls_plot):
            L = unique_Ls_plot[l_idx]
            # Create mask to select data for the current system size L
            mask = Ls_chi == L
            # Scatter plot for this system size
            ax_fss_chi.scatter(scaled_x[mask], scaled_y[mask],
                               label=f'N={int(L)}', # Legend label
                               color=colors[l_idx], # Color based on size
                               alpha=0.7, s=20) # Adjust alpha/size for visibility
            l_idx += 1 # Increment loop counter

        # Configure plot labels and title
        xlabel_str = f'$(p - p_c) N^{{1/\\nu}}$  (p$_c$≈{pc:.4f}, ν≈{format_metric(nu_val,"%.3f")})'
        ylabel_str = f'$\\chi \\times N^{{-\\gamma/\\nu}}$  (γ/ν≈{gamma_nu:.3f})'
        ax_fss_chi.set_xlabel(xlabel_str)
        ax_fss_chi.set_ylabel(ylabel_str)
        ax_fss_chi.set_title(f'FSS Data Collapse for Susceptibility χ (Optuna Fit - WS)')
        ax_fss_chi.grid(True, linestyle=':')
        ax_fss_chi.legend(title='System Size N')

        # Optional: Adjust plot limits based on scaled data range if needed
        # x_min, x_max = np.percentile(scaled_x[np.isfinite(scaled_x)], [1, 99])
        # y_min, y_max = np.percentile(scaled_y[np.isfinite(scaled_y)], [1, 99])
        # ax_fss_chi.set_xlim(x_min * 1.1, x_max * 1.1)
        # ax_fss_chi.set_ylim(y_min * 1.1, y_max * 1.1)

        plt.tight_layout() # Adjust layout
        # Define filename and save the plot
        fss_chi_plot_filename = os.path.join(output_dir, f"{exp_name}_WS_Susceptibility_FSS_collapse_OPTUNA.png")
        try:
            plt.savefig(fss_chi_plot_filename, dpi=150)
            print(f"  ✅ FSS Chi Collapse plot (Optuna) saved to: {fss_chi_plot_filename}")
        except Exception as e_save:
            print(f"  ❌ Error saving FSS Chi plot: {e_save}")
        plt.close(fig_fss_chi) # Close plot figure to free memory
    else:
        # Message if Optuna failed or no results found
        print("  Skipping FSS Chi collapse plot as Optuna optimization did not yield successful results.")

# Final message if analysis was skipped due to initial errors
elif analysis_error:
    print("\n❌ Skipping Analysis Steps 9.2-9.3 due to configuration or diagnostic errors.")

print("\n✅ Cell 9: Analysis completed.")


--- Cell 9: Critical Point Analysis (FSS on Susceptibility with Optuna) ---
✅ Configuration dictionary loaded.
❌ FATAL: Missing key ''OUTPUT_DIR'' in loaded configuration.

❌ Skipping Analysis Steps 9.2-9.3 due to configuration or diagnostic errors.

✅ Cell 9: Analysis completed.


In [10]:
# Cell 10: Report Final Critical Parameters (WS Model)
# Description: Reports the final, most reliable estimates for the critical point (pc)
#              and exponents (gamma, nu) based on the successful Optuna FSS analysis
#              of Susceptibility (Chi) from Cell 9. Beta remains undetermined by this method.

import numpy as np
import os
import json
import pandas as pd # Import pandas for safe checking

print("\n--- Cell 10: Report Final Critical Parameters (WS Model) ---")

# --- Prerequisites ---
reporting_error = False
# Check if config exists (should be loaded by Cell 1 or earlier)
if 'config' not in globals() or not isinstance(config, dict):
    print("❌ Cannot report final parameters: Config dictionary missing. Run Cell 1.")
    reporting_error = True

# Check for results from Optuna FSS on Chi (from Cell 9)
optuna_results_exist = False
if 'global_optuna_fss_chi_results' in globals():
    if isinstance(global_optuna_fss_chi_results, dict):
         if global_optuna_fss_chi_results.get('success', False):
              optuna_results_exist = True
         else:
              print("❌ Cannot report final parameters: Optuna FSS Chi optimization failed (success=False).")
              reporting_error = True
    else:
         print("❌ Cannot report final parameters: Optuna FSS Chi results 'global_optuna_fss_chi_results' is not a dictionary.")
         reporting_error = True
else:
    print("❌ Cannot report final parameters: Optuna FSS Chi results 'global_optuna_fss_chi_results' missing (Run Cell 9).")
    reporting_error = True

# Load necessary config variables if no error yet
output_dir = None
exp_name = None
primary_metric = None
if not reporting_error:
     try:
          output_dir = config['OUTPUT_DIR']
          exp_name = config['EXPERIMENT_NAME']
          primary_metric = config.get('PRIMARY_ORDER_PARAMETER', 'variance_norm') # Metric for context
     except KeyError as e_key_rep:
           print(f"❌ Cannot report final parameters: Missing key '{e_key_rep}' in config.")
           reporting_error = True


# --- Report Final Parameters from Optuna FSS Chi ---
if not reporting_error:
    # Safely get results from the Optuna dictionary using .get() with NaN default
    pc_final = global_optuna_fss_chi_results.get('pc', np.nan)
    gamma_final = global_optuna_fss_chi_results.get('gamma', np.nan)
    nu_final = global_optuna_fss_chi_results.get('nu', np.nan)
    success = global_optuna_fss_chi_results.get('success', False) # Should be True here

    # Helper function for consistent formatting
    def format_report(value, fmt):
        try: return fmt % value if pd.notna(value) else "N/A"
        except (TypeError, ValueError): return "Format Error"

    print(f"  ✅ Final Critical Parameters for WS Model Transition (from Susceptibility χ FSS):")
    print(f"     Critical Point (p_c): {format_report(pc_final, '%.6f')}")
    print(f"     Exponent Gamma (γ):   {format_report(gamma_final, '%.4f')}")
    print(f"     Exponent Nu (ν):      {format_report(nu_final, '%.4f')}")
    print(f"\n  Note: Exponent Beta (β) related to the order parameter ('{primary_metric}')")
    print("        could not be reliably determined using standard FSS collapse methods in Phase 1.")
    print("        Susceptibility (χ) FSS proved most effective.")

    # --- Save Key Metrics ---
    key_metrics_path = os.path.join(output_dir, f"{exp_name}_key_metrics.json")
    # Load existing metrics if file exists, update with new values
    key_metrics = {}
    if os.path.exists(key_metrics_path):
        try:
             with open(key_metrics_path, 'r') as f_read_metrics:
                  key_metrics = json.load(f_read_metrics)
             # Basic validation if loaded data is a dictionary
             if not isinstance(key_metrics, dict):
                  warnings.warn(f"Existing key metrics file '{key_metrics_path}' is not a valid JSON dictionary. Overwriting.", RuntimeWarning)
                  key_metrics = {}
        except json.JSONDecodeError as e_load_json:
             warnings.warn(f"Could not decode existing key metrics file '{key_metrics_path}'. Overwriting. Error: {e_load_json}", RuntimeWarning)
             key_metrics = {}
        except Exception as e_load:
             warnings.warn(f"Could not load existing key metrics file '{key_metrics_path}'. Overwriting. Error: {e_load}", RuntimeWarning)
             key_metrics = {}

    # Update with final WS values (prefixing to avoid name clashes if other models analyzed later)
    # Store results specifically from the Chi FSS analysis
    key_metrics['final_pc_ws_chi'] = pc_final
    key_metrics['final_gamma_ws_chi'] = gamma_final
    key_metrics['final_nu_ws_chi'] = nu_final
    key_metrics['ws_chi_fss_success'] = success
    key_metrics['ws_chi_fss_objective'] = global_optuna_fss_chi_results.get('objective', np.nan)


    # Optionally include original FSS results for comparison if needed
    # (Assuming original FSS was attempted and stored in global_fss_results_orig)
    # if 'global_fss_results_orig' in globals() and isinstance(global_fss_results_orig, dict):
    #     if global_fss_results_orig.get('success'):
    #         key_metrics['orig_fss_pc_ws_var'] = global_fss_results_orig.get('pc')
    #         key_metrics['orig_fss_beta_ws_var'] = global_fss_results_orig.get('beta')
    #         key_metrics['orig_fss_nu_ws_var'] = global_fss_results_orig.get('nu')
    #     key_metrics['orig_fss_var_success'] = global_fss_results_orig.get('success', False)


    # Save the updated metrics dictionary to JSON
    try:
        with open(key_metrics_path, 'w') as f_write_metrics:
            # Use default serializer to handle potential numpy types if any remain
             def default_serializer(obj):
                if isinstance(obj, (np.bool_)): return bool(obj)
                if isinstance(obj, (np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64)): return int(obj)
                if isinstance(obj, (np.float_, np.float16, np.float32, np.float64)): return float(obj) if np.isfinite(obj) else None # Convert NaN/Inf to None
                if isinstance(obj, (np.ndarray,)): return obj.tolist() # Convert arrays to lists
                try: return str(obj)
                except TypeError: return repr(obj) # Fallback if str fails

             json.dump(key_metrics, f_write_metrics, indent=4, default=default_serializer)
        print(f"\n  ✅ Saved/Updated final WS critical parameters to: {key_metrics_path}")
    except TypeError as e_type_save:
        print(f"  ⚠️ Error saving final key metrics (TypeError - check data types): {e_type_save}")
        traceback.print_exc(limit=1)
    except Exception as e_save:
        print(f"  ⚠️ Error saving final key metrics: {e_save}")

else:
    print("❌ Skipping final parameter reporting due to missing or failed analysis results.")

print("\n✅ Cell 10: Final critical parameter reporting completed.")


--- Cell 10: Report Final Critical Parameters (WS Model) ---
❌ Cannot report final parameters: Optuna FSS Chi optimization failed (success=False).
❌ Skipping final parameter reporting due to missing or failed analysis results.

✅ Cell 10: Final critical parameter reporting completed.


In [11]:
# Cell 11: Universality Testing Sweeps (GPU - Final Implementation - Indentation Fix)
# Description: Runs or loads sweeps for SBM and RGG models using the GPU-enabled
#              run_single_instance function. Combines results. Corrects indentation error.

import pandas as pd
import numpy as np
import networkx as nx
import time
import os
import pickle
import itertools
import warnings
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm.auto import tqdm
import multiprocessing as mp # Ensure imported
import torch # Ensure imported
import traceback # Ensure imported

print("\n--- Cell 11: Universality Testing Sweeps (GPU - Final Implementation - Indentation Fix) ---")

# --- Configuration & Prerequisite Checks ---
analysis_error_uni = False
if 'config' not in globals():
    print("❌ FATAL: Config dictionary missing. Run Cell 1."); analysis_error_uni = True
if 'global_device' not in globals():
    print("❌ FATAL: Global device not defined. Run Cell 0."); analysis_error_uni = True
if 'get_sweep_parameters' not in globals() or 'generate_graph' not in globals():
     print("❌ FATAL: Helper functions missing. Run Cell 2."); analysis_error_uni = True
# Check worker function availability
worker_func = None
if 'run_single_instance' in globals():
    worker_func = run_single_instance # Use Phase 1 worker if available
    print("  Using Phase 1 worker 'run_single_instance'.")
elif 'run_single_instance_phase2' in globals():
     worker_func = run_single_instance_phase2 # Fallback to Phase 2 worker
     print("  Using Phase 2 worker 'run_single_instance_phase2'.")
else:
     print("❌ FATAL: No suitable worker function found ('run_single_instance' or 'run_single_instance_phase2').")
     analysis_error_uni = True


# --- Load Configuration Variables ---
if not analysis_error_uni:
     try:
         device = global_device
         output_dir = config['OUTPUT_DIR']; exp_name = config['EXPERIMENT_NAME']
         system_sizes_uni = config['SYSTEM_SIZES']
         graph_params_all = config['GRAPH_MODEL_PARAMS']
         num_instances = config['NUM_INSTANCES_PER_PARAM']
         num_trials = config['NUM_TRIALS_PER_INSTANCE']
         workers = config['PARALLEL_WORKERS']
         rule_params_base = config['RULE_PARAMS']
         max_steps = config['MAX_SIMULATION_STEPS']
         conv_thresh = config['CONVERGENCE_THRESHOLD']
         state_dim = config['STATE_DIM']
         calculate_energy = config.get('CALCULATE_ENERGY', False)
         store_energy_history = config.get('STORE_ENERGY_HISTORY', False)
         energy_type = config.get('ENERGY_FUNCTIONAL_TYPE', 'pairwise_dot')
         all_metrics = config.get('ORDER_PARAMETERS_TO_ANALYZE', [])
     except KeyError as e_key_uni:
          print(f"❌ FATAL: Missing key '{e_key_uni}' in config for universality sweeps.")
          analysis_error_uni = True
     except Exception as e_conf_uni:
          print(f"❌ FATAL: Error loading config for universality sweeps: {e_conf_uni}.")
          analysis_error_uni = True

# --- File Paths & Loading Existing Results ---
combined_results_file = None
combined_pickle_file = None
all_universality_results_list = []
models_available = []
models_to_run = []

if not analysis_error_uni:
    combined_results_file = os.path.join(output_dir, f"{exp_name}_universality_COMBINED_results.csv")
    combined_pickle_file = os.path.join(output_dir, f"{exp_name}_universality_COMBINED_partial.pkl")
    models_available = list(graph_params_all.keys()) # Get all models defined in config
    models_to_run = models_available[:] # Start assuming all models need to run

    # Robust loading logic for combined_pickle_file/CSV
    if os.path.exists(combined_pickle_file):
        try:
            with open(combined_pickle_file, 'rb') as f_load_pkl:
                all_universality_results_list = pickle.load(f_load_pkl)
            if isinstance(all_universality_results_list, list) and len(all_universality_results_list) > 0:
                 # If pickle loaded successfully, determine which models are already present
                 loaded_df = pd.DataFrame(all_universality_results_list)
                 if 'model' in loaded_df.columns:
                     models_completed = loaded_df['model'].unique()
                     # Update models_to_run by removing completed models
                     models_to_run = [m for m in models_available if m not in models_completed]
                     print(f"  Loaded {len(all_universality_results_list)} combined results from pickle. Models already completed: {list(models_completed)}")
                 else:
                      warnings.warn("Loaded pickle file is missing 'model' column. Cannot determine completed models reliably.", RuntimeWarning)
                      # Assume need to run all if 'model' column is missing
                      models_to_run = models_available[:]
                      all_universality_results_list = [] # Reset list if format is unexpected
            else:
                  # Reset list if loaded object is not a non-empty list
                  all_universality_results_list = []
        except Exception as e_load_uni_pkl:
            warnings.warn(f"Could not load or parse universality pickle file '{combined_pickle_file}'. Error: {e_load_uni_pkl}", RuntimeWarning)
            all_universality_results_list = [] # Reset list on failure

    # If pickle didn't exist or failed load, check for CSV (less ideal as vectors are lost)
    elif os.path.exists(combined_results_file):
         warnings.warn(f"Pickle file not found, attempting to load from CSV '{combined_results_file}'. Note: State vectors will be missing.", RuntimeWarning)
         try:
              # Load CSV, assuming it has the 'model' column
              loaded_df_csv = pd.read_csv(combined_results_file)
              if 'model' in loaded_df_csv.columns:
                   # Convert CSV rows to dictionaries (approximates structure, loses vectors)
                   all_universality_results_list = loaded_df_csv.to_dict('records')
                   models_completed_csv = loaded_df_csv['model'].unique()
                   models_to_run = [m for m in models_available if m not in models_completed_csv]
                   print(f"  Loaded {len(all_universality_results_list)} results from CSV. Models already completed: {list(models_completed_csv)}")
              else:
                   warnings.warn("Loaded CSV file is missing 'model' column. Assuming no models completed.", RuntimeWarning)
                   models_to_run = models_available[:]
                   all_universality_results_list = []
         except Exception as e_load_uni_csv:
              warnings.warn(f"Could not load universality CSV file '{combined_results_file}'. Error: {e_load_uni_csv}", RuntimeWarning)
              models_to_run = models_available[:]
              all_universality_results_list = []


    print(f"  Models remaining to run for Universality tests: {models_to_run}")

# --- Run Sweeps for Remaining Models ---
if not analysis_error_uni and models_to_run:
    print("\n--- Running Individual Model Universality Sweeps ---")
    # Set spawn method if needed (should be done in Cell 0)
    try:
        if mp.get_start_method(allow_none=True) != 'spawn':
             print("🚨 WARNING: Multiprocessing start method not 'spawn'. Forcing again.")
             mp.set_start_method('spawn', force=True)
    except Exception as e_set_spawn:
         print(f"⚠️ Warning: Could not force 'spawn' start method: {e_set_spawn}")

    # Iterate through models that need to be run
    model_idx_outer = 0
    while model_idx_outer < len(models_to_run):
        model_name = models_to_run[model_idx_outer]
        print(f"\n--- Running Universality Experiment for Model: {model_name} ---")
        # Get parameters for the current model
        model_params = config['GRAPH_MODEL_PARAMS'].get(model_name, {})
        if not model_params:
             warnings.warn(f"No parameters found for model '{model_name}' in config. Skipping.", RuntimeWarning)
             model_idx_outer += 1; continue

        # Find the sweep parameter name for this model dynamically
        param_name_uni = None
        param_col_name_uni = None
        model_param_iter = iter(model_params.items())
        stop_model_param_iter = False
        while not stop_model_param_iter:
             try:
                  key, value = next(model_param_iter)
                  if isinstance(value, (list, np.ndarray)):
                       param_name_uni = key.replace('_values', '')
                       param_col_name_uni = param_name_uni + '_value'
                       stop_model_param_iter = True
             except StopIteration:
                  stop_model_param_iter = True
        # Fallback if no list/array found
        if param_name_uni is None:
             warnings.warn(f"Could not find sweep parameter for model {model_name}. Assuming 'param'.", RuntimeWarning)
             param_name_uni = 'param'; param_col_name_uni = 'param_value'


        # --- Setup per-model Logging & Partial Results ---
        model_log_file = os.path.join(output_dir, f"{exp_name}_universality_{model_name}.log")
        model_partial_results_file = os.path.join(output_dir, f"{exp_name}_universality_{model_name}_partial.pkl")
        model_completed_tasks = set(); model_results_list = [] # Reset for each model

        # --- Robust loading for per-model files (to handle restarts within a model run) ---
        if os.path.exists(model_log_file):
            try:
                with open(model_log_file, 'r') as f_mlog:
                    line_idx=0
                    for line in f_mlog:
                        sig = line.strip();
                        if sig: model_completed_tasks.add(sig)
                        line_idx+=1
            except Exception as e_mlog: warnings.warn(f"Could not load model log {model_log_file}: {e_mlog}", RuntimeWarning)
        if os.path.exists(model_partial_results_file):
            try:
                with open(model_partial_results_file, 'rb') as f_mpkl: model_results_list = pickle.load(f_mpkl)
                # Rebuild signatures from loaded model results if list is valid
                if isinstance(model_results_list, list) and len(model_results_list) > 0:
                     temp_df_sig_model = pd.DataFrame(model_results_list)
                     required_sig_cols_model = ['N', param_col_name_uni, 'instance', 'trial']
                     cols_exist_model = all(k in temp_df_sig_model.columns for k in required_sig_cols_model)
                     if cols_exist_model:
                          rebuilt_model_sigs = set()
                          row_idx_m = 0
                          while row_idx_m < len(temp_df_sig_model):
                               row_m = temp_df_sig_model.iloc[row_idx_m]
                               try:
                                   sig_m = f"N={int(row_m['N'])}_{param_name_uni}={row_m[param_col_name_uni]:.5f}_inst={int(row_m['instance'])}_trial={int(row_m['trial'])}"
                                   rebuilt_model_sigs.add(sig_m)
                               except Exception: pass # Ignore formatting errors
                               row_idx_m += 1
                          model_completed_tasks.update(rebuilt_model_sigs)
                     else: warnings.warn(f"Model partial results for {model_name} missing columns for signature rebuild.", RuntimeWarning)
                     del temp_df_sig_model
                else: model_results_list = [] # Reset if not list or empty
            except Exception as e_mpkl: warnings.warn(f"Could not load model partial results {model_partial_results_file}: {e_mpkl}", RuntimeWarning); model_results_list = []

        # Generate & Filter tasks for the current model
        uni_tasks_model = get_sweep_parameters(
            graph_model_name=model_name, model_params=model_params,
            system_sizes=system_sizes_uni, instances=num_instances, trials=num_trials
        )
        model_tasks_to_run = []; # List of tasks still needing execution
        task_filter_idx = 0
        while task_filter_idx < len(uni_tasks_model):
            task_params = uni_tasks_model[task_filter_idx]
            # Check if the parameter column exists
            if param_col_name_uni not in task_params:
                 warnings.warn(f"Task {task_filter_idx} for {model_name} missing key '{param_col_name_uni}'. Skipping.", RuntimeWarning)
            else:
                 # Create signature and check against completed set
                 try:
                     task_sig = f"N={int(task_params['N'])}_{param_name_uni}={task_params[param_col_name_uni]:.5f}_inst={int(task_params['instance'])}_trial={int(task_params['trial'])}"
                     if task_sig not in model_completed_tasks:
                         model_tasks_to_run.append(task_params)
                 except Exception: pass # Ignore tasks that cause signature errors
            task_filter_idx += 1

        print(f"Prepared {len(uni_tasks_model)} total tasks for {model_name}. Need to run {len(model_tasks_to_run)} new tasks.")

        # Execute if needed
        if model_tasks_to_run:
            model_start_time = time.time(); model_futures = {}; pool_broken_flag_model = False
            executor_instance_model = ProcessPoolExecutor(max_workers=workers)
            try:
                # Submit tasks for the current model
                submit_idx_m = 0
                while submit_idx_m < len(model_tasks_to_run):
                    task_params = model_tasks_to_run[submit_idx_m]
                    # Double check key exists before graph generation
                    if param_col_name_uni not in task_params: submit_idx_m += 1; continue

                    graph_gen_params_m = task_params.get('fixed_params', {}).copy()
                    graph_gen_params_m[param_name_uni] = task_params[param_col_name_uni] # Add sweep param with base name
                    G = generate_graph( task_params['model'], graph_gen_params_m, task_params['N'], task_params['graph_seed'] )

                    if G is None or G.number_of_nodes() == 0: submit_idx_m += 1; continue # Skip failed graph gen

                    # Submit task using the selected worker function
                    future = executor_instance_model.submit(
                        worker_func, # The globally selected worker
                        graph=G, N=task_params['N'], instance_params=task_params, trial_seed=task_params['sim_seed'],
                        rule_params_in=rule_params_base, max_steps=max_steps, conv_thresh=conv_thresh, state_dim=state_dim,
                        calculate_energy=calculate_energy, store_energy_history=store_energy_history,
                        energy_type=energy_type, metrics_to_calc=all_metrics, device=str(device)
                    )
                    model_futures[future] = task_params
                    submit_idx_m += 1

                # Collect results for the current model
                pbar_model = tqdm(total=len(model_futures), desc=f"Sweep ({model_name})", mininterval=2.0, unit="task")
                log_freq_m = max(1, len(model_futures)//50); save_freq_m = max(20, len(model_futures)//10)
                tasks_done_m_since_save = 0
                with open(model_log_file, 'a') as f_log_model:
                    future_get_idx = 0
                    futures_list_m = list(model_futures.keys()) # Get a list of futures to iterate through
                    while future_get_idx < len(futures_list_m):
                        future = futures_list_m[future_get_idx]
                        task_params = model_futures[future] # Get original params

                        if pool_broken_flag_model: pbar_model.update(1); future_get_idx+=1; continue # Skip if pool broke

                        try:
                            result_dict = future.result(timeout=1200) # Use timeout
                            if result_dict is not None and isinstance(result_dict, dict):
                                 # Combine task params and result dict
                                 full_result = {**task_params, **result_dict}
                                 model_results_list.append(full_result); tasks_done_m_since_save += 1
                                 # Log completion periodically
                                 param_val_key_l = param_col_name_uni
                                 is_log_step_m = (future_get_idx % log_freq_m == 0)
                                 success_m = result_dict.get('error_message') is None
                                 key_exists_m = param_val_key_l in task_params
                                 if is_log_step_m and success_m and key_exists_m:
                                     try:
                                         task_sig = f"N={int(task_params['N'])}_{param_name_uni}={task_params[param_val_key_l]:.5f}_inst={int(task_params['instance'])}_trial={int(task_params['trial'])}"
                                         f_log_model.write(f"{task_sig}\n"); f_log_model.flush()
                                     except Exception: pass # Ignore logging errors
                        except Exception as e_get_m:
                             error_str_m = str(e_get_m)
                             is_broken_m = False
                             if "Broken" in error_str_m or "abruptly" in error_str_m or "shutdown" in error_str_m: is_broken_m = True
                             elif isinstance(e_get_m, TypeError) or isinstance(e_get_m, AttributeError): is_broken_m = True
                             if is_broken_m:
                                  print(f"\n❌ ERROR: Pool broke during {model_name} run. Exception: {type(e_get_m).__name__}: {e_get_m}"); pool_broken_flag_model = True
                             else: # Log other errors (e.g., Timeout)
                                  warnings.warn(f"Error getting result for {model_name} task {task_params}: {type(e_get_m).__name__}", RuntimeWarning)
                                  error_res_m = {**task_params, 'error_message': f"Future failed: {type(e_get_m).__name__}"}
                                  model_results_list.append(error_res_m); tasks_done_m_since_save += 1


                        finally:
                             pbar_model.update(1)
                             # *** CORRECTED INDENTATION START ***
                             # Save partial results for this model if frequency is met
                             should_save_m = tasks_done_m_since_save >= save_freq_m
                             if should_save_m:
                                 try:
                                     with open(model_partial_results_file, 'wb') as f_p: pickle.dump(model_results_list, f_p)
                                     tasks_done_m_since_save = 0 # Reset counter after successful save
                                 except Exception as e_sp_m: warnings.warn(f"Could not save partial results for {model_name}: {e_sp_m}", RuntimeWarning)
                             # *** CORRECTED INDENTATION END ***
                             future_get_idx += 1 # Increment get loop index


                except KeyboardInterrupt: print(f"\nInterrupted ({model_name}).")
                finally: pbar_model.close();

            except Exception as main_e_model: print(f"\n❌ ERROR during {model_name} setup: {main_e_model}"); traceback.print_exc(limit=2)
            finally:
                print(f"Shutting down executor ({model_name})..."); executor_instance_model.shutdown(wait=True, cancel_futures=True); print("Executor shut down.")
                # Attempt final save for the current model
                try:
                    with open(model_partial_results_file, 'wb') as f_p_final: pickle.dump(model_results_list, f_p_final)
                except Exception as e_spf: warnings.warn(f"Could not save final partial results for {model_name}: {e_spf}", RuntimeWarning)

            model_end_time = time.time()
            print(f"  ✅ Sweep for {model_name} completed ({model_end_time - model_start_time:.1f}s).")

        # Add model results to the main list, avoiding duplicates
        added_count = 0
        if isinstance(model_results_list, list) and len(model_results_list) > 0:
             # Rebuild existing signatures from the main list to prevent duplicates
             existing_signatures = set();
             if isinstance(all_universality_results_list, list) and len(all_universality_results_list) > 0:
                 try:
                     # Define keys needed to build a unique signature
                     param_keys = ['model', 'N', 'instance', 'trial', param_col_name_uni]
                     # Check if keys exist in the first item (assume consistent structure)
                     if all(k in all_universality_results_list[0] for k in param_keys):
                          sig_idx = 0
                          while sig_idx < len(all_universality_results_list):
                               res = all_universality_results_list[sig_idx]
                               try: existing_signatures.add(tuple(res.get(k) for k in param_keys))
                               except Exception: pass # Ignore signature errors
                               sig_idx += 1
                     else: warnings.warn("Could not build existing signatures - key mismatch.", RuntimeWarning)
                 except Exception as e_sig_build: warnings.warn(f"Error building existing signatures: {e_sig_build}", RuntimeWarning)

             # Iterate through results from the current model and add if signature is new
             new_result_idx = 0
             while new_result_idx < len(model_results_list):
                 res = model_results_list[new_result_idx]
                 try:
                      param_keys_check = ['model', 'N', 'instance', 'trial', param_col_name_uni]
                      # Check if keys exist in current result
                      if all(k in res for k in param_keys_check):
                           sig_tuple_check = tuple(res.get(k) for k in param_keys_check)
                           if sig_tuple_check not in existing_signatures:
                                all_universality_results_list.append(res); existing_signatures.add(sig_tuple_check); added_count += 1
                      else: warnings.warn("Result missing keys for signature check.", RuntimeWarning)
                 except Exception as e_add_res: warnings.warn(f"Error processing result for adding to main list: {e_add_res}", RuntimeWarning)
                 new_result_idx += 1

        print(f"  Added {added_count} new results from {model_name} to combined list (Total: {len(all_universality_results_list)}).")

        # Save combined list incrementally after each model finishes
        try:
            with open(combined_pickle_file, 'wb') as f_comb_partial: pickle.dump(all_universality_results_list, f_comb_partial)
        except Exception as e_sc_inc: warnings.warn(f"Incremental save of combined results failed: {e_sc_inc}", RuntimeWarning)

        # Check if pool broke during this model's run
        if pool_broken_flag_model:
             print(f"❌ Aborting universality sweeps because process pool failed during {model_name} run.")
             analysis_error_uni = True # Mark analysis as errored
             break # Exit the outer while loop over models

        model_idx_outer += 1 # Move to the next model

# --- Final Combine and Save ---
if not all_universality_results_list:
    print("\n⚠️ No universality results collected or loaded.")
elif not analysis_error_uni: # Only save if no pool break occurred
    print("\n--- Combining Universality Results ---")
    combined_df = pd.DataFrame(all_universality_results_list)
    # Check for errors reported by workers across all models
    if 'error_message' in combined_df.columns:
         failed_run_count_comb = combined_df['error_message'].notna().sum()
         if failed_run_count_comb > 0:
              warnings.warn(f"{failed_run_count_comb} total universality runs reported errors.", RuntimeWarning)

    # Save final combined results
    try:
        # Save metadata (without large vectors) to CSV
        cols_to_save_uni = [col for col in combined_df.columns if col not in ['final_state_vector', 'state_history', 'avg_change_history', 'baseline_state_for_spread']]
        combined_df[cols_to_save_uni].to_csv(combined_results_file, index=False)
        print(f"\n✅ Combined universality metadata ({combined_df.shape[0]}) saved to CSV: {combined_results_file}")
        # Save the full list (including vectors if present) to pickle
        with open(combined_pickle_file, 'wb') as f_comb_final:
             pickle.dump(all_universality_results_list, f_comb_final)
        print(f"\n✅ Combined universality full data saved to Pickle: {combined_pickle_file}")

    except Exception as e:
        print(f"❌ Error saving final combined universality results: {e}")
        traceback.print_exc(limit=1)

    # Store in global variable only if successfully processed
    global_universality_results = combined_df

else: # Case where analysis_error_uni is True (e.g., pool broke)
     print("\n--- Skipped Final Saving of Universality Results due to Sweep Errors ---")
     # Store partial results if available
     global_universality_results = pd.DataFrame(all_universality_results_list)


# Add a final check for the global variable
if 'global_universality_results' in globals() and isinstance(global_universality_results, pd.DataFrame) and not global_universality_results.empty:
     print(f"\n✅ `global_universality_results` DataFrame created (Shape: {global_universality_results.shape}).")
else:
     print("\n⚠️ `global_universality_results` DataFrame is empty or not created.")


print("\n✅ Cell 11: Universality testing sweeps completed or loaded.")

SyntaxError: invalid syntax (2091917542.py, line 318)

In [None]:
# Cell 11.1: Critical Point & Exponent Analysis (SBM Model - FSS on Chi with Optuna)
# Description: Analyzes SBM universality results. Calculates Susceptibility (Chi).
#              Uses Optuna to find the best FSS parameters (pc, gamma/nu, 1/nu) for Chi.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit, minimize # Keep minimize available
import warnings
import os
import traceback
import json
import optuna  # Import Optuna

# --- Suppress Optuna INFO messages ---
optuna.logging.set_verbosity(optuna.logging.WARNING)

print("\n--- Cell 11.1: Critical Point & Exponent Analysis (SBM Model - FSS on Chi with Optuna) ---")

# --- Prerequisites & Configuration ---
analysis_error_sbm = False
# Check for config dictionary
if 'config' not in globals() or not isinstance(config, dict):
    print("❌ FATAL: Config dictionary missing. Run Cell 1."); analysis_error_sbm = True
# Check for combined universality results DataFrame
if 'global_universality_results' not in globals():
    print("❌ FATAL: Combined universality DataFrame 'global_universality_results' missing. Run Cell 11.")
    analysis_error_sbm = True
elif not isinstance(global_universality_results, pd.DataFrame):
     print("❌ FATAL: 'global_universality_results' is not a Pandas DataFrame.")
     analysis_error_sbm = True
elif global_universality_results.empty:
    print("❌ FATAL: Combined universality DataFrame 'global_universality_results' is empty.")
    analysis_error_sbm = True
elif 'SBM' not in global_universality_results['model'].unique():
    # Check if SBM model data specifically is present
    print("❌ FATAL: No 'SBM' model results found in combined universality DataFrame.")
    analysis_error_sbm = True

# Load necessary config parameters if no error yet
output_dir = None
exp_name = None
primary_metric_sbm = None
system_sizes_sbm = None
param_name_sbm = None # Specific parameter name for SBM sweep
if not analysis_error_sbm:
    try:
        output_dir = config['OUTPUT_DIR']
        exp_name = config['EXPERIMENT_NAME']
        primary_metric_sbm = config.get('PRIMARY_ORDER_PARAMETER', 'variance_norm')  # Need M for moments
        system_sizes_sbm = config.get('SYSTEM_SIZES', [])
        # Dynamically find SBM sweep parameter column name
        sbm_params_cfg = config.get('GRAPH_MODEL_PARAMS', {}).get('SBM', {})
        sbm_sweep_key = next((k for k in sbm_params_cfg if k.endswith('_values')), None)
        if sbm_sweep_key:
             param_name_sbm = sbm_sweep_key.replace('_values', '_value') # e.g., 'p_intra_value'
        else:
             warnings.warn("Could not determine SBM sweep parameter name from config. Assuming 'p_intra_value'.", RuntimeWarning)
             param_name_sbm = 'p_intra_value'

        if not system_sizes_sbm:
            print("❌ FATAL: SYSTEM_SIZES list is empty in config.")
            analysis_error_sbm = True

    except KeyError as e_key_sbm:
        print(f"❌ FATAL: Missing key '{e_key_sbm}' in config for SBM analysis.")
        analysis_error_sbm = True
    except Exception as e_conf_sbm:
        print(f"❌ FATAL: Error loading config for SBM analysis: {e_conf_sbm}.")
        analysis_error_sbm = True

# --- Initialize results dictionary for this cell ---
global_optuna_fss_chi_sbm_results = {}

# --- Filter and Diagnose SBM Data ---
sbm_results_df = pd.DataFrame() # Initialize empty DataFrame
if not analysis_error_sbm:
    print(f"\n--- Step 11.1.1: Filtering and Diagnosing SBM Input Data ---")
    # Filter the combined DataFrame for SBM model results
    sbm_results_df = global_universality_results[global_universality_results['model'] == 'SBM'].copy()

    if sbm_results_df.empty:
        analysis_error_sbm = True
        print("❌ FATAL: SBM results DataFrame is empty after filtering.")
    else:
        print(f"  Filtered SBM DataFrame Shape: {sbm_results_df.shape}")
        # Check required columns for SBM analysis
        required_cols_sbm = ['N', param_name_sbm, primary_metric_sbm, 'instance', 'trial']
        missing_cols_sbm = []
        col_idx = 0
        while col_idx < len(required_cols_sbm):
             col = required_cols_sbm[col_idx]
             if col not in sbm_results_df.columns:
                  missing_cols_sbm.append(col)
             col_idx += 1

        if len(missing_cols_sbm) > 0:
            analysis_error_sbm = True
            print(f"❌ FATAL: SBM data missing required columns: {missing_cols_sbm}.")
        else:
            print(f"  Required columns found: {required_cols_sbm}")
            # Check unique system sizes (N) for SBM
            unique_N_sbm = sbm_results_df['N'].unique()
            print(f"  Unique 'N' for SBM: {sorted(unique_N_sbm)}")
            if len(unique_N_sbm) < 2:
                analysis_error_sbm = True
                print(f"❌ FATAL: Need >= 2 unique 'N' for SBM FSS. Found {len(unique_N_sbm)}.")
            else:
                print("  Sufficient unique 'N' values for SBM FSS.")
                # Diagnose primary metric column
                metric_col_sbm = sbm_results_df[primary_metric_sbm]
                total_sbm = len(metric_col_sbm)
                non_nan_sbm = metric_col_sbm.notna().sum()
                nan_sbm = metric_col_sbm.isna().sum()
                print(f"  SBM Diagnostics for '{primary_metric_sbm}': Total={total_sbm}, Non-NaN={non_nan_sbm}, NaN={nan_sbm}")
                if non_nan_sbm == 0:
                    analysis_error_sbm = True
                    print(f"❌ FATAL: SBM Column '{primary_metric_sbm}' contains only NaNs.")
                else:
                    print("✅ SBM Data seems valid for moment calculation.")

# --- Aggregate Susceptibility for SBM ---
fss_chi_df_sbm = pd.DataFrame() # Initialize empty dataframe
if not analysis_error_sbm:
    print(f"\n--- Step 11.1.2: Aggregating SBM Susceptibility (χ) ---")
    try:
        # Calculate variance of the primary order parameter (M)
        # Convert to numeric, coercing errors
        M_numeric_sbm = pd.to_numeric(sbm_results_df[primary_metric_sbm], errors='coerce')
        # Group by N and the SBM parameter, then calculate variance
        var_M_sbm = sbm_results_df.assign(M_numeric_sbm=M_numeric_sbm).groupby(['N', param_name_sbm], observed=True)['M_numeric_sbm'].var()

        if var_M_sbm.isna().any():
            nan_groups_sbm = var_M_sbm[var_M_sbm.isna()].index.tolist()
            warnings.warn(f"NaNs found in SBM Var(M) calc for {len(nan_groups_sbm)} groups. Dropping.", RuntimeWarning)

        # Calculate Susceptibility: χ = N * Var(M)
        susceptibility_chi_agg_sbm = var_M_sbm.index.get_level_values('N') * var_M_sbm
        # Combine into DataFrame and drop rows with NaN susceptibility
        fss_chi_df_sbm = pd.DataFrame({'susceptibility_chi': susceptibility_chi_agg_sbm}).reset_index().dropna()

        # Check validity of the aggregated DataFrame
        if fss_chi_df_sbm.empty:
             raise ValueError("SBM Chi DataFrame empty after aggregation/dropna.")
        if fss_chi_df_sbm['N'].nunique() < 2:
            raise ValueError(f"SBM Chi DataFrame has < 2 unique sizes ({fss_chi_df_sbm['N'].unique()}) after aggregation/dropna.")

        print(f"  Aggregated SBM Susceptibility ready for FSS (Entries: {len(fss_chi_df_sbm)}).")
        # print("  Sample SBM aggregated data:\n", fss_chi_df_sbm.head())

    except KeyError as e_agg_key_sbm:
        print(f"❌ Error aggregating SBM Chi: Missing column {e_agg_key_sbm}")
        analysis_error_sbm = True
    except Exception as agg_chi_e_sbm:
        print(f"❌ Error aggregating SBM Chi: {agg_chi_e_sbm}")
        traceback.print_exc(limit=1)
        analysis_error_sbm = True

# --- FSS on SBM Susceptibility using Optuna ---
if not analysis_error_sbm:
    print(f"\n--- Step 11.1.3: FSS on SBM Susceptibility using Optuna ---")
    # Prepare data for Optuna objective function
    try:
        Ls_chi_sbm = fss_chi_df_sbm['N'].values.astype(np.float64)
        ps_chi_sbm = fss_chi_df_sbm[param_name_sbm].values.astype(np.float64)  # Use SBM parameter column
        Ms_chi_sbm = fss_chi_df_sbm['susceptibility_chi'].values.astype(np.float64)
    except KeyError as e_fss_prep_sbm:
         print(f"❌ Error preparing SBM FSS data: Missing column {e_fss_prep_sbm}.")
         analysis_error_sbm = True
    except Exception as e_fss_prep_other_sbm:
         print(f"❌ Error preparing SBM FSS data: {e_fss_prep_other_sbm}.")
         analysis_error_sbm = True


    # Define Optuna Objective Function (reusable structure, uses SBM data here)
    # This function should ideally be defined once globally or imported if identical
    def objective_fss_chi_sbm(trial):
        # Suggest parameters for SBM (adjust ranges based on SBM behavior if known)
        pc = trial.suggest_float("pc", 0.01, 0.5)  # SBM p_c likely > 0.01, maybe even higher
        gamma_nu = trial.suggest_float("gamma_over_nu", 0.1, 3.0) # Ratio gamma/nu
        one_nu = trial.suggest_float("one_over_nu", 0.1, 5.0) # Ratio 1/nu

        # --- Calculate scaled variables & collapse error using SBM data ---
        scaled_x = (ps_chi_sbm - pc) * (Ls_chi_sbm ** one_nu)
        scaled_y = Ms_chi_sbm * (Ls_chi_sbm ** (-gamma_nu))

        # Sort by scaled_x for binning
        sorted_indices = np.argsort(scaled_x)
        scaled_x_sorted = scaled_x[sorted_indices]
        scaled_y_sorted = scaled_y[sorted_indices]

        # --- Calculate Collapse Error using Binning (same logic as Cell 9) ---
        total_error = 0.0; num_bins = 20; # Use same binning approach
        try:
            valid_indices = np.isfinite(scaled_x_sorted) & np.isfinite(scaled_y_sorted)
            if not np.any(valid_indices): return np.inf

            scaled_x_finite = scaled_x_sorted[valid_indices]
            scaled_y_finite = scaled_y_sorted[valid_indices]
            num_valid_points = len(scaled_x_finite)
            if num_valid_points < num_bins: num_bins = max(1, num_valid_points // 2)

            min_x = np.min(scaled_x_finite); max_x = np.max(scaled_x_finite)
            if abs(min_x - max_x) < 1e-9: return np.var(scaled_y_finite) if num_valid_points > 1 else 0.0

            bins = np.linspace(min_x, max_x, num_bins + 1)
            bin_indices = np.digitize(scaled_x_finite, bins)
            non_empty_bin_count = 0
            bin_idx = 1
            while bin_idx <= num_bins:
                y_in_bin = scaled_y_finite[bin_indices == bin_idx]
                if len(y_in_bin) > 1:
                    total_error += np.var(y_in_bin); non_empty_bin_count += 1
                bin_idx += 1

            if non_empty_bin_count > 0: return total_error / non_empty_bin_count
            else: return np.inf
        except Exception: return np.inf # Return high error on failure

    # --- Run Optuna Study for SBM ---
    n_optuna_trials_sbm = 100 # Number of trials for SBM optimization
    print(f"  Running Optuna study ({n_optuna_trials_sbm} trials) for SBM Chi...")
    study_chi_sbm = optuna.create_study(direction='minimize')
    optimization_success_sbm = False
    try:
        study_chi_sbm.optimize(objective_fss_chi_sbm, n_trials=n_optuna_trials_sbm, show_progress_bar=True)
        optimization_success_sbm = True
    except Exception as optuna_err_sbm:
        print(f"❌ Error during Optuna SBM optimization: {optuna_err_sbm}")
        traceback.print_exc(limit=1)
        global_optuna_fss_chi_sbm_results = {'success': False} # Store failure
        analysis_error_sbm = True # Mark analysis as failed

    # --- Process and Store Best SBM Results ---
    if optimization_success_sbm:
        if study_chi_sbm.best_trial:
            bp_sbm = study_chi_sbm.best_params # Best parameters found
            bv_sbm = study_chi_sbm.best_value   # Best objective value

            pc_opt_sbm = bp_sbm['pc']
            gamma_nu_opt_sbm = bp_sbm['gamma_over_nu']
            one_nu_opt_sbm = bp_sbm['one_over_nu']

            # Calculate original exponents
            nu_opt_sbm = np.nan; gamma_opt_sbm = np.nan
            if abs(one_nu_opt_sbm) > 1e-6:
                nu_opt_sbm = 1.0 / one_nu_opt_sbm
                gamma_opt_sbm = gamma_nu_opt_sbm * nu_opt_sbm
            else: warnings.warn("SBM Optuna result 1/nu too close to zero.", RuntimeWarning)

            # Store results
            global_optuna_fss_chi_sbm_results = {
                'pc': pc_opt_sbm,
                'gamma': gamma_opt_sbm,
                'nu': nu_opt_sbm,
                'gamma_over_nu': gamma_nu_opt_sbm,
                'one_over_nu': one_nu_opt_sbm,
                'success': True,
                'objective': bv_sbm
            }
            print("\n  ✅ Optuna FSS Successful for SBM Chi:")
            print(f"     Best Objective: {bv_sbm:.4e}")
            print(f"     p_c(SBM) ≈ {pc_opt_sbm:.6f}")
            print(f"     γ(SBM)   ≈ {format_metric(gamma_opt_sbm, '%.4f')}") # Use helper
            print(f"     ν(SBM)   ≈ {format_metric(nu_opt_sbm, '%.4f')}")   # Use helper
        else:
            print("  ❌ Optuna SBM study finished but reported no best trial.")
            global_optuna_fss_chi_sbm_results = {'success': False}

    # --- Plot SBM FSS Collapse ---
    if global_optuna_fss_chi_sbm_results.get('success', False):
        print("  Generating FSS data collapse plot for SBM Chi...")
        # Retrieve optimal parameters for plotting
        pc_sbm_plot = global_optuna_fss_chi_sbm_results['pc']
        gamma_nu_sbm_plot = global_optuna_fss_chi_sbm_results['gamma_over_nu']
        one_nu_sbm_plot = global_optuna_fss_chi_sbm_results['one_over_nu']
        nu_val_sbm_plot = global_optuna_fss_chi_sbm_results['nu']

        # Recalculate scaled variables
        scaled_x_sbm = (ps_chi_sbm - pc_sbm_plot) * (Ls_chi_sbm ** one_nu_sbm_plot)
        scaled_y_sbm = Ms_chi_sbm * (Ls_chi_sbm ** (-gamma_nu_sbm_plot))

        # Create plot
        fig_fss_sbm, ax_fss_sbm = plt.subplots(figsize=(8, 6))
        unique_Ls_sbm_plot = sorted(np.unique(Ls_chi_sbm))
        colors_sbm = plt.cm.viridis(np.linspace(0, 1, len(unique_Ls_sbm_plot)))

        # Plot data for each system size
        l_idx = 0
        while l_idx < len(unique_Ls_sbm_plot):
             L = unique_Ls_sbm_plot[l_idx]
             mask = Ls_chi_sbm == L
             ax_fss_sbm.scatter(scaled_x_sbm[mask], scaled_y_sbm[mask],
                                label=f'N={int(L)}', color=colors_sbm[l_idx], alpha=0.7, s=20)
             l_idx += 1

        # Configure plot labels and title
        # Use param_name_sbm (e.g., p_intra) in label
        sbm_param_base_name = param_name_sbm.replace('_value', '')
        xlabel_sbm = f'$({sbm_param_base_name} - p_c) N^{{1/\\nu}}$ (p$_c$≈{pc_sbm_plot:.4f}, ν≈{format_metric(nu_val_sbm_plot,"%.3f")})'
        ylabel_sbm = f'$\\chi \\times N^{{-\\gamma/\\nu}}$ (γ/ν≈{gamma_nu_sbm_plot:.3f})'
        ax_fss_sbm.set_xlabel(xlabel_sbm)
        ax_fss_sbm.set_ylabel(ylabel_sbm)
        ax_fss_sbm.set_title(f'FSS Collapse for Susceptibility χ (SBM - Optuna)')
        ax_fss_sbm.grid(True, linestyle=':')
        ax_fss_sbm.legend(title='N')
        plt.tight_layout()

        # Save plot
        fss_sbm_plot_path = os.path.join(output_dir, f"{exp_name}_SBM_Susceptibility_FSS_collapse_OPTUNA.png")
        try:
            plt.savefig(fss_sbm_plot_path, dpi=150)
            print(f"  ✅ SBM FSS Chi Collapse plot saved to: {fss_sbm_plot_path}")
        except Exception as e_save_sbm:
            print(f"  ❌ Error saving SBM FSS plot: {e_save_sbm}")
        plt.close(fig_fss_sbm) # Close plot
    else:
        print("  Skipping SBM FSS Chi collapse plot due to optimization failure.")

# --- Optional: Estimate pc from peak (might be less accurate) ---
# This block was present in the user input but seems to have a NameError.
# Fixing the reference from df_plot to fss_chi_df_sbm
if not analysis_error_sbm and not fss_chi_df_sbm.empty:
    print("\n--- Estimating p_c from SBM Susceptibility Peak (Largest N) ---")
    pc_chi_peak_sbm = np.nan
    try:
        largest_N_sbm = fss_chi_df_sbm['N'].max()
        largest_N_data_chi_sbm = fss_chi_df_sbm[fss_chi_df_sbm['N'] == largest_N_sbm]
        if not largest_N_data_chi_sbm.empty:
            # Find index of the maximum susceptibility value for the largest N
            peak_idx = largest_N_data_chi_sbm['susceptibility_chi'].idxmax()
            # Check if index is valid before using .loc
            if pd.notna(peak_idx) and peak_idx in largest_N_data_chi_sbm.index:
                # Get the corresponding parameter value (p_intra_value)
                pc_chi_peak_sbm = largest_N_data_chi_sbm.loc[peak_idx, param_name_sbm]
                print(f"    p_c(SBM) estimate from χ peak (N={largest_N_sbm}): {pc_chi_peak_sbm:.6f}")
            else:
                print(f"    Could not find valid Chi peak index for SBM (N={largest_N_sbm}).")
        else:
            print(f"    No SBM data found for N={largest_N_sbm} to estimate Chi peak.")
    except KeyError as e_peak_key:
         print(f"    Could not estimate from Chi peak: Missing column {e_peak_key}")
    except Exception as e_chi_sbm:
        print(f"    Could not estimate from Chi peak: {e_chi_sbm}")


# Final message if analysis was skipped
elif analysis_error_sbm:
     print("\n❌ Skipping SBM Analysis Steps due to configuration or diagnostic errors.")

print("\n✅ Cell 11.1: SBM Analysis completed.")

In [None]:
# Cell 11.2: Critical Point & Exponent Analysis (RGG Model - FSS on Chi with Optuna)
# Description: Analyzes RGG universality results. Calculates Susceptibility (Chi).
#              Uses Optuna to find the best FSS parameters (rc, gamma/nu, 1/nu) for Chi.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit, minimize # Keep minimize available
import warnings
import os
import traceback
import json
import optuna  # Import Optuna

# --- Suppress Optuna INFO messages ---
optuna.logging.set_verbosity(optuna.logging.WARNING)

print("\n--- Cell 11.2: Critical Point & Exponent Analysis (RGG Model - FSS on Chi with Optuna) ---")

# --- Prerequisites & Configuration ---
analysis_error_rgg = False
# Check for config dictionary
if 'config' not in globals() or not isinstance(config, dict):
    print("❌ FATAL: Config dictionary missing. Run Cell 1."); analysis_error_rgg = True
# Check for combined universality results DataFrame
if 'global_universality_results' not in globals():
    print("❌ FATAL: Combined universality DataFrame 'global_universality_results' missing. Run Cell 11.")
    analysis_error_rgg = True
elif not isinstance(global_universality_results, pd.DataFrame):
     print("❌ FATAL: 'global_universality_results' is not a Pandas DataFrame.")
     analysis_error_rgg = True
elif global_universality_results.empty:
    print("❌ FATAL: Combined universality DataFrame 'global_universality_results' is empty.")
    analysis_error_rgg = True
elif 'RGG' not in global_universality_results['model'].unique():
    # Check if RGG model data specifically is present
    print("❌ FATAL: No 'RGG' model results found in combined universality DataFrame.")
    analysis_error_rgg = True

# Load necessary config parameters if no error yet
output_dir = None
exp_name = None
primary_metric_rgg = None
system_sizes_rgg = None
param_name_rgg = None # Specific parameter name for RGG sweep
if not analysis_error_rgg:
    try:
        output_dir = config['OUTPUT_DIR']
        exp_name = config['EXPERIMENT_NAME']
        primary_metric_rgg = config.get('PRIMARY_ORDER_PARAMETER', 'variance_norm')
        system_sizes_rgg = config.get('SYSTEM_SIZES', [])
        # Dynamically find RGG sweep parameter column name
        rgg_params_cfg = config.get('GRAPH_MODEL_PARAMS', {}).get('RGG', {})
        rgg_sweep_key = next((k for k in rgg_params_cfg if k.endswith('_values')), None)
        if rgg_sweep_key:
             param_name_rgg = rgg_sweep_key.replace('_values', '_value') # e.g., 'radius_value'
        else:
             warnings.warn("Could not determine RGG sweep parameter name from config. Assuming 'radius_value'.", RuntimeWarning)
             param_name_rgg = 'radius_value'

        if not system_sizes_rgg:
            print("❌ FATAL: SYSTEM_SIZES list is empty in config.")
            analysis_error_rgg = True

    except KeyError as e_key_rgg:
        print(f"❌ FATAL: Missing key '{e_key_rgg}' in config for RGG analysis.")
        analysis_error_rgg = True
    except Exception as e_conf_rgg:
        print(f"❌ FATAL: Error loading config for RGG analysis: {e_conf_rgg}.")
        analysis_error_rgg = True

# --- Initialize results dictionary for this cell ---
global_optuna_fss_chi_rgg_results = {}

# --- Filter and Diagnose RGG Data ---
rgg_results_df = pd.DataFrame() # Initialize empty DataFrame
if not analysis_error_rgg:
    print(f"\n--- Step 11.2.1: Filtering and Diagnosing RGG Input Data ---")
    # Filter the combined DataFrame for RGG model results
    rgg_results_df = global_universality_results[global_universality_results['model'] == 'RGG'].copy()

    if rgg_results_df.empty:
        analysis_error_rgg = True
        print("❌ FATAL: RGG results DataFrame is empty after filtering.")
    else:
        print(f"  Filtered RGG DataFrame Shape: {rgg_results_df.shape}")
        # Check required columns for RGG analysis
        required_cols_rgg = ['N', param_name_rgg, primary_metric_rgg, 'instance', 'trial']
        missing_cols_rgg = []
        col_idx = 0
        while col_idx < len(required_cols_rgg):
             col = required_cols_rgg[col_idx]
             if col not in rgg_results_df.columns:
                  missing_cols_rgg.append(col)
             col_idx += 1

        if len(missing_cols_rgg) > 0:
            analysis_error_rgg = True
            print(f"❌ FATAL: RGG data missing required columns: {missing_cols_rgg}.")
        else:
            print(f"  Required columns found: {required_cols_rgg}")
            # Check unique system sizes (N) for RGG
            unique_N_rgg = rgg_results_df['N'].unique()
            print(f"  Unique 'N' for RGG: {sorted(unique_N_rgg)}")
            if len(unique_N_rgg) < 2:
                analysis_error_rgg = True
                print(f"❌ FATAL: Need >= 2 unique 'N' for RGG FSS. Found {len(unique_N_rgg)}.")
            else:
                print("  Sufficient unique 'N' values for RGG FSS.")
                # Diagnose primary metric column
                metric_col_rgg = rgg_results_df[primary_metric_rgg]
                total_rgg = len(metric_col_rgg)
                non_nan_rgg = metric_col_rgg.notna().sum()
                nan_rgg = metric_col_rgg.isna().sum()
                print(f"  RGG Diagnostics for '{primary_metric_rgg}': Total={total_rgg}, Non-NaN={non_nan_rgg}, NaN={nan_rgg}")
                if non_nan_rgg == 0:
                    analysis_error_rgg = True
                    print(f"❌ FATAL: RGG Column '{primary_metric_rgg}' contains only NaNs.")
                else:
                    print("✅ RGG Data seems valid for moment calculation.")

# --- Aggregate Susceptibility for RGG ---
fss_chi_df_rgg = pd.DataFrame() # Initialize empty dataframe
if not analysis_error_rgg:
    print(f"\n--- Step 11.2.2: Aggregating RGG Susceptibility (χ) ---")
    try:
        # Calculate variance of the primary order parameter (M)
        M_numeric_rgg = pd.to_numeric(rgg_results_df[primary_metric_rgg], errors='coerce')
        var_M_rgg = rgg_results_df.assign(M_numeric_rgg=M_numeric_rgg).groupby(['N', param_name_rgg], observed=True)['M_numeric_rgg'].var()

        if var_M_rgg.isna().any():
            nan_groups_rgg = var_M_rgg[var_M_rgg.isna()].index.tolist()
            warnings.warn(f"NaNs found in RGG Var(M) calc for {len(nan_groups_rgg)} groups. Dropping.", RuntimeWarning)

        # Calculate Susceptibility: χ = N * Var(M)
        susceptibility_chi_agg_rgg = var_M_rgg.index.get_level_values('N') * var_M_rgg
        # Combine into DataFrame and drop rows with NaN susceptibility
        fss_chi_df_rgg = pd.DataFrame({'susceptibility_chi': susceptibility_chi_agg_rgg}).reset_index().dropna()

        # Check validity of the aggregated DataFrame
        if fss_chi_df_rgg.empty:
             raise ValueError("RGG Chi DataFrame empty after aggregation/dropna.")
        if fss_chi_df_rgg['N'].nunique() < 2:
            raise ValueError(f"RGG Chi DataFrame has < 2 unique sizes ({fss_chi_df_rgg['N'].unique()}) after aggregation/dropna.")

        print(f"  Aggregated RGG Susceptibility ready for FSS (Entries: {len(fss_chi_df_rgg)}).")
        # print("  Sample RGG aggregated data:\n", fss_chi_df_rgg.head())

    except KeyError as e_agg_key_rgg:
        print(f"❌ Error aggregating RGG Chi: Missing column {e_agg_key_rgg}")
        analysis_error_rgg = True
    except Exception as agg_chi_e_rgg:
        print(f"❌ Error aggregating RGG Chi: {agg_chi_e_rgg}")
        traceback.print_exc(limit=1)
        analysis_error_rgg = True

# --- FSS on RGG Susceptibility using Optuna ---
if not analysis_error_rgg:
    print(f"\n--- Step 11.2.3: FSS on RGG Susceptibility using Optuna ---")
    # Prepare data for Optuna objective function
    try:
        Ls_chi_rgg = fss_chi_df_rgg['N'].values.astype(np.float64)
        # Use RGG parameter column (e.g., radius_value)
        ps_chi_rgg = fss_chi_df_rgg[param_name_rgg].values.astype(np.float64)
        Ms_chi_rgg = fss_chi_df_rgg['susceptibility_chi'].values.astype(np.float64)
    except KeyError as e_fss_prep_rgg:
         print(f"❌ Error preparing RGG FSS data: Missing column {e_fss_prep_rgg}.")
         analysis_error_rgg = True
    except Exception as e_fss_prep_other_rgg:
         print(f"❌ Error preparing RGG FSS data: {e_fss_prep_other_rgg}.")
         analysis_error_rgg = True

    # Define Optuna Objective Function (reusable structure, uses RGG data here)
    def objective_fss_chi_rgg(trial):
        # Suggest parameters for RGG (critical radius 'rc')
        # Adjust range based on sweep range and expected rc location
        pc = trial.suggest_float("rc", 0.05, 0.5) # 'rc' is the critical radius
        gamma_nu = trial.suggest_float("gamma_over_nu", 0.1, 3.0)
        one_nu = trial.suggest_float("one_over_nu", 0.1, 5.0)

        # --- Calculate scaled variables & collapse error using RGG data ---
        scaled_x = (ps_chi_rgg - pc) * (Ls_chi_rgg ** one_nu) # ps_chi_rgg holds radius values here
        scaled_y = Ms_chi_rgg * (Ls_chi_rgg ** (-gamma_nu))

        # Sort by scaled_x for binning
        sorted_indices = np.argsort(scaled_x)
        scaled_x_sorted = scaled_x[sorted_indices]
        scaled_y_sorted = scaled_y[sorted_indices]

        # --- Calculate Collapse Error using Binning (same logic as Cell 9) ---
        total_error = 0.0; num_bins = 20;
        try:
            valid_indices = np.isfinite(scaled_x_sorted) & np.isfinite(scaled_y_sorted)
            if not np.any(valid_indices): return np.inf

            scaled_x_finite = scaled_x_sorted[valid_indices]
            scaled_y_finite = scaled_y_sorted[valid_indices]
            num_valid_points = len(scaled_x_finite)
            if num_valid_points < num_bins: num_bins = max(1, num_valid_points // 2)

            min_x = np.min(scaled_x_finite); max_x = np.max(scaled_x_finite)
            if abs(min_x - max_x) < 1e-9: return np.var(scaled_y_finite) if num_valid_points > 1 else 0.0

            bins = np.linspace(min_x, max_x, num_bins + 1)
            bin_indices = np.digitize(scaled_x_finite, bins)
            non_empty_bin_count = 0
            bin_idx = 1
            while bin_idx <= num_bins:
                y_in_bin = scaled_y_finite[bin_indices == bin_idx]
                if len(y_in_bin) > 1:
                    total_error += np.var(y_in_bin); non_empty_bin_count += 1
                bin_idx += 1

            if non_empty_bin_count > 0: return total_error / non_empty_bin_count
            else: return np.inf
        except Exception: return np.inf

    # --- Run Optuna Study for RGG ---
    n_optuna_trials_rgg = 100 # Number of trials for RGG optimization
    print(f"  Running Optuna study ({n_optuna_trials_rgg} trials) for RGG Chi...")
    study_chi_rgg = optuna.create_study(direction='minimize')
    optimization_success_rgg = False
    try:
        study_chi_rgg.optimize(objective_fss_chi_rgg, n_trials=n_optuna_trials_rgg, show_progress_bar=True)
        optimization_success_rgg = True
    except Exception as optuna_err_rgg:
        print(f"❌ Error during Optuna RGG optimization: {optuna_err_rgg}")
        traceback.print_exc(limit=1)
        global_optuna_fss_chi_rgg_results = {'success': False} # Store failure
        analysis_error_rgg = True # Mark analysis as failed

    # --- Process and Store Best RGG Results ---
    if optimization_success_rgg:
        if study_chi_rgg.best_trial:
            bp_rgg = study_chi_rgg.best_params # Best parameters found
            bv_rgg = study_chi_rgg.best_value   # Best objective value

            pc_opt_rgg = bp_rgg['rc'] # Critical radius
            gamma_nu_opt_rgg = bp_rgg['gamma_over_nu']
            one_nu_opt_rgg = bp_rgg['one_over_nu']

            # Calculate original exponents
            nu_opt_rgg = np.nan; gamma_opt_rgg = np.nan
            if abs(one_nu_opt_rgg) > 1e-6:
                nu_opt_rgg = 1.0 / one_nu_opt_rgg
                gamma_opt_rgg = gamma_nu_opt_rgg * nu_opt_rgg
            else: warnings.warn("RGG Optuna result 1/nu too close to zero.", RuntimeWarning)

            # Store results
            global_optuna_fss_chi_rgg_results = {
                'pc': pc_opt_rgg, # Storing critical radius under 'pc' key for consistency
                'gamma': gamma_opt_rgg,
                'nu': nu_opt_rgg,
                'gamma_over_nu': gamma_nu_opt_rgg,
                'one_over_nu': one_nu_opt_rgg,
                'success': True,
                'objective': bv_rgg
            }
            # Use helper from Cell 9 if available, otherwise simple format
            if 'format_metric' not in globals(): format_metric = lambda v,f: f"%.{f[-2]}f" % v if pd.notna(v) else "N/A"

            print("\n  ✅ Optuna FSS Successful for RGG Chi:")
            print(f"     Best Objective: {bv_rgg:.4e}")
            print(f"     r_c(RGG) ≈ {pc_opt_rgg:.6f}") # Use r_c in printout
            print(f"     γ(RGG)   ≈ {format_metric(gamma_opt_rgg, '%.4f')}")
            print(f"     ν(RGG)   ≈ {format_metric(nu_opt_rgg, '%.4f')}")
        else:
            print("  ❌ Optuna RGG study finished but reported no best trial.")
            global_optuna_fss_chi_rgg_results = {'success': False}

    # --- Plot RGG FSS Collapse ---
    if global_optuna_fss_chi_rgg_results.get('success', False):
        print("  Generating FSS data collapse plot for RGG Chi...")
        # Retrieve optimal parameters for plotting
        pc_rgg_plot = global_optuna_fss_chi_rgg_results['pc'] # Critical radius
        gamma_nu_rgg_plot = global_optuna_fss_chi_rgg_results['gamma_over_nu']
        one_nu_rgg_plot = global_optuna_fss_chi_rgg_results['one_over_nu']
        nu_val_rgg_plot = global_optuna_fss_chi_rgg_results['nu']

        # Recalculate scaled variables using radius values (ps_chi_rgg)
        scaled_x_rgg = (ps_chi_rgg - pc_rgg_plot) * (Ls_chi_rgg ** one_nu_rgg_plot)
        scaled_y_rgg = Ms_chi_rgg * (Ls_chi_rgg ** (-gamma_nu_rgg_plot))

        # Create plot
        fig_fss_rgg, ax_fss_rgg = plt.subplots(figsize=(8, 6))
        unique_Ls_rgg_plot = sorted(np.unique(Ls_chi_rgg))
        colors_rgg = plt.cm.viridis(np.linspace(0, 1, len(unique_Ls_rgg_plot)))

        # Plot data for each system size
        l_idx = 0
        while l_idx < len(unique_Ls_rgg_plot):
             L = unique_Ls_rgg_plot[l_idx]
             mask = Ls_chi_rgg == L
             ax_fss_rgg.scatter(scaled_x_rgg[mask], scaled_y_rgg[mask],
                                label=f'N={int(L)}', color=colors_rgg[l_idx], alpha=0.7, s=20)
             l_idx += 1

        # Configure plot labels and title using 'r' for radius
        rgg_param_base_name = param_name_rgg.replace('_value', '') # Should be 'radius'
        xlabel_rgg = f'$({rgg_param_base_name} - r_c) N^{{1/\\nu}}$ (r$_c$≈{pc_rgg_plot:.4f}, ν≈{format_metric(nu_val_rgg_plot,"%.3f")})'
        ylabel_rgg = f'$\\chi \\times N^{{-\\gamma/\\nu}}$ (γ/ν≈{gamma_nu_rgg_plot:.3f})'
        ax_fss_rgg.set_xlabel(xlabel_rgg)
        ax_fss_rgg.set_ylabel(ylabel_rgg)
        ax_fss_rgg.set_title(f'FSS Collapse for Susceptibility χ (RGG - Optuna)')
        ax_fss_rgg.grid(True, linestyle=':')
        ax_fss_rgg.legend(title='N')
        plt.tight_layout()

        # Save plot
        fss_rgg_plot_path = os.path.join(output_dir, f"{exp_name}_RGG_Susceptibility_FSS_collapse_OPTUNA.png")
        try:
            plt.savefig(fss_rgg_plot_path, dpi=150)
            print(f"  ✅ RGG FSS Chi Collapse plot saved to: {fss_rgg_plot_path}")
        except Exception as e_save_rgg:
            print(f"  ❌ Error saving RGG FSS plot: {e_save_rgg}")
        plt.close(fig_fss_rgg) # Close plot
    else:
        print("  Skipping RGG FSS Chi collapse plot due to optimization failure.")

# --- Optional: Estimate rc from peak ---
# Fixing NameError as in SBM cell
if not analysis_error_rgg and not fss_chi_df_rgg.empty:
    print("\n--- Estimating r_c from RGG Susceptibility Peak (Largest N) ---")
    pc_chi_peak_rgg = np.nan # Use 'pc' prefix for consistency internally
    try:
        largest_N_rgg = fss_chi_df_rgg['N'].max()
        largest_N_data_chi_rgg = fss_chi_df_rgg[fss_chi_df_rgg['N'] == largest_N_rgg]
        if not largest_N_data_chi_rgg.empty:
            peak_idx_rgg = largest_N_data_chi_rgg['susceptibility_chi'].idxmax()
            if pd.notna(peak_idx_rgg) and peak_idx_rgg in largest_N_data_chi_rgg.index:
                # Get the corresponding radius value
                pc_chi_peak_rgg = largest_N_data_chi_rgg.loc[peak_idx_rgg, param_name_rgg]
                print(f"    r_c(RGG) estimate from χ peak (N={largest_N_rgg}): {pc_chi_peak_rgg:.6f}")
            else:
                print(f"    Could not find valid Chi peak index for RGG (N={largest_N_rgg}).")
        else:
            print(f"    No RGG data found for N={largest_N_rgg} to estimate Chi peak.")
    except KeyError as e_peak_key_rgg:
         print(f"    Could not estimate from Chi peak: Missing column {e_peak_key_rgg}")
    except Exception as e_chi_rgg:
        print(f"    Could not estimate from Chi peak: {e_chi_rgg}")


# Final message if analysis was skipped
elif analysis_error_rgg:
     print("\n❌ Skipping RGG Analysis Steps due to configuration or diagnostic errors.")

print("\n✅ Cell 11.2: RGG Analysis completed.")

In [None]:
# Cell 11.3: Universality Class Comparison (Using Optuna Chi FSS Results)
# Description: Compares the critical exponents (gamma, nu) estimated via Optuna FSS
#              on Susceptibility (Chi) for WS, SBM, and RGG models to assess universality.

import pandas as pd
import numpy as np
import os
import json
import warnings # Import warnings

print("\n--- Cell 11.3: Universality Class Comparison (Using Optuna Chi FSS Results) ---")

# --- Helper Function (redefine or ensure available) ---
def format_metric(value, fmt):
    """Safely formats a numerical value using a format string."""
    is_valid_number = False
    if value is not None:
        # Check if it's an int or float, and finite
        if isinstance(value, (int, float)) and np.isfinite(value):
            is_valid_number = True
    if is_valid_number:
        try:
            # Attempt to format the number
            return fmt % value
        except (TypeError, ValueError):
            # Return error string if formatting fails
            return "Format Error"
    else:
        # Return N/A string for non-numbers or non-finite numbers
        return "N/A"

# --- Prerequisites ---
comparison_error = False
results_store_chi = {} # Store results specifically from Chi FSS

# Check WS Results from Cell 9
ws_results_valid = False
# Check if the global variable exists and is a dictionary
if 'global_optuna_fss_chi_results' in globals() and isinstance(global_optuna_fss_chi_results, dict):
     # Check if the 'success' key is present and True
     if global_optuna_fss_chi_results.get('success', False):
          results_store_chi['WS'] = global_optuna_fss_chi_results
          ws_results_valid = True
     else:
          # Print warning if success flag is False
          print("⚠️ WS Optuna Chi FSS results indicate failure (success=False).")
else:
     # Print warning if variable is missing or wrong type
     print("⚠️ WS Optuna Chi FSS results ('global_optuna_fss_chi_results') missing or invalid type.")

# Check SBM Results from Cell 11.1
sbm_results_valid = False
if 'global_optuna_fss_chi_sbm_results' in globals() and isinstance(global_optuna_fss_chi_sbm_results, dict):
     if global_optuna_fss_chi_sbm_results.get('success', False):
          results_store_chi['SBM'] = global_optuna_fss_chi_sbm_results
          sbm_results_valid = True
     else: print("⚠️ SBM Optuna Chi FSS results indicate failure (success=False).")
else: print("⚠️ SBM Optuna Chi FSS results ('global_optuna_fss_chi_sbm_results') missing or invalid type.")

# Check RGG Results from Cell 11.2
rgg_results_valid = False
if 'global_optuna_fss_chi_rgg_results' in globals() and isinstance(global_optuna_fss_chi_rgg_results, dict):
     if global_optuna_fss_chi_rgg_results.get('success', False):
          results_store_chi['RGG'] = global_optuna_fss_chi_rgg_results
          rgg_results_valid = True
     else: print("⚠️ RGG Optuna Chi FSS results indicate failure (success=False).")
else: print("⚠️ RGG Optuna Chi FSS results ('global_optuna_fss_chi_rgg_results') missing or invalid type.")

# Check if enough results are available for comparison
successful_models_count = len(results_store_chi)
if successful_models_count < 2:
     print(f"❌ Need successful Optuna Chi FSS results from at least two models for comparison (Found {successful_models_count}).")
     comparison_error = True

# Load config if needed for output paths
output_dir = None; exp_name = None
if 'config' in globals() and isinstance(config, dict):
     try:
          output_dir = config['OUTPUT_DIR']; exp_name = config['EXPERIMENT_NAME']
     except KeyError:
          comparison_error = True; print("❌ Config missing OUTPUT_DIR or EXPERIMENT_NAME.")
else:
     comparison_error = True; print("❌ Config dictionary missing.")

# --- Compare Exponents ---
if not comparison_error:
    print("\n--- Comparing Critical Exponents (γ, ν) Across Models (from Chi FSS) ---")
    comparison_data = [] # List to store dictionaries for DataFrame
    gamma_values_comp = [] # List to store valid gamma values for stats
    nu_values_comp = []    # List to store valid nu values for stats
    models_compared = list(results_store_chi.keys()) # Get list of models with results

    # Iterate through the stored results for each successful model
    model_idx = 0
    while model_idx < len(models_compared):
        model = models_compared[model_idx]
        results = results_store_chi[model] # Get the results dict for the model

        # Extract exponents and critical point safely using .get()
        gamma = results.get('gamma', np.nan)
        nu = results.get('nu', np.nan)
        pc = results.get('pc', np.nan) # Critical point (p_c, p_c(SBM), r_c)
        obj = results.get('objective', np.nan) # Optuna objective value

        # Append formatted data for the comparison table
        comparison_data.append({
            'Model': model,
            # Use param name consistent with model if possible (p_c, p_c(SBM), r_c)
            'Critical Point': format_metric(pc, '%.5f'),
            'Gamma (γ)': format_metric(gamma, '%.3f'),   # Format gamma
            'Nu (ν)': format_metric(nu, '%.3f'),        # Format nu
            'Optuna Objective': format_metric(obj, '%.2e') # Format objective value
        })

        # Add valid exponents to lists for statistical comparison
        # Check using pd.notna which handles None and np.nan
        if pd.notna(gamma):
            gamma_values_comp.append(gamma)
        if pd.notna(nu):
            nu_values_comp.append(nu)

        model_idx += 1 # Increment loop counter

    # Create and print the comparison DataFrame
    comparison_df = pd.DataFrame(comparison_data)
    # Use to_string() for better console formatting without truncation
    print(comparison_df.to_string(index=False))

    # --- Quantitative Comparison (Relative Standard Deviation - RSD) ---
    print("\n  Quantitative Assessment (RSD):")
    # Calculate Gamma stats if enough values (at least 2)
    gamma_mean = np.nan; gamma_std = np.nan; gamma_rsd = np.inf # Initialize stats
    if len(gamma_values_comp) >= 2:
        gamma_mean = np.mean(gamma_values_comp); gamma_std = np.std(gamma_values_comp)
        # Calculate RSD = (StdDev / |Mean|) * 100%, handle potential zero mean
        if gamma_mean != 0 and pd.notna(gamma_mean) and pd.notna(gamma_std):
             # Ensure calculation is done using floats
             gamma_rsd = (float(gamma_std) / abs(float(gamma_mean))) * 100.0
        print(f"  Gamma (γ): Mean={format_metric(gamma_mean, '%.3f')}, StdDev={format_metric(gamma_std, '%.3f')}, RSD={format_metric(gamma_rsd, '%.1f')}%")
        # Interpretation based on RSD threshold (e.g., 15-25%)
        if gamma_rsd < 15.0:
            print("    Suggests reasonable consistency for Gamma.")
        elif gamma_rsd < 25.0:
            print("    Suggests potential moderate differences for Gamma.")
        else: # High RSD indicates likely distinct classes
            print("    Suggests significant differences for Gamma (distinct classes).")
    else:
        print("  Gamma (γ): Cannot perform quantitative comparison (need ≥ 2 valid estimates).")

    # Calculate Nu stats if enough values
    nu_mean = np.nan; nu_std = np.nan; nu_rsd = np.inf # Initialize stats
    if len(nu_values_comp) >= 2:
        nu_mean = np.mean(nu_values_comp); nu_std = np.std(nu_values_comp)
        # Calculate RSD for Nu
        if nu_mean != 0 and pd.notna(nu_mean) and pd.notna(nu_std):
             nu_rsd = (float(nu_std) / abs(float(nu_mean))) * 100.0
        print(f"  Nu (ν):    Mean={format_metric(nu_mean, '%.3f')}, StdDev={format_metric(nu_std, '%.3f')}, RSD={format_metric(nu_rsd, '%.1f')}%")
        # Interpretation based on RSD threshold
        if nu_rsd < 15.0:
            print("    Suggests reasonable consistency for Nu.")
        elif nu_rsd < 25.0:
            print("    Suggests potential moderate differences for Nu.")
        else: # High RSD
            print("    Suggests significant differences for Nu (distinct classes).")
    else:
        print("  Nu (ν):    Cannot perform quantitative comparison (need ≥ 2 valid estimates).")

    # --- Conclusion based on combined RSD analysis ---
    print("\n  Preliminary Universality Conclusion (based on Chi FSS):")
    # Determine overall consistency based on thresholds for BOTH exponents
    # Using a stricter threshold (e.g., 20-25%) to declare distinct classes based on Phase 1 findings
    gamma_likely_distinct = pd.isna(gamma_rsd) or gamma_rsd > 25.0
    nu_likely_distinct = pd.isna(nu_rsd) or nu_rsd > 25.0

    # Check if *either* exponent suggests distinct classes
    if gamma_likely_distinct or nu_likely_distinct:
         print("    ❌ Significant variation observed in at least one critical exponent.")
         print(f"       (RSDs: Gamma={format_metric(gamma_rsd, '%.1f')}%, Nu={format_metric(nu_rsd, '%.1f')}%)")
         print("       Evidence strongly suggests models belong to DISTINCT universality classes.")
    # Check if both are reasonably consistent (low RSD)
    elif gamma_rsd < 15.0 and nu_rsd < 15.0: # Need low RSD for both to suggest same class
         print("    ✅ Low variation in exponents observed.")
         print("       Evidence supports a single universality class across tested models,")
         print(f"       characterized by γ ≈ {format_metric(gamma_mean, '%.3f')} and ν ≈ {format_metric(nu_mean, '%.3f')}.")
    else: # Intermediate case - results are borderline or ambiguous
         print("    🟡 Moderate or ambiguous variation in exponents.")
         print(f"       (RSDs: Gamma={format_metric(gamma_rsd, '%.1f')}%, Nu={format_metric(nu_rsd, '%.1f')}%)")
         print("       Universality is questionable; distinct classes remain likely.")


    # Save comparison table to CSV if path is valid
    if output_dir is not None and exp_name is not None:
        comp_table_path = os.path.join(output_dir, f"{exp_name}_universality_exponent_comparison_CHI.csv")
        try:
            comparison_df.to_csv(comp_table_path, index=False)
            print(f"\n✅ Chi exponent comparison table saved to: {comp_table_path}")
        except Exception as e_save_comp:
            print(f"❌ Error saving comparison table: {e_save_comp}")
    else:
        print("\n⚠️ Could not save comparison table (output path or experiment name missing).")

else:
    print("❌ Skipping universality comparison due to missing results or configuration errors.")

print("\n✅ Cell 11.3: Universality Class Comparison completed.")

In [None]:
# Cell 11.4: Energy Functional Analysis (Lyapunov Check - Final)
# Description: Analyzes simulation results (combined if available) to check if the
#              energy functional behaves like a Lyapunov function. Requires energy
#              history to be stored during simulation for monotonicity check.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import warnings

print("\n--- Cell 11.4: Energy Functional Analysis (Lyapunov Check - Final) ---")

# --- Prerequisites ---
analysis_error_energy = False
# Check for config dictionary
if 'config' not in globals() or not isinstance(config, dict):
    print("❌ FATAL: Config dictionary missing. Run Cell 1."); analysis_error_energy = True
else:
    # Load flags from config
    config = globals()['config']
    calculate_energy_flag = config.get('CALCULATE_ENERGY', False)
    # Check if energy history was intended to be stored
    store_history_flag = config.get('STORE_ENERGY_HISTORY', False)
    energy_functional_type = config.get('ENERGY_FUNCTIONAL_TYPE', 'N/A')

    # Skip analysis if energy calculation was disabled during the runs
    if not calculate_energy_flag:
        print("ℹ️ Skipping Energy Analysis: CALCULATE_ENERGY was False during sweeps.")
        analysis_error_energy = True

# Find the relevant results DataFrame
results_df_energy = pd.DataFrame() # Initialize empty DataFrame
source_data_name = "No Data"
if not analysis_error_energy:
    # Prioritize combined universality results if available (contains all models)
    if 'global_universality_results' in globals() and isinstance(global_universality_results, pd.DataFrame) and not global_universality_results.empty:
        results_df_energy = global_universality_results
        source_data_name = "Combined Universality Results (All Models)"
    # Fallback to primary WS sweep results if universality results are missing
    elif 'global_sweep_results' in globals() and isinstance(global_sweep_results, pd.DataFrame) and not global_sweep_results.empty:
        results_df_energy = global_sweep_results
        source_data_name = "Primary WS Sweep Results"
    else:
        # If no suitable DataFrame is found
        print("❌ Cannot analyze energy: No suitable results DataFrame found ('global_universality_results' or 'global_sweep_results'). Run Cell 8 or 11.")
        analysis_error_energy = True


# --- Analyze Energy Data ---
if not analysis_error_energy:
    print(f"  Using data source: {source_data_name}")
    print(f"  Analyzing energy functional type: {energy_functional_type}")

    # Define expected column names
    energy_col = 'final_energy'
    monotonic_col = 'energy_monotonic'

    # --- Final Energy Statistics ---
    # Check if the final energy column exists first
    if energy_col not in results_df_energy.columns:
        print(f"❌ Cannot analyze final energy: Column ('{energy_col}') not found in DataFrame.")
        # Continue to check monotonicity if flag was set, but cannot analyze final energy values
    else:
         print(f"\n  Final Energy Statistics:")
         num_total_runs = len(results_df_energy)
         # Count runs where final energy is a valid number (not NaN)
         valid_energy_runs = results_df_energy[energy_col].notna().sum()
         print(f"    Total Simulation Runs in DataFrame: {num_total_runs}")
         print(f"    Runs with Valid Final Energy: {valid_energy_runs}")

         if valid_energy_runs > 0:
             # Calculate mean and standard deviation of valid final energies
             mean_final_energy = results_df_energy[energy_col].mean()
             std_final_energy = results_df_energy[energy_col].std()
             min_final_energy = results_df_energy[energy_col].min()
             max_final_energy = results_df_energy[energy_col].max()
             print(f"    Mean Final Energy: {mean_final_energy:.4f}")
             print(f"    Std Dev Final Energy: {std_final_energy:.4f}")
             print(f"    Min Final Energy: {min_final_energy:.4f}")
             print(f"    Max Final Energy: {max_final_energy:.4f}")
         else:
              print("    No valid final energy values found to calculate statistics.")

    # --- Analyze Monotonicity (Lyapunov Check) ---
    print("\n  Lyapunov Behavior Statistics (Monotonicity Check):")
    # Check if energy history was supposed to be stored
    if not store_history_flag:
        print("    Monotonicity check skipped: STORE_ENERGY_HISTORY was False during sweeps.")
    # Check if the monotonicity result column exists
    elif monotonic_col not in results_df_energy.columns:
        print(f"    ⚠️ Cannot analyze energy monotonicity: Column ('{monotonic_col}') not found.")
        print("       (Check if `run_single_instance` correctly calculates and returns this column when store_energy_history=True).")
    else:
        # Proceed with analysis if column exists
        # Count runs where monotonicity check result is valid (not NaN)
        valid_monotonic_checks = results_df_energy[monotonic_col].notna().sum()
        if valid_monotonic_checks > 0:
             # Count runs where energy was monotonic (True values)
             # Assuming boolean True/False stored, sum() treats True as 1, False as 0
             # Handle potential non-boolean types gracefully
             try:
                  # Attempt boolean conversion and sum, filter out NaNs first
                  num_monotonic = results_df_energy[monotonic_col].dropna().astype(bool).sum()
             except (TypeError, ValueError):
                  # Fallback if conversion fails (e.g., unexpected strings)
                  warnings.warn("Could not reliably convert 'energy_monotonic' column to boolean for counting.", RuntimeWarning)
                  num_monotonic = "Error" # Indicate counting failure

             # Calculate fraction if counting was successful
             monotonic_fraction = "N/A"
             if isinstance(num_monotonic, (int, float)): # Check if count is a number
                   monotonic_fraction = num_monotonic / valid_monotonic_checks
                   print(f"    Total Runs with Valid Monotonicity Check: {valid_monotonic_checks}")
                   print(f"    Runs with Monotonic/Stable Energy: {num_monotonic}")
                   print(f"    Fraction Monotonic/Stable: {monotonic_fraction:.4f}")
                   # Interpretation based on the fraction
                   if monotonic_fraction > 0.95:
                       print("    ✅ High fraction strongly supports Lyapunov-like behavior for the calculated energy.")
                   elif monotonic_fraction > 0.8:
                       print("    🟡 Moderate fraction suggests generally Lyapunov-like behavior, with some exceptions or noise.")
                   else:
                       print("    ❌ Low fraction suggests the calculated energy functional ('{energy_functional_type}') is not consistently Lyapunov-like for these dynamics/parameters.")
             else:
                   # If num_monotonic is "Error"
                   print(f"    Could not calculate monotonic fraction due to data type issues in '{monotonic_col}'.")

        else:
             # If no valid monotonicity checks were found (all NaN)
             print("    No valid monotonicity checks found (all values might be NaN).")


    # --- Mathematical Argument (Placeholder) ---
    # Keep conceptual explanation from Phase 1 summary
    print("\n  Mathematical Argument (Conceptual):")
    print("    A formal proof that the energy functional is a strict Lyapunov function for")
    print("    these complex, stochastic dynamics remains challenging.")
    print("    The empirical monotonicity check provides evidence but is not definitive proof.")
    print("    Factors like noise, discrete updates, and boundary effects can influence behavior.")

else:
    # This block executes if analysis_error_energy was True initially
    print("❌ Skipping energy functional analysis due to configuration errors or lack of data.")

print("\n✅ Cell 11.4: Energy Functional Analysis completed.")

In [None]:
# Cell 11.5: Rule Parameter Sensitivity Analysis (GPU - Final Attempt, Simplified Imports)
# Description: Explicitly loads config, uses correct worker count, runs sweeps using
#              imported worker function. Ensures all local helper functions are defined.
#              All logic fully expanded.

import pandas as pd
import numpy as np
import networkx as nx
import time
import os
import pickle
import itertools
import warnings
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import multiprocessing as mp
import torch
import traceback
import json

# *** Import ONLY the worker function from the external file ***
# Try importing Phase 1 worker first as that was used in the provided output
worker_func_sens = None
try:
    from worker_utils import run_single_instance
    worker_func_sens = run_single_instance
    print("✅ Imported Phase 1 worker: run_single_instance for sensitivity analysis.")
except ImportError:
    # Fallback to Phase 2 worker if Phase 1 worker isn't available
    if 'run_single_instance_phase2' in globals():
         print("⚠️ WARNING: Phase 1 worker 'run_single_instance' not found. Using Phase 2 worker 'run_single_instance_phase2' for sensitivity.")
         worker_func_sens = run_single_instance_phase2
    else:
         raise ImportError("❌ ERROR: Could not import 'run_single_instance' from worker_utils.py or find 'run_single_instance_phase2' in scope.")

# --- Define Helper functions NEEDED LOCALLY in THIS cell ---
# (Copied from Cell 2 definitions to ensure they exist in this scope)
# Note: Ensure these match the versions used successfully in Phase 1/Cell 2

def get_sweep_parameters(graph_model_name, model_params, system_sizes, instances, trials, sensitivity_param=None, sensitivity_values=None):
    """Generates parameter dictionaries for simulation tasks, ensuring primary sweep param is always included."""
    all_task_params = []; base_seed = int(time.time() * 1000) % 100000; param_counter = 0
    primary_param_key = None; primary_param_name = None; primary_param_values = None; fixed_params = {}
    for key, values in model_params.items():
        is_sweep_candidate = False
        if isinstance(values, (list, np.ndarray)):
             if len(values) > 0: is_sweep_candidate = True
        if is_sweep_candidate:
            primary_param_key = key; primary_param_name = key.replace('_values', ''); primary_param_values = values
        else: fixed_params[key] = values
    if primary_param_key is None:
        if graph_model_name == 'RGG' and 'radius_values' in model_params: primary_param_key = 'radius_values'; primary_param_name = 'radius'; primary_param_values = model_params['radius_values']
        elif graph_model_name == 'SBM' and 'p_intra_values' in model_params: primary_param_key = 'p_intra_values'; primary_param_name = 'p_intra'; primary_param_values = model_params['p_intra_values']
        elif graph_model_name == 'WS' and 'p_values' in model_params: primary_param_key = 'p_values'; primary_param_name = 'p'; primary_param_values = model_params['p_values']
        else: primary_param_name = 'param'; primary_param_key = 'param_values'; primary_param_values = [0]; warnings.warn(f"Sweep param not found for {graph_model_name}. Using dummy 'param'.", RuntimeWarning)
    if not isinstance(primary_param_values, (list, np.ndarray)): primary_param_values = [primary_param_values]
    primary_param_col_name = primary_param_name + '_value'
    sens_loop_values = []
    if sensitivity_param is not None and sensitivity_values is not None:
        if isinstance(sensitivity_values, (list, np.ndarray)): sens_loop_values = sensitivity_values if len(sensitivity_values) > 0 else [None]
        else: sens_loop_values = [sensitivity_values]
    else: sens_loop_values = [None]
    n_idx = 0
    while n_idx < len(system_sizes):
        N = system_sizes[n_idx]; p_val_idx = 0
        while p_val_idx < len(primary_param_values):
             p_val = primary_param_values[p_val_idx]; sens_val_idx = 0
             while sens_val_idx < len(sens_loop_values):
                 sens_val = sens_loop_values[sens_val_idx]; inst_idx = 0
                 while inst_idx < instances:
                     graph_seed = base_seed + param_counter + inst_idx * 13 + n_idx * 100 + p_val_idx * 10 + sens_val_idx
                     trial_idx = 0
                     while trial_idx < trials:
                         sim_seed = base_seed + param_counter + inst_idx * 101 + trial_idx * 7 + n_idx * 1000 + p_val_idx * 100 + sens_val_idx
                         task = {'model': graph_model_name, 'N': N, 'fixed_params': fixed_params.copy(),
                                 primary_param_col_name: p_val, 'instance': inst_idx, 'trial': trial_idx,
                                 'graph_seed': graph_seed, 'sim_seed': sim_seed,
                                 'rule_param_name': sensitivity_param, 'rule_param_value': sens_val }
                         all_task_params.append(task); param_counter += 1; trial_idx += 1
                     inst_idx += 1
                 sens_val_idx += 1
             p_val_idx += 1
        n_idx += 1
    return all_task_params

def generate_graph(model_name, params, N, seed):
    """Generates a graph using NetworkX."""
    np.random.seed(seed); G = nx.Graph()
    try:
        gen_params = params.copy(); base_param_name = next((k.replace('_value','') for k in gen_params if k.endswith('_value')), None)
        if base_param_name and base_param_name+'_value' in gen_params: gen_params[base_param_name] = gen_params.pop(base_param_name+'_value')
        if model_name == 'WS':
            k = gen_params.get('k_neighbors', 4); p_rewire = gen_params.get('p', 0.1); k = int(k); k = max(2, k if k % 2 == 0 else k - 1); k = min(k, N - 1)
            if N > k: G = nx.watts_strogatz_graph(n=N, k=k, p=p_rewire, seed=seed)
            else: G = nx.complete_graph(N); warnings.warn(f"WS N<=k ({N}<={k}), generating complete graph.", RuntimeWarning)
        elif model_name == 'SBM':
            n_communities = gen_params.get('n_communities', 2); p_intra = gen_params.get('p_intra', 0.2); p_inter = gen_params.get('p_inter', 0.01)
            if N < n_communities: n_communities = N; warnings.warn(f"SBM N<communities", RuntimeWarning)
            if n_communities <= 0: raise ValueError("n_communities must be positive.")
            sizes = []; base_size = N // n_communities; remainder = N % n_communities; i = 0
            while i < n_communities: sizes.append(base_size + (1 if i < remainder else 0)); i += 1
            if 0 in sizes: raise ValueError(f"SBM zero-sized community for N={N}, C={n_communities}")
            probs = []; row_idx = 0
            while row_idx < n_communities:
                 row = []; col_idx = 0
                 while col_idx < n_communities: row.append(p_intra if row_idx == col_idx else p_inter); col_idx += 1
                 probs.append(row); row_idx += 1
            G = nx.stochastic_block_model(sizes=sizes, p=probs, seed=seed)
        elif model_name == 'RGG':
            radius = gen_params.get('radius', 0.1); G = nx.random_geometric_graph(n=N, radius=radius, seed=seed)
        else: raise ValueError(f"Unknown graph model: {model_name}")
    except Exception as e: G = nx.Graph(); warnings.warn(f"Graph gen failed for {model_name} N={N} params={params}: {e}", RuntimeWarning)
    # Relabeling logic (expanded)
    if G.number_of_nodes() > 0:
         needs_relabel = False; node_iterator = iter(G.nodes()); stop_iter_relabel = False
         while not stop_iter_relabel:
             try:
                 node = next(node_iterator)
                 if not isinstance(node, str): needs_relabel = True; stop_iter_relabel = True
             except StopIteration: stop_iter_relabel = True
         if needs_relabel:
             node_mapping = {}; original_nodes_relabel = list(G.nodes()); node_idx_relabel = 0
             while node_idx_relabel < len(original_nodes_relabel): node_mapping[original_nodes_relabel[node_idx_relabel]] = str(original_nodes_relabel[node_idx_relabel]); node_idx_relabel += 1
             G = nx.relabel_nodes(G, node_mapping, copy=False)
    return G

def reversed_sigmoid_func(x, A, x0, k, C):
    """ Reversed sigmoid function (decreasing S-shape). """
    try:
        x_array = np.asarray(x, dtype=float)
        if not all(isinstance(p, (int, float)) and np.isfinite(p) for p in [A, x0, k, C]): return np.full_like(x_array, np.nan, dtype=float)
        exponent_term = k * (x_array - x0); exponent_term_clipped = np.clip(exponent_term, -700, 700)
        denominator = 1.0 + np.exp(exponent_term_clipped); denominator_safe = np.where(denominator == 0, 1e-300, denominator)
        result = A / denominator_safe + C; result_final = np.nan_to_num(result, nan=np.nan, posinf=np.nan, neginf=np.nan); return result_final
    except Exception: return np.full_like(np.asarray(x), np.nan, dtype=float) # Ensure return shape matches x


print("\n--- Cell 11.5: Rule Parameter Sensitivity Analysis (GPU) ---")
print("  Defined local helper functions.")

# --- Configuration Loading ---
config = {}; analysis_error_sensitivity = False
output_dir_sens = None; exp_name_sens = None; sensitivity_param_name = None; sensitivity_values = None
TARGET_MODEL_SENS = 'WS' # Sensitivity usually tested on one baseline model
param_col_name_sens = None; param_base_name_sens = None; system_sizes_sens = []
num_instances_sens = 10; num_trials_sens = 3; rule_params_base_sens = {}; max_steps_sens = 200; conv_thresh_sens = 1e-4
state_dim_sens = 5; workers_sens = 32; primary_metric_sens = 'variance_norm'; all_metrics_sens = []
calculate_energy_sens = False; store_energy_history_sens = False; energy_type_sens = 'pairwise_dot'

# Check if config exists
if 'config' not in globals() or not isinstance(globals()['config'], dict):
     print("❌ FATAL: Config dictionary missing. Run Cell 1."); analysis_error_sensitivity = True
else:
    try:
        config = globals()['config'] # Use existing config
        output_dir_sens = config['OUTPUT_DIR']; exp_name_sens = config['EXPERIMENT_NAME']
        sensitivity_param_name = config.get('SENSITIVITY_RULE_PARAM')
        sensitivity_values = config.get('SENSITIVITY_VALUES')
        # Check if sensitivity analysis is configured
        if sensitivity_param_name is None or sensitivity_values is None or len(sensitivity_values) == 0:
             print("ℹ️ Skipping Sensitivity Analysis: SENSITIVITY_RULE_PARAM or SENSITIVITY_VALUES missing/empty in config.")
             analysis_error_sensitivity = True

        if not analysis_error_sensitivity:
             graph_params_sens = config['GRAPH_MODEL_PARAMS'].get(TARGET_MODEL_SENS,{})
             # Find sweep parameter for the target model (WS)
             ws_sweep_key = next((k for k in graph_params_sens if k.endswith('_values')), None)
             if ws_sweep_key:
                 param_base_name_sens = ws_sweep_key.replace('_values', '')
                 param_col_name_sens = param_base_name_sens + '_value'
             else:
                 param_base_name_sens = 'p'; param_col_name_sens = 'p_value' # Default for WS
                 warnings.warn(f"Could not determine WS sweep parameter name for sensitivity. Assuming '{param_col_name_sens}'.")

             print(f"  Sensitivity analysis target: Model={TARGET_MODEL_SENS}, Param='{sensitivity_param_name}', Values={sensitivity_values}")
             print(f"  Sweep parameter for {TARGET_MODEL_SENS}: '{param_col_name_sens}'")

             # Use largest system size for sensitivity analysis for clearer signal
             all_system_sizes = config.get('SYSTEM_SIZES', [])
             if all_system_sizes:
                 system_sizes_sens = [all_system_sizes[-1]] # Get the largest N
             else:
                 system_sizes_sens = [700] # Fallback N
             N_sens = system_sizes_sens[0]
             print(f"  Using system size N = {N_sens} for sensitivity.")

             # Load other necessary params
             num_instances_sens = config['NUM_INSTANCES_PER_PARAM']
             num_trials_sens = config['NUM_TRIALS_PER_INSTANCE']
             rule_params_base_sens = config['RULE_PARAMS']
             max_steps_sens = config['MAX_SIMULATION_STEPS']
             conv_thresh_sens = config['CONVERGENCE_THRESHOLD']
             state_dim_sens = config['STATE_DIM']
             workers_sens = config.get('PARALLEL_WORKERS', 32)
             primary_metric_sens = config.get('PRIMARY_ORDER_PARAMETER', 'variance_norm')
             all_metrics_sens = config.get('ORDER_PARAMETERS_TO_ANALYZE', [])
             calculate_energy_sens = config.get('CALCULATE_ENERGY', False)
             store_energy_history_sens = config.get('STORE_ENERGY_HISTORY', False)
             energy_type_sens = config.get('ENERGY_FUNCTIONAL_TYPE', 'pairwise_dot')

    except KeyError as e_key_sens:
         print(f"❌ FATAL: Missing key '{e_key_sens}' in config for sensitivity analysis.")
         analysis_error_sensitivity = True
    except Exception as config_e:
         print(f"❌ FATAL: Error loading config for sensitivity: {config_e}"); analysis_error_sensitivity = True

# --- Device Check ---
device_sens = torch.device('cpu') # Default device
if not analysis_error_sensitivity:
    if 'global_device' in globals():
        device_sens = global_device # Use globally set device
    else:
         if torch.cuda.is_available(): device_sens = torch.device('cuda:0')
    print(f"  Using device for sensitivity runs: {device_sens}")


# --- File Paths & Loading ---
all_sensitivity_results_list = []
values_to_run = []
combined_sensitivity_results_file = None
combined_sensitivity_pickle_file = None

if not analysis_error_sensitivity:
    # Define file paths based on the sensitivity parameter name
    combined_sensitivity_results_file = os.path.join(output_dir_sens, f"{exp_name_sens}_sensitivity_{sensitivity_param_name}_COMBINED_results.csv")
    combined_sensitivity_pickle_file = os.path.join(output_dir_sens, f"{exp_name_sens}_sensitivity_{sensitivity_param_name}_COMBINED_partial.pkl")
    values_to_run = list(sensitivity_values) # Copy the list

    # Load existing partial results if available
    if os.path.exists(combined_sensitivity_pickle_file):
        try:
            with open(combined_sensitivity_pickle_file, 'rb') as f_load_sens:
                all_sensitivity_results_list = pickle.load(f_load_sens)
            if isinstance(all_sensitivity_results_list, list) and len(all_sensitivity_results_list) > 0:
                 loaded_sens_df = pd.DataFrame(all_sensitivity_results_list)
                 # Check which sensitivity values are already present
                 if 'sensitivity_param_value' in loaded_sens_df.columns:
                      completed_values = loaded_sens_df['sensitivity_param_value'].unique()
                      # Update values_to_run to only include missing ones
                      values_to_run = [v for v in sensitivity_values if v not in completed_values]
                      print(f"  Loaded {len(all_sensitivity_results_list)} sensitivity results. Values completed: {completed_values}")
                 else:
                      warnings.warn("Loaded sensitivity pickle missing 'sensitivity_param_value' column. Assuming all values need rerunning.", RuntimeWarning)
                      all_sensitivity_results_list = [] # Reset if column missing
            else:
                 all_sensitivity_results_list = [] # Reset if loaded data invalid
        except Exception as e_load_pkl:
            warnings.warn(f"Could not load sensitivity pickle ({e_load_pkl}). Assuming all values need running.", RuntimeWarning)
            all_sensitivity_results_list = []
    print(f"  Sensitivity values remaining to run for '{sensitivity_param_name}': {values_to_run}")


# --- Run Sensitivity Sweeps ---
if not analysis_error_sensitivity and len(values_to_run) > 0:
    print(f"\n--- Running Sensitivity Sweeps for Param: '{sensitivity_param_name}' ---")
    # Ensure spawn method is set (should be done in Cell 0)
    try:
        if mp.get_start_method(allow_none=True) != 'spawn':
             print("🚨 WARNING: Forcing multiprocessing start method to 'spawn' for sensitivity runs.")
             mp.set_start_method('spawn', force=True)
    except Exception as e_set_spawn_sens:
         print(f"⚠️ Warning: Could not set 'spawn' start method: {e_set_spawn_sens}. GPU workers might fail.")

    # Define keys needed to reconstruct results accurately if needed
    essential_param_keys = ['model', 'N', 'instance', 'trial', 'graph_seed', 'sim_seed', 'rule_param_name', 'rule_param_value', param_col_name_sens]

    # Iterate through each sensitivity value that needs to be run
    sens_value_index = 0
    while sens_value_index < len(values_to_run): # Use while loop
         sens_value = values_to_run[sens_value_index]
         print(f"\n-- Running for {sensitivity_param_name} = {sens_value:.4f} --")

         # Define rule parameters for this specific sensitivity value
         current_rule_params = rule_params_base_sens.copy()
         current_rule_params[sensitivity_param_name] = sens_value

         # Generate tasks for this specific sensitivity value
         sens_tasks = get_sweep_parameters(
             graph_model_name=TARGET_MODEL_SENS,
             model_params=config['GRAPH_MODEL_PARAMS'].get(TARGET_MODEL_SENS, {}), # Use original P1 ranges for WS sweep
             system_sizes=system_sizes_sens, # Use specific N for sensitivity
             instances=num_instances_sens,
             trials=num_trials_sens,
             sensitivity_param=sensitivity_param_name, # Pass the sensitivity param name
             sensitivity_values=[sens_value] # Pass ONLY the current value
         )
         print(f"  Generated {len(sens_tasks)} tasks for value {sens_value:.4f}...")

         # Check if tasks were generated and have the sweep parameter column
         tasks_are_valid = False
         if len(sens_tasks) > 0:
             if param_col_name_sens in sens_tasks[0]:
                 tasks_are_valid = True
             else:
                  warnings.warn(f"Generated tasks missing sweep key '{param_col_name_sens}'! Check get_sweep_parameters.", RuntimeWarning)
         else:
              print("  No tasks generated for this sensitivity value.")

         if not tasks_are_valid:
              sens_value_index += 1; continue # Skip to next value if tasks invalid

         # --- Execute Sweep for this Sensitivity Value ---
         sens_start_time = time.time(); futures_map = {}; pool_broken_flag_sens = False
         executor_instance_sens = ProcessPoolExecutor(max_workers=workers_sens)
         try: # Process pool execution
             # Submit tasks to the pool
             task_idx_sens = 0
             while task_idx_sens < len(sens_tasks):
                 task_params = sens_tasks[task_idx_sens]
                 # Ensure sweep key exists before graph gen (redundant check)
                 if param_col_name_sens not in task_params: task_idx_sens += 1; continue

                 graph_gen_params_sens = task_params.get('fixed_params', {}).copy()
                 graph_gen_params_sens[param_base_name_sens] = task_params[param_col_name_sens]
                 G = generate_graph( task_params['model'], graph_gen_params_sens, task_params['N'], task_params['graph_seed'] )

                 if G is None or G.number_of_nodes() == 0: task_idx_sens += 1; continue # Skip failed graph gen

                 # Use the selected worker function (imported earlier)
                 future = executor_instance_sens.submit(
                     worker_func_sens, # Use the imported function
                     G, task_params['N'], task_params, task_params['sim_seed'],
                     current_rule_params, # Pass rules specific to this sensitivity value
                     max_steps_sens, conv_thresh_sens, state_dim_sens,
                     calculate_energy_sens, store_energy_history_sens, energy_type_sens,
                     all_metrics_sens, str(device_sens) # Pass device name as string
                 )
                 futures_map[future] = task_params; task_idx_sens += 1 # Store future and params

             # Collect results using as_completed for better progress update
             pbar_sens = tqdm(total=len(futures_map), desc=f"Sens. ({sens_value:.3f})", mininterval=2.0, unit="task")
             results_this_value = [] # Store results specific to this sens_value run
             try:
                 futures_iterator = as_completed(futures_map)
                 for future in futures_iterator: # Expanded loop for clarity
                     original_task_params = futures_map[future] # Get back original task info
                     if pool_broken_flag_sens: pbar_sens.update(1); continue # Update progress but skip if pool broke

                     try:
                         result_dict = future.result(timeout=1200) # Get result with timeout
                         if result_dict is not None and isinstance(result_dict, dict):
                             # Explicitly reconstruct the full result dictionary from task + result
                             full_result = {}
                             # Copy essential keys from original task parameters
                             essential_idx = 0
                             while essential_idx < len(essential_param_keys):
                                 key = essential_param_keys[essential_idx]
                                 if key in original_task_params:
                                     full_result[key] = original_task_params[key]
                                 essential_idx += 1
                             # Update with the results returned by the worker
                             full_result.update(result_dict)
                             # Final safety check for sweep parameter key (should exist)
                             if param_col_name_sens not in full_result:
                                 if param_col_name_sens in original_task_params:
                                     full_result[param_col_name_sens] = original_task_params[param_col_name_sens]
                                 else:
                                     warnings.warn(f"Essential key '{param_col_name_sens}' missing after merge!", RuntimeWarning)
                             results_this_value.append(full_result) # Add to list for this value
                     except Exception as e_get_sens:
                          error_str_sens = str(e_get_sens)
                          is_broken_sens = False
                          if "Broken" in error_str_sens or "abruptly" in error_str_sens or "shutdown" in error_str_sens: is_broken_sens = True
                          elif isinstance(e_get_sens, TypeError) or isinstance(e_get_sens, AttributeError): is_broken_sens = True
                          if is_broken_sens:
                               print(f"\n❌ Pool broke during sensitivity run ({sens_value:.3f})"); pool_broken_flag_sens = True; break # Exit inner loop
                          else: # Handle other errors like timeout
                               warnings.warn(f"Error getting result for sensitivity task {original_task_params}: {type(e_get_sens).__name__}", RuntimeWarning)
                               error_res_sens = {**original_task_params, 'error_message': f"Future failed: {type(e_get_sens).__name__}"}
                               results_this_value.append(error_res_sens)
                     finally:
                          pbar_sens.update(1) # Update progress bar

             except KeyboardInterrupt: print(f"\nInterrupted sensitivity run ({sens_value:.3f}).")
             finally: pbar_sens.close(); # Ensure progress bar is closed

         except Exception as main_e_sens:
              print(f"\n❌ ERROR during Sensitivity setup/execution for {sens_value:.3f}: {main_e_sens}")
              traceback.print_exc(limit=1)
         finally:
              print(f"Shutting down executor ({sens_value:.3f})..."); executor_instance_sens.shutdown(wait=True, cancel_futures=True); print("Executor shut down.")

         # Process results for this sensitivity value
         sens_end_time = time.time(); print(f"  ✅ Sweep for {sens_value:.3f} completed ({sens_end_time-sens_start_time:.1f}s).")
         # Filter out potential None results before adding
         valid_results_this_value = [r for r in results_this_value if r is not None and isinstance(r, dict)]
         added_now = 0
         if len(valid_results_this_value) > 0:
              # Extend the main list with results from this value
              all_sensitivity_results_list.extend(valid_results_this_value)
              added_now = len(valid_results_this_value)
              print(f"  Added {added_now} valid results to main list.")
              # Save incrementally after each sensitivity value completes
              try:
                  with open(combined_sensitivity_pickle_file, 'wb') as f_comb_sens_inc:
                       pickle.dump(all_sensitivity_results_list, f_comb_sens_inc)
              except Exception as e_save_inc_sens:
                   warnings.warn(f"Incremental save of sensitivity results failed: {e_save_inc_sens}", RuntimeWarning)
         else:
              print("  ⚠️ No valid results obtained for this sensitivity value.")

         # Check if the pool broke during this run
         if pool_broken_flag_sens:
              print("❌ Aborting sensitivity sweep because process pool failed.")
              analysis_error_sensitivity = True # Mark analysis as failed
              break # Exit the outer while loop over sensitivity values

         sens_value_index += 1 # Increment outer loop counter

    # Final message if errors occurred during the sweep
    if analysis_error_sensitivity: print("\n❌ Errors occurred during sensitivity sweep execution.")


# --- Save Combined Sensitivity Results ---
global_sensitivity_results = pd.DataFrame() # Initialize global variable
if not analysis_error_sensitivity and len(all_sensitivity_results_list) > 0:
    print("\nSaving combined sensitivity results...")
    try:
        # Create DataFrame from the combined list
        combined_sens_df = pd.DataFrame(all_sensitivity_results_list)

        # --- Critical Check: Ensure sweep parameter column exists ---
        if param_col_name_sens not in combined_sens_df.columns:
             # This should not happen if task generation and result merging worked correctly
             missing_col_msg = f"CRITICAL ERROR: Sweep parameter column '{param_col_name_sens}' is missing from the final sensitivity DataFrame! Check worker result merging."
             warnings.warn(missing_col_msg, RuntimeWarning)
             raise KeyError(missing_col_msg) # Raise error to prevent saving bad data
        else:
             print(f"  Column '{param_col_name_sens}' confirmed present in sensitivity DataFrame.")

        # Save metadata to CSV (excluding large vector columns)
        cols_to_save_sens = [col for col in combined_sens_df.columns if col not in ['final_state_vector', 'state_history', 'avg_change_history', 'baseline_state_for_spread']]
        combined_sens_df[cols_to_save_sens].to_csv(combined_sensitivity_results_file, index=False)
        # Save the full data (including vectors if present) to Pickle
        with open(combined_sensitivity_pickle_file, 'wb') as f_comb_sens_final:
             pickle.dump(all_sensitivity_results_list, f_comb_sens_final)
        print(f"  ✅ Combined sensitivity results saved ({combined_sens_df.shape[0]} entries). CSV: '{combined_sensitivity_results_file}', Pickle: '{combined_sensitivity_pickle_file}'")
        # Assign the full DataFrame to the global variable
        global_sensitivity_results = combined_sens_df

    except KeyError as e_key_save: # Catch the specific KeyError raised above
         print(f"❌ Error saving sensitivity DataFrame due to missing key: {e_key_save}")
         global_sensitivity_results = pd.DataFrame() # Ensure global var is empty DF
    except Exception as e_save_comb_sens:
         print(f"❌ Error creating/saving combined sensitivity DataFrame: {e_save_comb_sens}")
         traceback.print_exc(limit=1)
         global_sensitivity_results = pd.DataFrame() # Ensure global var is empty DF


# --- Inspect DataFrame ---
print("\n--- Inspecting `global_sensitivity_results` DataFrame ---")
if 'global_sensitivity_results' in globals() and isinstance(global_sensitivity_results, pd.DataFrame) and not global_sensitivity_results.empty:
    print(f"  Shape: {global_sensitivity_results.shape}")
    print(f"  Columns: {list(global_sensitivity_results.columns)}")
    # Check specifically for the sweep parameter and sensitivity value columns
    sweep_col_present = param_col_name_sens in global_sensitivity_results.columns
    sens_val_col_present = 'sensitivity_param_value' in global_sensitivity_results.columns
    print(f"  Sweep Param Column ('{param_col_name_sens}') Present: {'✅ Yes' if sweep_col_present else '❌ NO'}")
    print(f"  Sensitivity Value Column ('sensitivity_param_value') Present: {'✅ Yes' if sens_val_col_present else '❌ NO'}")
    print("  Head:\n", global_sensitivity_results.head().to_string())
else:
    print("  DataFrame `global_sensitivity_results` is missing or empty.")


# --- Analyze Sensitivity Impact (Simple Fit) ---
if not analysis_error_sensitivity and 'global_sensitivity_results' in globals() and isinstance(global_sensitivity_results, pd.DataFrame) and not global_sensitivity_results.empty:
    # Check required columns for analysis
    required_analysis_cols = [param_col_name_sens, 'sensitivity_param_value', primary_metric_sens]
    cols_missing_analysis = False
    col_check_idx = 0
    while col_check_idx < len(required_analysis_cols):
        if required_analysis_cols[col_check_idx] not in global_sensitivity_results.columns:
             print(f"❌ Cannot analyze sensitivity impact: Column '{required_analysis_cols[col_check_idx]}' missing.")
             cols_missing_analysis = True
             break
        col_check_idx += 1

    if not cols_missing_analysis:
         print(f"\n--- Analyzing Impact of '{sensitivity_param_name}' on Critical Point (Simple Sigmoid Fit) ---")
         sensitivity_analysis_results = [] # Store fit results {sens_value: pc_estimate}
         # Get unique sensitivity values present in the results
         valid_sens_values_present = sorted(global_sensitivity_results['sensitivity_param_value'].unique())

         if len(valid_sens_values_present) == 0:
              print("  No sensitivity values found in the results DataFrame.")
         else:
              # Iterate through each sensitivity value found in the results
              sens_idx_analyze = 0
              while sens_idx_analyze < len(valid_sens_values_present):
                  sens_value = valid_sens_values_present[sens_idx_analyze]
                  print(f"  Analyzing results for {sensitivity_param_name} = {sens_value:.4f}")
                  # Filter DataFrame for the current sensitivity value
                  sens_value_df = global_sensitivity_results[global_sensitivity_results['sensitivity_param_value'] == sens_value]

                  # Aggregate results for fitting: group by sweep param, calc mean/std of order param
                  pc_est = np.nan # Default estimate
                  try:
                      # Group by the primary sweep parameter (e.g., p_value)
                      # Calculate mean and std dev of the primary order parameter
                      agg_sens_df = sens_value_df.groupby(param_col_name_sens)[primary_metric_sens].agg(['mean', 'std']).reset_index()
                      # Drop rows where mean could not be calculated (e.g., all NaNs in group)
                      agg_sens_df = agg_sens_df.dropna(subset=['mean'])

                      # Check if enough data points remain for fitting (e.g., need at least 4)
                      min_points_for_fit = 4
                      if agg_sens_df.empty or len(agg_sens_df) < min_points_for_fit:
                          print(f"    Not enough aggregated data points ({len(agg_sens_df)}) to perform sigmoid fit. Need >= {min_points_for_fit}.")
                          sensitivity_analysis_results.append({'sens_value': sens_value, 'pc': np.nan})
                      else:
                          # --- Perform Sigmoid Fit ---
                          p_vals_sens = agg_sens_df[param_col_name_sens].values # X values for fit
                          metric_vals_sens = agg_sens_df['mean'].values # Y values for fit

                          # Provide initial guesses for parameters [A, x0, k, C]
                          min_met=np.min(metric_vals_sens); max_met=np.max(metric_vals_sens)
                          amp_guess=max_met-min_met # Amplitude guess
                          pc_guess=np.median(p_vals_sens) # Guess pc is near median of p values
                          p_range=max(p_vals_sens)-min(p_vals_sens)
                          k_guess=abs(amp_guess)/(p_range+1e-6)*4 # Steepness guess (heuristic)
                          offset_guess=min_met # Offset guess
                          initial_params = [amp_guess, pc_guess, k_guess, offset_guess]

                          # Define bounds for parameters if needed (optional but recommended)
                          # Bounds: ([A_min, x0_min, k_min, C_min], [A_max, x0_max, k_max, C_max])
                          fit_bounds = ([-np.inf, min(p_vals_sens), 1e-3, -np.inf], [np.inf, max(p_vals_sens), 1e3, np.inf])

                          try:
                              # Use curve_fit to find optimal parameters
                              params, cov = curve_fit(reversed_sigmoid_func, p_vals_sens, metric_vals_sens, p0=initial_params, bounds=fit_bounds, maxfev=8000)
                              # The estimated critical point (x0) is the second parameter
                              pc_est = params[1]
                              # Check if the estimated pc is within the range of the data (sanity check)
                              if pc_est < min(p_vals_sens) or pc_est > max(p_vals_sens):
                                   warnings.warn(f"Sigmoid fit pc={pc_est:.4f} is outside the range of data [{min(p_vals_sens):.4f}, {max(p_vals_sens):.4f}] for sens_value={sens_value:.4f}.", RuntimeWarning)
                              print(f"    Estimated p_c ≈ {pc_est:.6f}")
                              sensitivity_analysis_results.append({'sens_value': sens_value, 'pc': pc_est})
                          except RuntimeError as fit_err:
                              print(f"    Sigmoid fit failed to converge: {fit_err}")
                              sensitivity_analysis_results.append({'sens_value': sens_value, 'pc': np.nan})
                          except Exception as fit_e:
                               print(f"    Sigmoid fit failed with unexpected error: {fit_e}")
                               sensitivity_analysis_results.append({'sens_value': sens_value, 'pc': np.nan})

                  except KeyError as e_agg_key:
                       print(f"    ❌ KeyError during aggregation: {e_agg_key}. Check columns.")
                       sensitivity_analysis_results.append({'sens_value': sens_value, 'pc': np.nan})
                  except Exception as agg_err:
                      print(f"    Error during aggregation/fitting: {agg_err}")
                      sensitivity_analysis_results.append({'sens_value': sens_value, 'pc': np.nan})

                  sens_idx_analyze += 1 # Increment loop counter

              # --- Plot Sensitivity Results ---
              if len(sensitivity_analysis_results) > 0:
                  # Create DataFrame from the fit results
                  sens_results_df = pd.DataFrame(sensitivity_analysis_results)
                  # Drop rows where pc estimation failed (NaN)
                  sens_results_df = sens_results_df.dropna(subset=['pc'])

                  if not sens_results_df.empty:
                      # Create the plot
                      fig_sens, ax_sens = plt.subplots(figsize=(8, 5))
                      # Plot estimated pc vs sensitivity parameter value
                      ax_sens.plot(sens_results_df['sens_value'], sens_results_df['pc'], marker='o', linestyle='-')
                      # Label axes and title
                      ax_sens.set_xlabel(f"Rule Parameter: {sensitivity_param_name}")
                      ax_sens.set_ylabel(f"Estimated Critical Point (p_c for {TARGET_MODEL_SENS})")
                      ax_sens.set_title(f"Sensitivity of Critical Point to {sensitivity_param_name}")
                      ax_sens.grid(True, linestyle=':')
                      plt.tight_layout()
                      # Save the plot
                      sens_plot_path = os.path.join(output_dir_sens, f"{exp_name_sens}_sensitivity_pc_vs_{sensitivity_param_name}.png")
                      try:
                           plt.savefig(sens_plot_path, dpi=150)
                           print(f"  ✅ Sensitivity plot saved to: {sens_plot_path}")
                      except Exception as e_save_sens_plot:
                           print(f"  ❌ Error saving sensitivity plot: {e_save_sens_plot}")
                      plt.show() # Display the plot
                      plt.close(fig_sens) # Close the figure
                  else:
                      print("  No successful sigmoid fits obtained to plot sensitivity.")
              else:
                   print("  No sensitivity analysis results generated.") # Should not happen if loop ran unless all fits failed
else:
    print("❌ Skipping Sensitivity Analysis section due to configuration, missing data, or errors.")

print("\n✅ Cell 11.5: Rule Parameter Sensitivity Analysis completed.")

In [None]:
# Cell 11.6: State Dimensionality Comparison (Fix Graph Params)
# Description: Runs basic WS sweeps for 1D and 2D state representations.
#              Fixes KeyError by correctly passing parameters to generate_graph.
#              Qualitatively compares behavior to the 5D baseline.

import pandas as pd
import numpy as np
import networkx as nx
import time
import os
import pickle
import itertools
import warnings
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import torch  # Ensure torch is available if used by simplified runners
import multiprocessing as mp  # Ensure imported if using ProcessPool
import copy # For deepcopy

print("\n--- Cell 11.6: State Dimensionality Comparison (Fix Graph Params) ---")

# --- Configuration & Prerequisite Checks ---
analysis_error_dim = False
if 'config' not in globals() or not isinstance(config, dict):
    print("❌ FATAL: Config dictionary missing. Run Cell 1."); analysis_error_dim = True
# Need the Phase 2 worker as it needs to handle variable state_dim
if 'run_single_instance_phase2' not in globals():
    print("❌ FATAL: Phase 2 worker function 'run_single_instance_phase2' not defined. Run Cell 2.")
    analysis_error_dim = True
else:
     # Assign the correct worker function
     worker_func_dim = run_single_instance_phase2
     print("  Using Phase 2 worker 'run_single_instance_phase2' for dimensionality tests.")

if 'get_sweep_parameters' not in globals() or 'generate_graph' not in globals():
    print("❌ FATAL: Helper functions missing. Run Cell 2."); analysis_error_dim = True

# Load config vars needed
dims_to_test_config = []
fixed_N_dim = 100
target_model_dim = 'WS'
graph_params_all_dim = {}
graph_params_dim = {}
param_name_dim = None
param_values_dim = None
param_col_name_dim = None
num_instances_dim = 5
num_trials_dim = 2
rule_params_base_dim = {}
max_steps_dim = 200
conv_thresh_dim = 1e-4
workers_dim = 32
output_dir_dim = None
exp_name_dim = None
primary_metric_dim = 'variance_norm'

if not analysis_error_dim:
     try:
         config = globals()['config'] # Use existing config
         # Get dimensions to test (e.g., [1, 2]), excluding the baseline 5D
         dims_to_test_config = config.get('DIMENSIONALITY_TEST_SIZES', [1, 2, 5])
         dims_to_test = [d for d in dims_to_test_config if d != 5 and isinstance(d, int) and d > 0]
         if not dims_to_test:
             print("ℹ️ No dimensions selected for comparison (excluding baseline D=5 or invalid values). Skipping.")
             analysis_error_dim = True

         # Get parameters for the comparison run
         fixed_N_dim = config.get('DIMENSIONALITY_TEST_N', 100) # Use specific N for this test
         target_model_dim = 'WS'  # Compare using WS model as baseline
         graph_params_all_dim = config.get('GRAPH_MODEL_PARAMS', {})
         # Use original Phase 1 WS params for this sweep
         graph_params_dim = graph_params_all_dim.get(target_model_dim, {})
         if not graph_params_dim:
             print(f"❌ FATAL: Graph parameters for '{target_model_dim}' not found in config.")
             analysis_error_dim = True

         # Find primary sweep param name and values for WS model
         ws_sweep_key = next((k for k in graph_params_dim if k.endswith('_values')), None)
         if ws_sweep_key:
             param_name_dim = ws_sweep_key.replace('_values', '')  # e.g., 'p'
             param_values_dim = graph_params_dim[ws_sweep_key]
             param_col_name_dim = param_name_dim + '_value'  # e.g., 'p_value'
         else:
             print(f"❌ FATAL: Could not find sweep parameter (ending in '_values') for {target_model_dim}.")
             analysis_error_dim = True

         # Adjust run parameters for potentially quicker dimensionality test runs
         num_instances_dim = max(1, config.get('NUM_INSTANCES_PER_PARAM', 10) // 2)
         num_trials_dim = max(1, config.get('NUM_TRIALS_PER_INSTANCE', 3) // 2)
         rule_params_base_dim = config.get('RULE_PARAMS', {})
         max_steps_dim = config.get('MAX_SIMULATION_STEPS', 200)
         conv_thresh_dim = config.get('CONVERGENCE_THRESHOLD', 1e-4)
         workers_dim = config.get('PARALLEL_WORKERS', os.cpu_count())
         output_dir_dim = config['OUTPUT_DIR']
         exp_name_dim = config['EXPERIMENT_NAME']
         primary_metric_dim = config.get('PRIMARY_ORDER_PARAMETER', 'variance_norm')

     except KeyError as e_key_dim:
          print(f"❌ FATAL: Missing key '{e_key_dim}' in config for dimensionality test.")
          analysis_error_dim = True
     except Exception as e_conf_dim:
          print(f"❌ FATAL: Error loading config for dimensionality test: {e_conf_dim}.")
          analysis_error_dim = True


# --- Device Check ---
device_dim = torch.device('cpu') # Default device
if not analysis_error_dim:
    if 'global_device' in globals():
        device_dim = global_device # Use globally set device
    else:
         if torch.cuda.is_available(): device_dim = torch.device('cuda:0')
    print(f"  Using device for dimensionality runs: {device_dim}")


# --- Run Sweeps for 1D and 2D ---
dim_results_list = [] # Store results from D=1, D=2 runs
if not analysis_error_dim:
    print(f"\n--- Running Dimensionality Sweeps for D={dims_to_test} (N={fixed_N_dim}) ---")
    # Set spawn method if needed (should be done in Cell 0)
    try:
        if mp.get_start_method(allow_none=True) != 'spawn':
            print("🚨 WARNING: Forcing multiprocessing start method to 'spawn' for dimensionality runs.")
            mp.set_start_method('spawn', force=True)
    except Exception as e_set_spawn_dim:
         print(f"⚠️ Warning: Could not force 'spawn' start method: {e_set_spawn_dim}. GPU workers might fail.")

    # Iterate through the dimensions to test (e.g., [1, 2])
    dim_idx = 0
    while dim_idx < len(dims_to_test):
        current_dim = dims_to_test[dim_idx]
        print(f"\n--- Running Dimensionality Sweep for D = {current_dim} ---")
        # Note: Using the same 5D rule parameters. How they affect fewer dimensions depends on the hdc_5d_step_vectorized_torch implementation.
        # We assume the step function handles lower dimensions gracefully (as implemented in Cell 2).
        current_rule_params_dim = rule_params_base_dim.copy()

        # Generate tasks for this dimension using Phase 1 WS sweep range
        dim_tasks = get_sweep_parameters(
            graph_model_name=target_model_dim,
            model_params=graph_params_dim, # Use original Phase 1 WS params
            system_sizes=[fixed_N_dim], # Only the specific N for this test
            instances=num_instances_dim,
            trials=num_trials_dim
        )
        print(f"  Generated {len(dim_tasks)} tasks for D={current_dim}, N={fixed_N_dim}.")

        # Check if tasks were generated
        if not dim_tasks:
            print("  No tasks generated, skipping this dimension.")
            dim_idx += 1; continue

        # Execute sweep for this dimension
        dim_start_time = time.time()
        dim_futures = {} # Map future object to original task parameters
        pool_broken_flag_dim = False
        executor_instance_dim = ProcessPoolExecutor(max_workers=workers_dim)
        try:
            # Submit tasks
            submit_idx_dim = 0
            while submit_idx_dim < len(dim_tasks):
                task_params = dim_tasks[submit_idx_dim]
                # *** CORRECTED PARAMETER PASSING TO generate_graph ***
                graph_gen_params_dim = task_params.get('fixed_params', {}).copy()
                sweep_param_col = param_col_name_dim # e.g., 'p_value'
                if sweep_param_col in task_params:
                    # Add sweep value using the base name expected by generate_graph (e.g., 'p')
                    graph_gen_params_dim[param_name_dim] = task_params[sweep_param_col]
                else:
                    # This case indicates an error in get_sweep_parameters
                    warnings.warn(f"Sweep column {sweep_param_col} not found in task {task_params}. Graph generation may fail.", RuntimeWarning)
                    # Add a default value if needed, though failure is likely
                    graph_gen_params_dim[param_name_dim] = 0.1 # Example default

                # Generate graph
                G = generate_graph(task_params['model'], graph_gen_params_dim, task_params['N'], task_params['graph_seed'])
                # *********************************************************

                if G is None or G.number_of_nodes() == 0:
                    submit_idx_dim += 1; continue  # Skip failed graph gen

                # Submit task using the Phase 2 worker, passing the current dimension
                future = executor_instance_dim.submit(
                    worker_func_dim,  # Use the assigned Phase 2 worker
                    graph=G, N=task_params['N'], instance_params=task_params, trial_seed=task_params['sim_seed'],
                    rule_params_in=current_rule_params_dim,
                    max_steps=max_steps_dim, conv_thresh=conv_thresh_dim,
                    state_dim=current_dim,  # *** Pass the dimension to simulate ***
                    calculate_energy=False,  # Disable energy for simplicity
                    store_energy_history=False,
                    energy_type=None,
                    metrics_to_calc=['variance_norm', 'entropy_dim_0'],  # Request only relevant metrics
                    device=str(device_dim),
                    # Phase 2 specific args (can be default/False for this run)
                    store_state_history=False,
                    perturbation_params=None,
                    phase2_metrics_to_calc=[],
                    keep_full_state_history=False # Don't need history back
                )
                dim_futures[future] = task_params  # Map future to task
                submit_idx_dim += 1 # Increment submit loop

            # Collect results
            pbar_dim = tqdm(total=len(dim_futures), desc=f"Sweep D={current_dim}", mininterval=2.0, unit="task")
            results_this_dim = []
            try:
                # Use as_completed to process futures as they finish
                future_get_iter = as_completed(dim_futures)
                for future in future_get_iter:
                    original_task_params_dim = dim_futures[future] # Get back original params
                    if pool_broken_flag_dim: pbar_dim.update(1); continue # Skip if pool broke

                    try:
                        result_dict = future.result(timeout=300) # Shorter timeout for potentially simpler runs
                        if result_dict is not None and isinstance(result_dict, dict):
                            # Combine original task params with worker result
                            full_result = copy.deepcopy(original_task_params_dim)
                            full_result.update(result_dict)
                            full_result['state_dim_run'] = current_dim  # Explicitly add dimension run
                            # Remove vector/history if present to save memory
                            if 'final_state_vector' in full_result: del full_result['final_state_vector']
                            if 'state_history' in full_result: del full_result['state_history']
                            if 'avg_change_history' in full_result: del full_result['avg_change_history']
                            results_this_dim.append(full_result)
                    except Exception as e_get_dim:
                        error_str_dim = str(e_get_dim)
                        is_broken_dim = False
                        if "Broken" in error_str_dim or "abruptly" in error_str_dim or "shutdown" in error_str_dim: is_broken_dim = True
                        elif isinstance(e_get_dim, TypeError) or isinstance(e_get_dim, AttributeError): is_broken_dim = True
                        if is_broken_dim:
                            pool_broken_flag_dim = True
                            print(f"\n❌ Pool broke during D={current_dim} run."); break # Exit collection loop
                        else: # Handle other errors like timeout
                            warnings.warn(f"Error getting result for D={current_dim} task {original_task_params_dim}: {type(e_get_dim).__name__}", RuntimeWarning)
                    finally:
                        pbar_dim.update(1) # Update progress bar

            except KeyboardInterrupt:
                print(f"\nInterrupted D={current_dim} run.")
            finally:
                pbar_dim.close() # Ensure progress bar is closed

        except Exception as main_e_dim:
            print(f"\n❌ ERROR during Dimensionality setup/execution for D={current_dim}: {main_e_dim}")
            traceback.print_exc(limit=1)
        finally:
            print(f"Shutting down executor D={current_dim}...")
            executor_instance_dim.shutdown(wait=True, cancel_futures=True)
            print("Executor shut down.")

        # Process results for this dimension
        dim_end_time = time.time()
        print(f"  ✅ Sweep for D={current_dim} completed ({dim_end_time - dim_start_time:.1f}s).")
        valid_results_this_dim = [r for r in results_this_dim if r is not None and isinstance(r, dict)]
        if len(valid_results_this_dim) > 0:
            dim_results_list.extend(valid_results_this_dim)
            print(f"  Added {len(valid_results_this_dim)} results for D={current_dim}.")
        else:
            print(f"  ⚠️ No valid results obtained for D={current_dim}.")

        # Check if pool broke during this dimension's run
        if pool_broken_flag_dim:
            print(f"❌ Aborting dimensionality sweep due to broken pool at D={current_dim}.")
            analysis_error_dim = True # Mark analysis as failed
            break # Exit the outer while loop over dimensions

        dim_idx += 1 # Increment dimension loop counter

# --- Qualitative Comparison Plot ---
if not analysis_error_dim and len(dim_results_list) > 0:
    print("\n--- Plotting Dimensionality Comparison ---")
    # Create DataFrame from the collected D=1, D=2 results
    dim_results_df = pd.DataFrame(dim_results_list)

    # Validate DataFrame structure before plotting
    plot_error_dim = False
    if 'state_dim_run' not in dim_results_df.columns:
         print("❌ Cannot plot: 'state_dim_run' column missing from results."); plot_error_dim = True
    if param_col_name_dim not in dim_results_df.columns:
         print(f"❌ Cannot plot: Primary sweep column '{param_col_name_dim}' missing from results."); plot_error_dim = True
    if primary_metric_dim not in dim_results_df.columns:
          print(f"❌ Cannot plot: Primary metric column '{primary_metric_dim}' missing from results."); plot_error_dim = True

    if not plot_error_dim:
        fig_dim, ax_dim = plt.subplots(figsize=(10, 6))
        dims_found = sorted(dim_results_df['state_dim_run'].unique())
        # Use distinct colors/markers for D=1, D=2
        plot_styles = {1: ('royalblue', 'o', '-'), 2: ('firebrick', 'x', '-')}

        # Plot D=1 and D=2 results using a loop
        d_plot_idx = 0
        while d_plot_idx < len(dims_found):
            d = dims_found[d_plot_idx]
            d_data = dim_results_df[dim_results_df['state_dim_run'] == d]
            if not d_data.empty:
                # Aggregate data: group by sweep param, calc mean/std of primary metric
                agg_d_data = d_data.groupby(param_col_name_dim)[primary_metric_dim].agg(['mean', 'std']).reset_index().dropna()
                if not agg_d_data.empty:
                    style = plot_styles.get(d, ('black', '.', '--')) # Fallback style
                    ax_dim.errorbar(agg_d_data[param_col_name_dim], agg_d_data['mean'], yerr=agg_d_data['std'],
                                    marker=style[1], linestyle=style[2], color=style[0],
                                    label=f'D = {d}', capsize=3, alpha=0.8, markersize=5)
            d_plot_idx += 1

        # --- Load and plot 5D baseline (using primary WS sweep results) ---
        baseline_5d_data = pd.DataFrame() # Initialize empty
        if 'global_sweep_results' in globals() and isinstance(global_sweep_results, pd.DataFrame) and not global_sweep_results.empty:
             # Filter for WS model and largest N from that sweep
             baseline_N = global_sweep_results['N'].max()
             baseline_5d_data = global_sweep_results[(global_sweep_results['model'] == target_model_dim) &
                                                      (global_sweep_results['N'] == baseline_N)].copy()
             if not baseline_5d_data.empty:
                  # Check required columns exist in baseline data
                  baseline_cols_ok = True
                  if primary_metric_dim not in baseline_5d_data.columns: baseline_cols_ok = False
                  if param_col_name_dim not in baseline_5d_data.columns: baseline_cols_ok = False

                  if baseline_cols_ok:
                      agg_5d_data = baseline_5d_data.groupby(param_col_name_dim)[primary_metric_dim].agg(['mean', 'std']).reset_index().dropna()
                      if not agg_5d_data.empty:
                          ax_dim.errorbar(agg_5d_data[param_col_name_dim], agg_5d_data['mean'], yerr=agg_5d_data['std'],
                                          marker='s', linestyle='--', label=f'D = 5 (Baseline, N={baseline_N})',
                                          capsize=3, alpha=0.7, markersize=4, color='black', zorder=5) # Plot baseline on top
                      else: print("  ⚠️ Baseline D=5 data empty after aggregation.")
                  else: print("  ⚠️ Baseline D=5 data missing required columns for plotting.")
             else: print("  ⚠️ Baseline D=5 data empty after filtering.")
        else:
            print("  ⚠️ Could not load D=5 baseline data ('global_sweep_results' missing or empty).")


        # --- Finalize Plot ---
        ax_dim.set_xlabel(f"Topological Parameter ({param_name_dim} for {target_model_dim})")
        ax_dim.set_ylabel(f"Order Parameter ({primary_metric_dim})")
        ax_dim.set_title(f"Impact of State Dimensionality (N={fixed_N_dim} vs D=5 N={baseline_N if not baseline_5d_data.empty else 'N/A'})")
        ax_dim.set_xscale('log') # Use log scale for WS 'p' parameter
        ax_dim.grid(True, linestyle=':')
        ax_dim.legend()
        plt.tight_layout()
        # Save the plot
        if output_dir_dim is not None and exp_name_dim is not None:
             dim_plot_path = os.path.join(output_dir_dim, f"{exp_name_dim}_dimensionality_comparison.png")
             try:
                 plt.savefig(dim_plot_path, dpi=150)
                 print(f"  ✅ Dimensionality comparison plot saved to: {dim_plot_path}")
             except Exception as e_save_dim:
                 print(f"  ❌ Error saving dimensionality plot: {e_save_dim}")
        else:
             print("  ⚠️ Could not save dimensionality plot (output path or experiment name missing).")
        plt.show() # Display the plot
        plt.close(fig_dim) # Close the figure

        print("\n  Qualitative Conclusion:")
        print("    Compare curves visually. Differences indicate state dimension impacts the emergent dynamics and transition behavior.")
        print("    Lower dimensions might show simpler transitions or different critical points.")

elif not analysis_error_dim:
    print("❌ Skipping dimensionality comparison plotting: No valid results collected.")
else:
    print("❌ Skipping dimensionality comparison due to errors or config flags.")

print("\n✅ Cell 11.6: State Dimensionality Comparison completed.")

In [None]:
# Cell 12: PCA Analysis of Attractor Landscapes (Phase 2 - Load Pickle)
# Description: Loads landscape data from Pickle. Performs PCA if flag is set.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import os
# import ast # No longer needed for parsing with pickle
import warnings
import traceback
import json
import pickle # To load landscape data from pickle file

print("\n--- Cell 12: PCA Analysis of Attractor Landscapes (Phase 2 - Load Pickle) ---")

# --- Configuration & Prerequisites ---
pca_error = False
# Check for config dictionary
if 'config' not in globals() or not isinstance(config, dict):
    print("❌ FATAL: Config dictionary missing. Run Cell 1."); pca_error = True
elif not config.get('RUN_PCA_ANALYSIS', False): # Check flag in config
    print("ℹ️ Skipping PCA Analysis: RUN_PCA_ANALYSIS is False in config.")
    pca_error = True

# Load necessary parameters from config if proceeding
output_dir = None; exp_name = None; state_dim = 5; N_for_landscape = 700
if not pca_error:
    try:
        config = globals()['config'] # Use existing config
        output_dir = config['OUTPUT_DIR']; exp_name = config['EXPERIMENT_NAME']
        state_dim = config.get('STATE_DIM', 5)
        # Determine N used for landscape runs (usually largest N)
        landscape_N_list = config.get('SYSTEM_SIZES', [700])
        if landscape_N_list:
             # Ensure list is not empty before accessing last element
             if len(landscape_N_list) > 0:
                  N_for_landscape = landscape_N_list[-1] # Use largest N from config
             else:
                   N_for_landscape = 700 # Fallback if list is empty
        else:
             N_for_landscape = 700 # Fallback
        print(f"  Config loaded. Target N for PCA: {N_for_landscape}")
    except KeyError as e_key_pca_conf:
         print(f"❌ FATAL: Missing key '{e_key_pca_conf}' in config for PCA.")
         pca_error = True
    except Exception as e_pca_conf:
         print(f"❌ FATAL: Error loading config for PCA: {e_pca_conf}.")
         pca_error = True


# --- Load Landscape Data (from Pickle generated in Cell 11.8) ---
landscape_results_list = [] # Initialize empty list
if not pca_error:
    # Construct the expected pickle file path using Phase 2 variables
    landscape_pickle_file = os.path.join(output_dir, f"{exp_name}_landscape_COMBINED_partial.pkl")
    if os.path.exists(landscape_pickle_file):
        print(f"  Loading landscape data (with vectors) from: {landscape_pickle_file}")
        try:
            with open(landscape_pickle_file, 'rb') as f_load_pca:
                landscape_results_list = pickle.load(f_load_pca)
            # Check if loaded data is a non-empty list
            if not isinstance(landscape_results_list, list) or len(landscape_results_list) == 0:
                print("  ⚠️ Warning: Loaded landscape data list is empty or not a list.");
                landscape_results_list = [] # Reset if invalid
                pca_error = True # Treat as error if no data loaded
            else:
                print(f"  Loaded {len(landscape_results_list)} entries from landscape pickle.")
        except Exception as e_load_pca_pkl:
            print(f"❌ Error loading landscape pickle: {e_load_pca_pkl}");
            landscape_results_list = [] # Reset list
            pca_error = True # Treat as error if load fails
    else:
        # If the pickle file doesn't exist, PCA cannot be performed
        print(f"❌ Landscape data pickle not found: {landscape_pickle_file}")
        print(f"   Run Cell 11.8 (Attractor Landscape UMAP/Data Collection) first.")
        pca_error = True

# --- Prepare Data for PCA from Loaded List ---
final_state_matrix = None # Initialize matrix
pca_metadata = [] # Store corresponding parameters (model, p_value, etc.) for plotting
pca_data_prepared = False # Flag

if not pca_error:
    print(f"  Processing final state vectors for N={N_for_landscape}...")
    valid_flat_states = [] # List to hold valid numpy vectors
    target_vector_size = N_for_landscape * state_dim # Expected size of flattened vector

    # Iterate through the loaded list of result dictionaries
    item_idx = 0
    num_items_total = len(landscape_results_list)
    while item_idx < num_items_total:
        item = landscape_results_list[item_idx]
        # Check if the item is a dictionary, matches the target N, and has the vector
        is_valid_dict = isinstance(item, dict)
        is_target_N = False
        has_vector = False
        is_error_free = False
        if is_valid_dict:
             # Use .get with default for safety if N is missing
             if item.get('N') == N_for_landscape: is_target_N = True
             if 'final_state_vector' in item: has_vector = True
             if item.get('error_message') is None: is_error_free = True # Only use non-error results

        # Only process if it's for the correct N, has a vector, and didn't report an error
        if is_target_N and has_vector and is_error_free:
            vec = item['final_state_vector']
            # Check type, attempt conversion to numpy float array, check length and validity
            current_vector = None
            vector_valid_for_pca = False
            if vec is not None: # Ensure vector is not None
                try:
                    # Ensure it's a numpy array of floats
                    current_vector = np.array(vec, dtype=float)
                    # Check expected flattened length
                    is_correct_length = (current_vector.size == target_vector_size)
                    # Check for NaNs or Infs
                    contains_no_nan_inf = False # Assume invalid until checked
                    if is_correct_length: # Only check content if length is right
                         if not (np.isnan(current_vector).any() or np.isinf(current_vector).any()):
                              contains_no_nan_inf = True

                    if is_correct_length and contains_no_nan_inf:
                        vector_valid_for_pca = True # Mark as valid if all checks pass
                    # else: # Optional debug prints for invalid vectors
                    #     if not is_correct_length: print(f"Debug PCA Prep: Incorrect length {current_vector.size} vs {target_vector_size}")
                    #     if not contains_no_nan_inf: print(f"Debug PCA Prep: Vector contains NaN/Inf")

                except (ValueError, TypeError) as parse_err:
                    # Catch errors during np.array conversion (e.g., if vec is not numerical)
                    warnings.warn(f"Could not convert vector to valid numpy array during PCA prep: {parse_err}", RuntimeWarning)
                    vector_valid_for_pca = False # Mark as invalid on error
                except Exception as unexpected_err:
                     warnings.warn(f"Unexpected error processing vector: {unexpected_err}", RuntimeWarning)
                     vector_valid_for_pca = False # Mark as invalid on unexpected errors

            # If vector is valid, add it and its metadata
            if vector_valid_for_pca:
                valid_flat_states.append(current_vector)
                # Store relevant metadata for coloring plot later
                meta = {'model': item.get('model', 'Unknown')}
                # Find the parameter value key dynamically (e.g., p_value, radius_value)
                param_key = 'unknown_param_value' # Default key
                param_name_found = 'unknown' # Default name
                # Iterate through keys to find the one ending in '_value'
                item_key_iter = iter(item.keys())
                stop_key_iter = False
                while not stop_key_iter:
                     try:
                          key = next(item_key_iter)
                          if isinstance(key, str) and key.endswith('_value'): # Ensure key is string
                               param_key = key
                               param_name_found = key.replace('_value', '')
                               stop_key_iter = True
                     except StopIteration:
                          stop_key_iter = True
                # Store parameter name and value
                meta['parameter_name'] = param_name_found
                meta['parameter_value'] = item.get(param_key, np.nan)
                pca_metadata.append(meta) # Append metadata dictionary

        item_idx += 1 # Increment loop counter


    # --- Create Final Matrix if Valid States Found ---
    if len(valid_flat_states) > 0:
         # Double check if all collected states have the same length (should be target_vector_size)
         first_len = valid_flat_states[0].size
         all_same_len = True
         state_idx_check = 1
         while state_idx_check < len(valid_flat_states):
              if valid_flat_states[state_idx_check].size != first_len:
                   all_same_len = False
                   break # Exit loop if inconsistency found
              state_idx_check += 1

         if all_same_len:
              # Stack valid vectors vertically to create the matrix [samples, features]
              try:
                  final_state_matrix = np.vstack(valid_flat_states)
                  print(f"  ✅ Prepared matrix for PCA with shape: {final_state_matrix.shape}")
                  pca_data_prepared = True # Mark data as ready
              except MemoryError as e_mem_stack:
                   print(f"❌ MemoryError stacking state vectors ({len(valid_flat_states)} vectors of size {first_len}). Cannot perform PCA.")
                   pca_data_prepared = False
                   pca_error = True # Treat as error if stacking fails
              except Exception as e_vstack:
                   print(f"❌ Error stacking state vectors for PCA: {e_vstack}")
                   pca_data_prepared = False
                   pca_error = True # Treat as error
         else:
              # This case should ideally not happen if length check during collection works
              print("  ❌ Error: Inconsistent vector lengths found after filtering for PCA.")
              lengths = [arr.size for arr in valid_flat_states]
              print("   Lengths found:", set(lengths))
              pca_data_prepared = False
              pca_error = True # Treat as error
    else:
         # No valid states found for the target N
         print(f"  ⚠️ No valid final state vectors found for PCA (N={N_for_landscape}) after filtering.")
         pca_data_prepared = False # Cannot proceed

# --- Perform PCA ---
if not pca_error and pca_data_prepared:
    # Check if enough samples for requested components
    num_pca_components_req = config.get("PCA_COMPONENTS", 3) # Get requested components from config
    min_samples_needed = max(2, num_pca_components_req) # Need at least 2 samples, and at least as many as components
    num_samples_avail = final_state_matrix.shape[0]

    if num_samples_avail < min_samples_needed:
        print(f"❌ Error: Not enough valid states ({num_samples_avail}) for PCA (need ≥ {min_samples_needed}).")
        pca_error = True
    else:
         # --- Standardization ---
         # Standardize data (mean=0, variance=1) before applying PCA
         print("  Standardizing data (mean=0, variance=1)...")
         scaler = StandardScaler()
         scaled_final_state_matrix = scaler.fit_transform(final_state_matrix)
         print("  Standardization complete.")

         # --- PCA Fitting ---
         # Determine number of components, ensuring it doesn't exceed data dimensions
         # Max components = min(n_samples, n_features)
         max_possible_components = min(scaled_final_state_matrix.shape[0], scaled_final_state_matrix.shape[1])
         num_pca_components = min(num_pca_components_req, max_possible_components)

         # Check if at least 2 components are possible for plotting
         can_plot_pca = num_pca_components >= 2
         if not can_plot_pca:
             print(f"⚠️ Warning: Cannot perform 2D PCA plot (only {num_pca_components} component possible/requested).")
             # Optionally run PCA anyway if 1 component is useful
             # pca_error = True # Set error if plot is essential

         if not pca_error: # Proceed only if plotting is possible or PCA itself is desired
             print(f"  Fitting PCA model (n_components={num_pca_components})...")
             pca_model = PCA(n_components=num_pca_components)
             try:
                 # Fit PCA model and transform the data
                 pca_transformed_data = pca_model.fit_transform(scaled_final_state_matrix)
                 # Get explained variance ratios
                 explained_variance_ratios = pca_model.explained_variance_ratio_
                 print(f"  PCA fitting complete.")
                 # Format explained variance for printing
                 explained_var_str = [f'{v:.4f}' for v in explained_variance_ratios]
                 print(f"  Explained variance per component: {explained_var_str}")
                 total_explained_variance = explained_variance_ratios.sum()
                 print(f"  Total variance explained by {num_pca_components} components: {total_explained_variance:.4f}")

                 # --- Plot PCA Results (Color by parameter value for each model) ---
                 if can_plot_pca:
                     # Combine PCA results (first 2 components) with metadata
                     pca_plot_df = pd.DataFrame(pca_transformed_data[:, :2], columns=['PC1', 'PC2'])
                     meta_df = pd.DataFrame(pca_metadata) # Convert metadata list to DataFrame
                     # Ensure indices align before concatenation (should align if data wasn't filtered after stacking)
                     if len(pca_plot_df) == len(meta_df):
                         pca_plot_df.index = meta_df.index
                         pca_plot_df = pd.concat([pca_plot_df, meta_df], axis=1)
                     else:
                          warnings.warn("PCA results and metadata have different lengths. Plotting may be incorrect.", RuntimeWarning)
                          # Attempt merge based on index if possible, otherwise skip plotting
                          pca_plot_df = pd.DataFrame() # Make empty to skip plot


                     # Find unique models present in the PCA data
                     models_in_pca = []
                     if not pca_plot_df.empty and 'model' in pca_plot_df.columns:
                          models_in_pca = pca_plot_df['model'].unique()
                     num_models_pca = len(models_in_pca)

                     if num_models_pca > 0:
                         # Create subplots: one row, one column per model
                         fig_pca, axes_pca = plt.subplots(1, num_models_pca, figsize=(7 * num_models_pca, 6), squeeze=False)
                         axes_pca = axes_pca.flatten() # Ensure axes_pca is iterable

                         plot_idx_pca = 0
                         while plot_idx_pca < num_models_pca:
                             model = models_in_pca[plot_idx_pca]
                             ax = axes_pca[plot_idx_pca] # Select subplot axis
                             # Filter PCA data for the current model
                             model_pca_data = pca_plot_df[pca_plot_df['model'] == model]

                             if not model_pca_data.empty:
                                 # Get parameter name and values for coloring
                                 # Check if metadata columns exist
                                 if 'parameter_name' in model_pca_data.columns and 'parameter_value' in model_pca_data.columns:
                                     param_name_plot = model_pca_data['parameter_name'].iloc[0] # Assume consistent name within model
                                     param_values_plot = model_pca_data['parameter_value']

                                     # Decide whether to use log scale for color map (e.g., for WS 'p')
                                     use_log_color = (model == 'WS') # Example condition
                                     if use_log_color:
                                         # Calculate log10, handle potential zeros or negative values safely
                                         color_values = np.log10(np.maximum(param_values_plot.astype(float), 1e-6)) # Use maximum to avoid log(0)
                                         color_label = f"log10({param_name_plot})" # Label for color bar
                                     else:
                                         # Use linear scale for color
                                         color_values = param_values_plot.astype(float)
                                         color_label = param_name_plot # Label for color bar

                                     # Create scatter plot for this model
                                     scatter = ax.scatter(model_pca_data['PC1'], model_pca_data['PC2'],
                                                          c=color_values, cmap='viridis', # Color points by parameter value
                                                          s=10, alpha=0.7) # Adjust size/transparency

                                     # Add labels and title using explained variance
                                     pc1_var_label = f"{explained_variance_ratios[0]*100:.1f}%"
                                     pc2_var_label = f"{explained_variance_ratios[1]*100:.1f}%"
                                     ax.set_xlabel(f"PC 1 ({pc1_var_label})")
                                     ax.set_ylabel(f"PC 2 ({pc2_var_label})")
                                     ax.set_title(f"PCA of Final States ({model}, N={N_for_landscape})")
                                     ax.grid(True, linestyle=':')

                                     # Add colorbar to the plot
                                     cbar = fig_pca.colorbar(scatter, ax=ax, orientation='vertical')
                                     cbar.set_label(color_label, rotation=270, labelpad=15)
                                 else:
                                     # Fallback if metadata columns are missing
                                     ax.scatter(model_pca_data['PC1'], model_pca_data['PC2'], s=10, alpha=0.7)
                                     ax.set_title(f"{model}\n(Metadata missing)")
                                     warnings.warn(f"Metadata columns missing for model {model} in PCA plot.", RuntimeWarning)
                             else:
                                  # Handle case where model has no data after filtering
                                  ax.set_title(f"{model}\n(No PCA data)")

                             plot_idx_pca += 1 # Increment subplot index

                         # Adjust layout and save the combined figure
                         plt.tight_layout()
                         pca_plot_filename = f"{exp_name}_pca_landscape_comparison.png"
                         pca_plot_filepath = os.path.join(output_dir, pca_plot_filename)
                         try:
                             fig_pca.savefig(pca_plot_filepath, dpi=150, bbox_inches='tight')
                             print(f"  ✅ PCA comparison plot saved to: {pca_plot_filepath}")
                         except Exception as e_save_pca:
                              print(f"❌ Error saving PCA plot: {e_save_pca}")
                         plt.show() # Display the plot
                         plt.close(fig_pca) # Close figure

                     else: # num_models_pca == 0
                          print("  ⚠️ No models found in PCA data to plot.")

             except Exception as e_pca_fit:
                 print(f"❌ Error during PCA fitting/transform: {e_pca_fit}")
                 traceback.print_exc(limit=1)
                 pca_error = True # Mark as error

# --- Final Error Handling Messages ---
elif not pca_error and not pca_data_prepared:
    print("❌ Skipping PCA calculation: Data preparation failed (no valid vectors found or processed).")
elif pca_error:
    # Message should have been printed already if pca_error was set
    if not ('RUN_PCA_ANALYSIS' in config and not config['RUN_PCA_ANALYSIS']): # Avoid repeating skip message if disabled by flag
         print("❌ Skipping PCA calculation due to errors or flags.")

print("\n✅ Cell 12: PCA analysis completed (or attempted).")

In [None]:
# Cell 13: Synthesis and Theoretical Summary (Emergenics - Conceptual)
# Description: Creates markdown text summarizing experimental findings and
#              articulating the Emergenics theoretical framework using thermodynamic analogies.
# NOTE: This cell reflects an older summary style based on beta/variance FSS,
#       kept for historical context but superseded by Cell 14's Chi FSS summary.

import pandas as pd # Need pandas for pd.notna check
import numpy as np # Need numpy for np.* types

print("\n--- Cell 13: Synthesis & Theoretical Framework (Conceptual - Pre Chi FSS) ---")
print("⚠️ Note: This summary reflects an older analysis state (pre-Chi FSS). Cell 14 provides the final Phase 1 summary.")

# --- Attempt to load values for placeholders ---
# These values might not exist or might be from older, less reliable analyses
beta_val_str = "N/A" # Default string
# Check if the variable exists in the global scope
if 'global_beta_exponent' in globals():
     beta_val = globals()['global_beta_exponent']
     # Check if the value is not None and is a finite number
     if beta_val is not None and pd.notna(beta_val):
          try:
               beta_val_str = f"{beta_val:.3f}" # Format to 3 decimal places
          except (TypeError, ValueError):
               pass # Keep N/A if formatting fails

pc_val_str = "N/A" # Default string
if 'global_p_c_estimate' in globals():
     pc_val = globals()['global_p_c_estimate']
     if pc_val is not None and pd.notna(pc_val):
          try:
               pc_val_str = f"{pc_val:.4f}" # Format to 4 decimal places
          except (TypeError, ValueError):
               pass # Keep N/A

pca_var_str = "N/A" # Default string
# Check if total_explained_variance exists (set during PCA in Cell 12)
if 'total_explained_variance' in globals():
     pca_var = globals()['total_explained_variance']
     if pca_var is not None and pd.notna(pca_var):
         try:
             pca_var_str = f"{pca_var * 100.0:.1f}%" # Format as percentage
         except (TypeError, ValueError):
             pass # Keep N/A

pca_comps_str = "N/A" # Default string
# Check if config exists and contains the PCA_COMPONENTS key
if 'config' in globals() and isinstance(config, dict):
     pca_comps = config.get("PCA_COMPONENTS") # Use .get for safe access
     if pca_comps is not None:
          pca_comps_str = str(pca_comps)


# Define summary text using f-string for dynamic values
summary_markdown_text_conceptual = f"""
# Emergenics: Synthesis & Theoretical Framework (Conceptual Summary)

## Experimental Findings (Based on Initial Analysis - Pre-Chi FSS)

The computational experiments provide empirical support for the Emergenics hypothesis, although initial analysis methods required refinement.

- **Parametric Sweep (Watts-Strogatz):**
  Varying the rewiring probability *p* induced a clear phase transition in the 5D Network Automaton's behavior, observed via the `variance_norm` order parameter. The system transitioned from a high-variance state (diverse dynamics) at low *p* to a low-variance state (homogenized dynamics) at high *p*.
  - **Critical Point:** Initial estimation near *p_c* ≈ {pc_val_str} (though subsequent Chi FSS provided a more reliable value).
  - **Critical Scaling:** Direct FSS on the order parameter (`variance_norm`) suggested power-law scaling but yielded potentially unreliable exponents (e.g., **β ≈ {beta_val_str}**). This indicated the need for more sensitive observables like susceptibility.

- **Universality Testing (WS, SBM, RGG):**
  Analysis across different graph models revealed similar topology-driven transitions, supporting the universality of the Emergenics principle that structure controls dynamics. However, quantitative comparison of critical exponents (see Cell 11.3, Cell 14) revealed significant differences, pointing towards **distinct universality classes**.

- **Attractor Landscape (PCA):**
  PCA performed on the high-dimensional flattened final state vectors (if successful):
  - **High Dimensionality:** Indicated by the fact that the top {pca_comps_str} principal components explained only ~{pca_var_str} of the total variance, confirming the system operates in a high-dimensional state space.
  - **Topological Influence:** The distribution of final states in the PCA projection showed dependence on the topological control parameter (e.g., *p*), indicating that topology continuously shapes the accessible attractor landscape.

## Theoretical Framework: Computational Thermodynamics

Emergenics interprets these findings through a thermodynamic lens:

- **Order Parameter:** Measures the degree of computational order/uniformity (e.g., `variance_norm`). Low variance = uniform/ordered, high variance = diverse/disordered.
- **Control Parameter:** Topology (*p*, *p_intra*, *r*) acts like temperature or another external field, tuning the system between computational phases.
- **Phase Transition:** The sharp change near the critical point (*p_c*, *r_c*) marks a shift between computational regimes (e.g., from locally processed information to globally integrated states).
- **Critical Exponents (γ, ν, β):** Quantify universal scaling behavior near the transition, linking computational dynamics to principles of statistical mechanics and universality. Differences in exponents classify different computational regimes.
- **State Space:** The high-dimensional space (partially revealed by PCA/UMAP) represents the system's computational capacity or 'phase space'. Structure constrains the dynamics within this space.

## Conclusion (Conceptual): Structure IS Computation

This work demonstrates computationally that network topology acts as a fundamental control parameter, inducing quantifiable phase transitions in the emergent dynamics of a novel 5D Network Automaton. The identification of critical points and scaling exponents (particularly γ and ν from Chi FSS) provides strong support for the Emergenics framework. The system exhibits rich, high-dimensional behavior influenced by network structure, offering a powerful paradigm for understanding and potentially designing computation in complex networks. The finding of distinct universality classes adds significant depth to this picture.

---

**Next Steps (Post Phase 1):**
1. Further analyze Phase 2 results (information, landscape, perturbation).
2. Develop more sophisticated computational metrics (Phase 2).
3. Explore finite-size scaling corrections.
4. Formulate quantitative design principles linking structure to function (Phase 3).
5. Apply framework to specific computational tasks (Phase 4).
"""

# Print the summary to the console
print(summary_markdown_text_conceptual)
# Store for saving or later use if needed, though Cell 14 is the primary summary
global_summary_markdown_text_conceptual = summary_markdown_text_conceptual

print("✅ Cell 13: Conceptual Synthesis and Theoretical Summary generated.")

In [None]:
# Cell 14: Synthesis & Summary (Phase 1 Completion - Final v3)
# Description: Summarizes Phase 1 findings: criticality via Chi FSS, LACK of universality,
#              energy checks, and sensitivity. Removes mention of skipped PCA.

import os
import numpy as np
import pandas as pd
import json
import warnings

print("\n--- Cell 14: Synthesis & Summary (Phase 1 Completion - Final v3) ---")

# --- Gather Data Safely ---
# Load config dictionary if available
config = {}
if 'config' in globals() and isinstance(globals()['config'], dict):
    config = globals()['config']
else:
    warnings.warn("Config dictionary not found in Cell 14. Summary may use defaults or be incomplete.", RuntimeWarning)

# Extract necessary info from config safely using .get()
exp_name_summary = config.get('EXPERIMENT_NAME', "N/A_Phase1_Experiment")
output_dir_summary = config.get('OUTPUT_DIR', ".") # Default to current dir if not set
primary_metric_summary = config.get('PRIMARY_ORDER_PARAMETER', 'N/A')
sensitivity_param = config.get('SENSITIVITY_RULE_PARAM', 'N/A')
energy_checked = config.get('CALCULATE_ENERGY', False);
history_stored = config.get('STORE_ENERGY_HISTORY', False)

# --- Helper Function (copied from Cell 11.3) ---
def format_metric(value, fmt):
    """Safely formats a numerical value using a format string."""
    is_valid_number = False
    if value is not None:
        if isinstance(value, (int, float)) and np.isfinite(value): is_valid_number = True
    if is_valid_number:
        try: return fmt % value
        except (TypeError, ValueError): return "Format Error"
    else: return "N/A"

# --- Get Results from Global Variables (set by previous analysis cells) ---
# Use .get() on globals() dictionary to access variables safely
ws_chi_results = globals().get('global_optuna_fss_chi_results', {})
sbm_chi_results = globals().get('global_optuna_fss_chi_sbm_results', {})
rgg_chi_results = globals().get('global_optuna_fss_chi_rgg_results', {})

# Check if sensitivity analysis was performed by looking for the output plot file
sensitivity_plot_path = os.path.join(output_dir_summary, f"{exp_name_summary}_sensitivity_pc_vs_{sensitivity_param}.png")
sensitivity_analyzed = os.path.exists(sensitivity_plot_path)


# --- Extract specific values safely using .get() from result dictionaries ---
pc_ws=ws_chi_results.get('pc',np.nan); gamma_ws=ws_chi_results.get('gamma',np.nan); nu_ws=ws_chi_results.get('nu',np.nan)
ws_success=ws_chi_results.get('success',False)

pc_sbm=sbm_chi_results.get('pc',np.nan); gamma_sbm=sbm_chi_results.get('gamma',np.nan); nu_sbm=sbm_chi_results.get('nu',np.nan)
sbm_success=sbm_chi_results.get('success',False)

pc_rgg=rgg_chi_results.get('pc',np.nan); gamma_rgg=rgg_chi_results.get('gamma',np.nan); nu_rgg=rgg_chi_results.get('nu',np.nan)
rgg_success=rgg_chi_results.get('success',False)

# --- Calculate Universality Statistics ---
# Collect valid exponent values only from successful runs
gamma_values = []
if ws_success and pd.notna(gamma_ws): gamma_values.append(gamma_ws)
if sbm_success and pd.notna(gamma_sbm): gamma_values.append(gamma_sbm)
if rgg_success and pd.notna(gamma_rgg): gamma_values.append(gamma_rgg)

nu_values = []
if ws_success and pd.notna(nu_ws): nu_values.append(nu_ws)
if sbm_success and pd.notna(nu_sbm): nu_values.append(nu_sbm)
if rgg_success and pd.notna(nu_rgg): nu_values.append(nu_rgg)

# Count how many models had successful exponent calculations
models_compared_count = len(gamma_values) # Count based on successful gamma values

# Calculate stats only if enough data points exist (>= 2)
gamma_mean = np.nan; gamma_std = np.nan; gamma_rsd = np.inf # Initialize
if models_compared_count >= 2:
    gamma_mean=np.mean(gamma_values); gamma_std=np.std(gamma_values)
    # Calculate RSD = (StdDev / |Mean|) * 100%, check for zero mean
    if gamma_mean != 0 and pd.notna(gamma_mean) and pd.notna(gamma_std):
        gamma_rsd=(gamma_std / abs(gamma_mean)) * 100.0

nu_mean = np.nan; nu_std = np.nan; nu_rsd = np.inf # Initialize
# Use nu_values list length for nu comparison, as gamma/nu might have different success rates
if len(nu_values) >= 2:
    nu_mean=np.mean(nu_values); nu_std=np.std(nu_values)
    # Calculate RSD for Nu
    if nu_mean != 0 and pd.notna(nu_mean) and pd.notna(nu_std):
        nu_rsd=(nu_std / abs(nu_mean)) * 100.0


# --- Generate Summary Text using an f-string and list joining ---
summary_lines = [] # Initialize empty list for lines of summary

summary_lines.append(f"# Emergenics Phase 1 Summary: {exp_name_summary}\n")
summary_lines.append("## Objective:")
summary_lines.append("Rigorously analyze topology-driven phase transitions in a 5D Network Automaton across WS, SBM, and RGG models using FSS on Susceptibility (χ) via Optuna. Assess universality and sensitivity.")

summary_lines.append("\n## Key Findings:")
summary_lines.append("- **Phase Transitions Confirmed:** All models exhibit clear computational phase transitions controlled by topology (p, p_intra, r), observable via order parameters like variance_norm.")
summary_lines.append("- **Susceptibility (χ) FSS Success:** Optuna-driven FSS on χ yielded robust critical point and exponent estimates for each model:")
# Format results using helper function
summary_lines.append(f"  - **WS:**  p_c ≈ {format_metric(pc_ws, '%.5f')}, γ ≈ {format_metric(gamma_ws, '%.3f')}, ν ≈ {format_metric(nu_ws, '%.3f')} ({'Success' if ws_success else 'Failed'})")
summary_lines.append(f"  - **SBM:** p_c ≈ {format_metric(pc_sbm, '%.5f')}, γ ≈ {format_metric(gamma_sbm, '%.3f')}, ν ≈ {format_metric(nu_sbm, '%.3f')} ({'Success' if sbm_success else 'Failed'})")
summary_lines.append(f"  - **RGG:** r_c ≈ {format_metric(pc_rgg, '%.5f')}, γ ≈ {format_metric(gamma_rgg, '%.3f')}, ν ≈ {format_metric(nu_rgg, '%.3f')} ({'Success' if rgg_success else 'Failed'})")

summary_lines.append("- **Universality Analysis (Based on χ FSS):**")
if models_compared_count >= 2:
    # Report stats if calculated
    summary_lines.append(f"  - Models Compared (Successful γ fit): {models_compared_count}")
    summary_lines.append(f"  - Gamma (γ): Mean={format_metric(gamma_mean, '%.3f')} ± {format_metric(gamma_std, '%.3f')} (RSD: {format_metric(gamma_rsd, '%.1f')}%)")
    summary_lines.append(f"  - Nu (ν):    Mean={format_metric(nu_mean, '%.3f')} ± {format_metric(nu_std, '%.3f')} (RSD: {format_metric(nu_rsd, '%.1f')}%)") # Use nu_values count
    # Conclusion based on RSD - using the threshold from Phase 1 results (high RSD -> distinct)
    if gamma_rsd > 25 or nu_rsd > 25: # Check if *either* shows high variation
        summary_lines.append("  - **Conclusion: Significant variation in exponents (RSD > 25%) strongly indicates WS, SBM, RGG belong to DIFFERENT universality classes.**")
    elif gamma_rsd < 15 and nu_rsd < 15: # Requires low RSD for *both* to suggest same class
        summary_lines.append("  - **Conclusion: Low variation suggests a single universality class (γ≈{:.3f}, ν≈{:.3f}).**".format(gamma_mean, nu_mean)) # Format means if consistent
    else: # Intermediate case
        summary_lines.append("  - **Conclusion: Moderate variation in exponents (RSD between 15-25%) makes universality questionable; distinct classes remain likely.**")
else:
    # Message if not enough successful models for comparison
    summary_lines.append(f"  - Comparison not performed (requires successful exponent results from >= 2 models, found {models_compared_count}).")

summary_lines.append("- **Sensitivity:**")
if sensitivity_analyzed:
    # Report sensitivity findings if plot exists
    summary_lines.append(f"  - Assessed impact of '{sensitivity_param}' on p_c ({config.get('TARGET_MODEL_SENS','WS')} model).") # Add model tested
    summary_lines.append(f"  - Conclusion: Critical point shifts predictably with '{sensitivity_param}', but transition phenomenon persists (see plot).")
else:
    # Message if sensitivity analysis was skipped or failed
    summary_lines.append(f"  - Sensitivity analysis for '{sensitivity_param}' not completed or plot not found.")

summary_lines.append("- **Energy & Dynamics:**")
# Report status based on config flags
if energy_checked:
    summary_lines.append(f"  - Final energy calculated (type: {config.get('ENERGY_FUNCTIONAL_TYPE', 'N/A')}).")
else: summary_lines.append("  - Final energy calculation disabled.")
if history_stored:
    summary_lines.append("  - Energy monotonicity check performed (details in Cell 11.4 output).")
else: summary_lines.append("  - Energy monotonicity check skipped (STORE_ENERGY_HISTORY=False).")

summary_lines.append("- **Other Analysis Notes:**")
summary_lines.append(f"  - FSS on primary order parameter ('{primary_metric_summary}') yielded poor collapse; χ FSS proved more suitable.")
# Remove mention of PCA failure as it was fixed/attempted in Phase 2 context
# summary_lines.append("  - PCA analysis of final states was skipped or failed.")

summary_lines.append("\n## Overall Phase 1 Conclusion:")
summary_lines.append("Phase 1 successfully used GPU acceleration and robust analysis (Optuna FSS on χ) to quantify topology-driven phase transitions in WS, SBM, and RGG models for the 5D HDC/RSV Network Automaton.")
# Final conclusion adjusted based on RSD - check for high RSD indicating distinct classes
if gamma_rsd > 25 or nu_rsd > 25:
     summary_lines.append(f"**Crucially, evidence strongly suggests these models belong to DISTINCT universality classes (High RSDs: γ≈{format_metric(gamma_rsd, '%.1f')}%, ν≈{format_metric(nu_rsd, '%.1f')}%).**")
     summary_lines.append("This indicates the *type* of network structure fundamentally alters the critical computational dynamics.")
elif gamma_rsd < 15 and nu_rsd < 15:
      summary_lines.append(f"**Evidence supports UNIVERSALITY, with consistent critical exponents (γ≈{format_metric(gamma_mean, '%.3f')}, ν≈{format_metric(nu_mean, '%.3f')}) across these distinct topological classes.**")
      summary_lines.append("This suggests fundamental, shared principles governing computational emergence in these networks.")
else: # Intermediate case
     summary_lines.append(f"**Universality is questionable (Moderate RSDs: γ≈{format_metric(gamma_rsd, '%.1f')}%, ν≈{format_metric(nu_rsd, '%.1f')}%). Distinct classes remain likely.**")

summary_lines.append("Sensitivity analysis confirmed the robustness of the transition phenomenon. The Emergenics framework is validated, providing a solid quantitative foundation for Phase 2 (exploring computational capabilities) and Phase 3 (design principles).")

# --- Save Summary ---
summary_text = "\n".join(summary_lines) # Join lines into single string
# Ensure output directory exists before saving
summary_filename_phase1 = "Phase1_Summary_Error.md" # Default filename on error
if output_dir_summary is not None and exp_name_summary is not None and output_dir_summary != ".":
    try:
        os.makedirs(output_dir_summary, exist_ok=True) # Create dir if needed
        summary_filename_phase1 = os.path.join(output_dir_summary, f"{exp_name_summary}_summary_phase1.md")
        with open(summary_filename_phase1, 'w') as f_write_summary:
             f_write_summary.write(summary_text)
        print(f"\n✅ Saved Phase 1 summary document to: {summary_filename_phase1}")
    except Exception as e_save_summary:
        print(f"❌ Error saving Phase 1 summary document: {e_save_summary}")
else:
     print("\n⚠️ Could not save Phase 1 summary document (Output directory or experiment name invalid).")


# --- Print Summary to Console ---
print("\n" + "="*80); print(summary_text); print("="*80)
print("\n--- Phase 1 Analysis & Summary Generation Complete ---")
print("Cell 14 execution complete.")

In [None]:
# Cell 15: Phase 2 Synthesis & Summary
# Description: Generates a markdown summary of Phase 2 findings, comparing
#              computational characteristics (information processing, attractors
#              via UMAP/PCA, perturbation response) across the different
#              universality classes identified in Phase 1.

import os
import numpy as np
import pandas as pd
import json
import warnings

print("\n--- Cell 15: Phase 2 Synthesis & Summary ---")

# --- Gather Data Safely ---
# Load config dictionary if available
config = {}
if 'config' in globals() and isinstance(globals()['config'], dict):
    config = globals()['config']
else:
    warnings.warn("Config dictionary not found in Cell 15. Phase 2 Summary may be incomplete.", RuntimeWarning)

# Extract necessary info from config safely using .get()
# Use Phase 2 experiment name and output dir from config
exp_name_summary = config.get('EXPERIMENT_NAME', "Phase2_N/A")
output_dir_summary = config.get('OUTPUT_DIR', ".") # Default to current dir if not set

# --- Check if Phase 2 analysis results exist (by checking for expected output plots/files) ---
# Construct expected file paths based on Phase 2 naming conventions
info_plot_path = os.path.join(output_dir_summary, f"{exp_name_summary}_info_metrics_comparison.png")
umap_plot_path = os.path.join(output_dir_summary, f"{exp_name_summary}_umap_landscape_comparison.png")
pca_plot_path = os.path.join(output_dir_summary, f"{exp_name_summary}_pca_landscape_comparison.png")
pert_plot_path = os.path.join(output_dir_summary, f"{exp_name_summary}_perturbation_response_comparison.png")

# Check existence of plot files as indicators of successful analysis steps
info_plot_exists = os.path.exists(info_plot_path)
umap_plot_exists = os.path.exists(umap_plot_path)
pca_plot_exists = os.path.exists(pca_plot_path)
pert_plot_exists = os.path.exists(pert_plot_path)

# --- Load key metrics from Phase 1 (if path was stored in Phase 2 config) ---
phase1_metrics = {}
# Retrieve path to Phase 1 metrics file stored during Phase 2 config setup
phase1_key_metrics_path = config.get('phase1_key_metrics_path')
if phase1_key_metrics_path and os.path.exists(phase1_key_metrics_path):
    try:
        with open(phase1_key_metrics_path, 'r') as f_p1_metrics:
             phase1_metrics = json.load(f_p1_metrics)
        print(f"  Loaded Phase 1 key metrics from: {phase1_key_metrics_path}")
    except Exception as e_load_p1m:
         warnings.warn(f"Could not load Phase 1 key metrics from {phase1_key_metrics_path}: {e_load_p1m}", RuntimeWarning)
         phase1_metrics = {} # Reset if load fails
else:
     print("  Phase 1 key metrics file path not found or file doesn't exist. Cannot display Phase 1 exponents.")

# --- Helper for formatting ---
def format_metric(value, fmt):
    """Safely formats a numerical value using a format string."""
    is_valid_number = False
    if value is not None:
        if isinstance(value, (int, float)) and np.isfinite(value): is_valid_number = True
    if is_valid_number:
        try: return fmt % value
        except (TypeError, ValueError): return "Format Error"
    else: return "N/A"

# Extract Phase 1 exponents safely from loaded metrics
pc_ws=phase1_metrics.get('final_pc_ws_chi', np.nan)
gamma_ws=phase1_metrics.get('final_gamma_ws_chi', np.nan); nu_ws=phase1_metrics.get('final_nu_ws_chi', np.nan)
pc_sbm=phase1_metrics.get('final_pc_sbm_chi', np.nan)
gamma_sbm=phase1_metrics.get('final_gamma_sbm_chi', np.nan); nu_sbm=phase1_metrics.get('final_nu_sbm_chi', np.nan)
pc_rgg=phase1_metrics.get('final_pc_rgg_chi', np.nan)
gamma_rgg=phase1_metrics.get('final_gamma_rgg_chi', np.nan); nu_rgg=phase1_metrics.get('final_nu_rgg_chi', np.nan)


# --- Generate Summary Text ---
summary_lines = [f"# Emergenics Phase 2 Summary: {exp_name_summary}\n"]
summary_lines.append("## Objective:")
summary_lines.append("Characterize the computational properties associated with the different phases (ordered, critical, disordered) and distinct universality classes (WS, SBM, RGG) identified in Phase 1.")

summary_lines.append("\n## Phase 1 Recap - Distinct Universality Classes:")
summary_lines.append("Phase 1 concluded that WS, SBM, and RGG models likely belong to **distinct universality classes** based on differing critical exponents derived from Susceptibility (χ) FSS:")
summary_lines.append(f"  - **WS:**  (p_c≈{format_metric(pc_ws, '.4f')}, γ≈{format_metric(gamma_ws, '.3f')}, ν≈{format_metric(nu_ws, '.3f')})")
summary_lines.append(f"  - **SBM:** (p_c≈{format_metric(pc_sbm, '.4f')}, γ≈{format_metric(gamma_sbm, '.3f')}, ν≈{format_metric(nu_sbm, '.3f')})")
summary_lines.append(f"  - **RGG:** (r_c≈{format_metric(pc_rgg, '.4f')}, γ≈{format_metric(gamma_rgg, '.3f')}, ν≈{format_metric(nu_rgg, '.3f')})")

summary_lines.append("\n## Phase 2 Findings - Computational Characteristics:")

summary_lines.append("\n### 1. Information Processing:")
if info_plot_exists:
    summary_lines.append("  - Analysis performed using `mean_final_state_entropy` (average Shannon entropy across state dimensions).")
    summary_lines.append("  - **Observation:** *[Manually interpret plot '{}_info_metrics_comparison.png']:* ".format(exp_name_summary))
    summary_lines.append("    - Describe how entropy changes vs. the control parameter (p, p_intra, r) for each model.")
    summary_lines.append("    - Does entropy peak near the critical point (p_c, r_c)?")
    summary_lines.append("    - Are the peak heights or shapes significantly different between WS, SBM, RGG?")
    summary_lines.append("  - **Interpretation:** Differences in entropy profiles likely reflect varying levels of state diversity and predictability across phases and universality classes. A peak near criticality might indicate maximal dynamic complexity. Distinct profiles reinforce the idea of different computational regimes.")
else:
    summary_lines.append("  - Analysis skipped, failed, or plot not found.")

summary_lines.append("\n### 2. Attractor Landscape:")
umap_analysis_done = config.get('RUN_UMAP_ANALYSIS', False) and umap_plot_exists
pca_analysis_done = config.get('RUN_PCA_ANALYSIS', False) and pca_plot_exists
if umap_analysis_done or pca_analysis_done:
    summary_lines.append("  - Explored using dimensionality reduction on final state vectors near sub-critical, critical, and super-critical regimes.")
    if umap_analysis_done: summary_lines.append(f"  - UMAP visualizations generated (see '{exp_name_summary}_umap_landscape_comparison.png').")
    if pca_analysis_done: summary_lines.append(f"  - PCA visualizations generated (see '{exp_name_summary}_pca_landscape_comparison.png').")
    summary_lines.append("  - **Observation:** *[Manually interpret UMAP/PCA plots]:*")
    summary_lines.append("    - Describe the structure in the low-dimensional embedding (e.g., number/separation of clusters, shape of manifold).")
    summary_lines.append("    - How does this structure change across the phase transition (sub-critical vs. critical vs. super-critical)?")
    summary_lines.append("    - Are there qualitative differences in landscape structure between WS, SBM, and RGG at similar relative distances from their respective critical points?")
    summary_lines.append("  - **Interpretation:** Visualizations provide insights into the geometry of the system's accessible state space. Changes across the transition reflect shifts in computational dynamics. Differences between models highlight how underlying topology shapes the attractor landscape, consistent with distinct universality classes.")
else:
    summary_lines.append("  - Analysis skipped, failed, or plots not found.")

summary_lines.append("\n### 3. Perturbation Response:")
if pert_plot_exists:
    pert_metric = config.get('PERTURBATION_METRICS_TO_CALC', ['relaxation_time'])[0] # Get first metric plotted
    summary_lines.append(f"  - Assessed system response to transient node clamping using '{pert_metric}'.")
    summary_lines.append("  - **Observation:** *[Manually interpret plot '{}_perturbation_response_comparison.png']:* ".format(exp_name_summary))
    summary_lines.append(f"    - How does {pert_metric} change vs. the control parameter?")
    summary_lines.append(f"    - Does {pert_metric} show a peak or significant change near the critical point (indicative of critical slowing down or heightened sensitivity)?")
    summary_lines.append(f"    - Are there differences in the response profiles (peak height, width) between WS, SBM, RGG?")
    summary_lines.append("  - **Interpretation:** Perturbation response quantifies system stability and information propagation dynamics. Sensitivity often peaks near criticality. Differences across models reinforce that distinct structural classes handle perturbations differently, impacting computational robustness and information flow.")
else:
    summary_lines.append("  - Analysis skipped, failed, or plot not found.")

summary_lines.append("\n## Overall Phase 2 Conclusion:")
summary_lines.append("Phase 2 provided initial characterizations of the computational properties associated with the distinct universality classes identified in Phase 1. Key takeaways:")
summary_lines.append("- **Distinct Computational Regimes:** Analyses of information entropy, attractor landscapes (via UMAP/PCA), and perturbation response suggest qualitative and potentially quantitative differences in computational behavior between the WS, SBM, and RGG structural classes, supporting the Phase 1 finding of distinct universality.")
summary_lines.append("- **Criticality & Computation:** The critical region appears associated with unique computational characteristics (e.g., potentially peak entropy, heightened sensitivity to perturbations), aligning with theoretical expectations ('edge of chaos'). The exact nature varies across universality classes.")
summary_lines.append("- **Structure-Function Link Validated:** These findings further strengthen the Emergenics principle: the *type* of network structure (not just a single parameter) fundamentally shapes the *nature* and *class* of the emergent computation.")
summary_lines.append("\nThis phase provides valuable comparative insights. Phase 3 can now focus on formulating more precise structure-function relationships and developing tools for designing networks with desired emergent computational properties based on these observed class distinctions.")

# --- Save Summary ---
summary_text = "\n".join(summary_lines) # Join lines into single string
summary_filename_phase2 = "Phase2_Summary_Error.md" # Default filename on error
if output_dir_summary is not None and exp_name_summary is not None and output_dir_summary != ".":
    try:
        os.makedirs(output_dir_summary, exist_ok=True) # Create dir if needed
        summary_filename_phase2 = os.path.join(output_dir_summary, f"{exp_name_summary}_summary_phase2.md")
        with open(summary_filename_phase2, 'w') as f_write_summary_p2:
             f_write_summary_p2.write(summary_text)
        print(f"\n✅ Saved Phase 2 summary document to: {summary_filename_phase2}")
    except Exception as e_save_summary_p2:
        print(f"❌ Error saving Phase 2 summary document: {e_save_summary_p2}")
else:
     print("\n⚠️ Could not save Phase 2 summary document (Output directory or experiment name invalid).")

# --- Print Summary to Console ---
print("\n" + "="*80); print(summary_text); print("="*80)
print("\n--- Phase 2 Analysis & Summary Generation Complete ---")
print("Cell 15 execution complete.")