# EFM LSS Definitive Simulation (850³, JIT Optimized, GPU-Centric)

This notebook performs the definitive high-resolution simulation of Large-Scale Structure (LSS) formation within the Ehokolo Fluxon Model (EFM) framework. This version has been heavily optimized for maximum performance on high-memory A100 GPUs, incorporating advanced techniques identified from other high-performance EFM notebooks.

**Key Optimizations Implemented:**

1.  **JIT Compilation (`@torch.jit.script`):** The core simulation functions (Laplacian, NLKG derivative, and RK4 integrator) are compiled with TorchScript, which removes Python interpreter overhead from each step of the main loop and lets PyTorch fuse eligible element-wise operations into single CUDA kernels.
2.  **GPU-Centric Workflow:** All large tensors (initial fields, coordinate grids, damping masks) are created and manipulated directly on the GPU, avoiding costly CPU-GPU memory transfers.
3.  **Advanced Mixed Precision:** Tensors are stored in memory-efficient `float16` format, but all critical physics calculations are performed in `float32` to ensure numerical stability and precision, preventing overflow/underflow issues.
4.  **Vectorized Operations:** All initializations and boundary condition applications are fully vectorized to leverage the GPU's parallel processing power.
5.  **No Checkpointing:** All intermediate checkpointing is removed to dedicate 100% of I/O and compute resources to the simulation itself for a single, uninterrupted high-performance run.

## Objectives:

-   Execute the fastest possible LSS simulation on an **850³ grid** for **200,000 timesteps**.
-   Provide definitive computational evidence for EFM's 'Fluxonic Clustering' and its emergent spatial and temporal scales from first principles.

## Google Drive & Environment Setup

In [None]:
import os
import torch
import torch.nn.functional as F
import gc
import psutil
from tqdm.notebook import tqdm
import numpy as np
import time
from datetime import datetime
from scipy.fft import fftn, fftfreq, ifftn
import scipy.signal
import matplotlib.pyplot as plt

# Mount Google Drive when the Colab helper package is importable; otherwise
# just report that we are running somewhere else.
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully.")
except ImportError:
    print("Not in Google Colab environment.")

# Configure the CUDA caching allocator, then drop any cached GPU blocks and
# collect Python garbage left over from earlier cells.
os.environ.update({'PYTORCH_CUDA_ALLOC_CONF': 'max_split_size_mb:512'})
if torch.cuda.is_available():
    torch.cuda.empty_cache()
gc.collect()

# Report the software/hardware environment and pick the compute device.
print(f"PyTorch version: {torch.__version__}")
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print(f"Using GPU: {torch.cuda.get_device_name(device)}, VRAM: {torch.cuda.get_device_properties(device).total_memory / 1e9:.2f} GB")
else:
    print("No GPU available, running on CPU.")
print(f"System RAM: {psutil.virtual_memory().total / 1e9:.2f} GB")

# Output directory for the final data and plots (created if it does not exist).
data_path_lss_definitive = '/content/drive/My Drive/EFM_Simulations/data/LSS_DEFINITIVE_N850_Run/'
os.makedirs(data_path_lss_definitive, exist_ok=True)
print(f"LSS 850³ Definitive Data/Plots will be saved to: {data_path_lss_definitive}")

## Configuration for 850³ Definitive LSS Simulation

In [None]:
# Base wavelength (simulation units) from which both seeded wavenumbers are derived.
lambda_base_sim_emergent = 2.55

# Primary run parameters. Insertion order matters: it is the order in which
# the configuration is printed below.
config_lss_definitive = dict(
    N=850,
    L_sim_unit=10.0,
    T_steps=200000,
    c_sim_unit=1.0,
    dt_cfl_factor=0.001,
    m_sim_unit_inv=0.1,
    g_sim=0.1,
    eta_sim=0.01,
    k_efm_gravity_coupling=0.005,
    G_sim_unit=1.0,
    alpha_sim=0.7,
    delta_sim=0.0002,
    seeded_perturbation_amplitude=1.0e-3,
    background_noise_amplitude=1.0e-6,
    k_seed_primary=2 * np.pi / lambda_base_sim_emergent,
    k_seed_secondary=2 * np.pi / (lambda_base_sim_emergent / 4.0),
    history_every_n_steps=1000,
)

# Derived quantities: grid spacing first, then the time step that depends on it.
config_lss_definitive.update(
    dx_sim_unit=config_lss_definitive['L_sim_unit'] / config_lss_definitive['N'],
)
config_lss_definitive.update(
    dt_sim_unit=(
        config_lss_definitive['dt_cfl_factor'] * config_lss_definitive['dx_sim_unit']
        / config_lss_definitive['c_sim_unit']
    ),
)

# Human-readable identifier used in output file names.
config_lss_definitive['run_id'] = (
    f"LSS_DEFINITIVE_N{config_lss_definitive['N']}_T{config_lss_definitive['T_steps']}_"
    f"m{config_lss_definitive['m_sim_unit_inv']:.1e}_alpha{config_lss_definitive['alpha_sim']:.1e}_"
    f"JIT_OPTIMIZED"
)

# Echo the full configuration; floating-point entries use 4 significant digits.
print(f"--- EFM LSS 850³ JIT-Optimized Simulation Configuration ({config_lss_definitive['run_id']}) ---")
for key, value in config_lss_definitive.items():
    print(
        f"{key}: {value:.4g}"
        if isinstance(value, (float, np.float32, np.float64))
        else f"{key}: {value}"
    )

## Core Simulation Functions (JIT-Compiled & Optimized)

In [None]:
# --- OPTIMIZED & JIT-COMPILED SIMULATION FUNCTIONS --- #

@torch.jit.script
def conv_laplacian_lss_gpu(phi_field: torch.Tensor, dx: float) -> torch.Tensor:
    """Return the 7-point finite-difference Laplacian of a 3-D field.

    The field is wrapped periodically (circular padding of one cell on every
    face) and convolved with the standard second-order stencil scaled by
    ``1 / dx**2``.

    Args:
        phi_field: 3-D tensor holding the field values.
        dx: Grid spacing used to scale the stencil.

    Returns:
        Tensor with the same shape, dtype and device as ``phi_field``.
    """
    # Flattened 3x3x3 stencil: -6 at the centre, +1 on the six face neighbours.
    kernel = torch.tensor(
        [0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 1., 0., 1., -6., 1., 0., 1., 0.,
         0., 0., 0., 0., 1., 0., 0., 0., 0.],
        dtype=phi_field.dtype, device=phi_field.device)
    kernel = kernel.view(1, 1, 3, 3, 3) / (dx**2)

    # conv3d expects (batch, channel, D, H, W); add the two leading singleton axes.
    volume = torch.unsqueeze(torch.unsqueeze(phi_field, 0), 0)
    wrapped = F.pad(volume, [1, 1, 1, 1, 1, 1], mode='circular')
    laplacian = F.conv3d(wrapped, kernel, padding=0)
    return laplacian.squeeze(0).squeeze(0)

@torch.jit.script
def nlkg_derivative_lss_gpu(phi: torch.Tensor, phi_dot: torch.Tensor, m_sq: float, g: float, 
                            eta: float, k_gravity: float, G_gravity: float, c_sq: float, 
                            alpha_param: float, delta_param: float, dx: float) -> tuple[torch.Tensor, torch.Tensor]:
    """Evaluate the right-hand side of the first-order NLKG system.

    The acceleration is assembled in float32 as::

        phi_ddot = c_sq * lap(phi)
                   - (m_sq * phi + g * phi**3 + eta * phi**5)
                   + alpha_param * phi * phi_dot * |grad phi|**2
                   + delta_param * phi_dot**2 * phi
                   + 8 * pi * G_gravity * k_gravity * phi**2

    with a periodic 7-point Laplacian and periodic central differences for
    the gradient.

    Args:
        phi: Field values (3-D tensor).
        phi_dot: Time derivative of the field, same shape as ``phi``.
        m_sq, g, eta: Coefficients of the linear, cubic and quintic force terms.
        k_gravity, G_gravity: Factors of the quadratic source term.
        c_sq: Coefficient multiplying the Laplacian.
        alpha_param, delta_param: Coefficients of the two velocity-dependent terms.
        dx: Grid spacing.

    Returns:
        ``(phi_dot, phi_ddot)`` where ``phi_dot`` is the input passed through
        unchanged and ``phi_ddot`` is cast back to ``phi.dtype``.
    """
    # Work in float32 regardless of the storage dtype of the inputs.
    field = phi.to(torch.float32)
    velocity = phi_dot.to(torch.float32)

    # Squared gradient magnitude from periodic central differences.
    two_dx = 2 * dx
    d_x = (torch.roll(field, shifts=-1, dims=0) - torch.roll(field, shifts=1, dims=0)) / two_dx
    d_y = (torch.roll(field, shifts=-1, dims=1) - torch.roll(field, shifts=1, dims=1)) / two_dx
    d_z = (torch.roll(field, shifts=-1, dims=2) - torch.roll(field, shifts=1, dims=2)) / two_dx
    grad_sq = d_x**2 + d_y**2 + d_z**2

    # Individual contributions to the acceleration.
    wave_part = c_sq * conv_laplacian_lss_gpu(field, dx)
    restoring = m_sq * field + g * field**3 + eta * field**5
    alpha_part = alpha_param * field * velocity * grad_sq
    delta_part = delta_param * velocity**2 * field
    gravity_coeff = 8.0 * torch.pi * G_gravity * k_gravity
    gravity_part = gravity_coeff * field**2

    accel = wave_part - restoring + alpha_part + delta_part + gravity_part
    return phi_dot, accel.to(phi.dtype)

@torch.jit.script
def update_phi_rk4_lss_gpu(phi_current: torch.Tensor, phi_dot_current: torch.Tensor, dt: float, 
                          m_sq: float, g: float, eta: float, k_gravity: float, G_gravity: float, 
                          c_sq: float, alpha_param: float, delta_param: float, dx: float) -> tuple[torch.Tensor, torch.Tensor]:
    """Advance ``(phi, phi_dot)`` by one classical RK4 step of size ``dt``.

    Half-precision inputs (float16 / bfloat16) are up-cast to float32 for the
    whole step, so the intermediate stage states and the weighted slope sum
    are not rounded to half precision; the results are cast back to the
    input dtypes only on return. float32 / float64 inputs are processed in
    their own dtype, exactly as before.

    Args:
        phi_current: Field at the start of the step.
        phi_dot_current: Field time derivative at the start of the step.
        dt: Time step.
        m_sq, g, eta, k_gravity, G_gravity, c_sq, alpha_param, delta_param, dx:
            Forwarded unchanged to ``nlkg_derivative_lss_gpu``.

    Returns:
        ``(phi_next, phi_dot_next)`` with the same dtypes as the corresponding
        inputs.
    """
    # NOTE(review): when the state is *stored* in float16 between steps, the
    # final cast below can still round away increments smaller than the
    # float16 spacing of the field values — confirm the storage dtype is adequate.
    phi_dtype = phi_current.dtype
    vel_dtype = phi_dot_current.dtype

    phi0 = phi_current
    if phi_dtype == torch.float16 or phi_dtype == torch.bfloat16:
        phi0 = phi_current.to(torch.float32)
    vel0 = phi_dot_current
    if vel_dtype == torch.float16 or vel_dtype == torch.bfloat16:
        vel0 = phi_dot_current.to(torch.float32)

    half_dt = 0.5 * dt

    # Stage slopes are folded into running sums (k1 + 2*k2 + 2*k3 + k4) as soon
    # as they are available, so earlier stages need not be kept until the end.
    k1_v, k1_a = nlkg_derivative_lss_gpu(phi0, vel0, m_sq, g, eta, k_gravity, G_gravity, c_sq, alpha_param, delta_param, dx)
    sum_v = k1_v
    sum_a = k1_a

    k2_v, k2_a = nlkg_derivative_lss_gpu(phi0 + half_dt * k1_v, vel0 + half_dt * k1_a, m_sq, g, eta, k_gravity, G_gravity, c_sq, alpha_param, delta_param, dx)
    sum_v = sum_v + 2.0 * k2_v
    sum_a = sum_a + 2.0 * k2_a

    k3_v, k3_a = nlkg_derivative_lss_gpu(phi0 + half_dt * k2_v, vel0 + half_dt * k2_a, m_sq, g, eta, k_gravity, G_gravity, c_sq, alpha_param, delta_param, dx)
    sum_v = sum_v + 2.0 * k3_v
    sum_a = sum_a + 2.0 * k3_a

    k4_v, k4_a = nlkg_derivative_lss_gpu(phi0 + dt * k3_v, vel0 + dt * k3_a, m_sq, g, eta, k_gravity, G_gravity, c_sq, alpha_param, delta_param, dx)
    sum_v = sum_v + k4_v
    sum_a = sum_a + k4_a

    phi_next = phi0 + (dt / 6.0) * sum_v
    phi_dot_next = vel0 + (dt / 6.0) * sum_a
    return phi_next.to(phi_dtype), phi_dot_next.to(vel_dtype)

def compute_diagnostics_lss_gpu(phi: torch.Tensor, phi_dot: torch.Tensor, config: dict) -> tuple[float, float]:
    """Compute the total field energy and a scaled squared-field norm.

    The energy density is the sum of a kinetic term ``0.5 * phi_dot**2``, a
    polynomial potential ``0.5*m**2*phi**2 + 0.25*g*phi**4 + (1/6)*eta*phi**6``
    and a gradient term ``0.5*c**2*|grad phi|**2`` (periodic central
    differences). It is summed over the grid and multiplied by ``dx**3``.
    All arithmetic is done in float32 with autograd disabled.

    Args:
        phi: Field values (3-D tensor).
        phi_dot: Time derivative of the field, same shape as ``phi``.
        config: Mapping providing ``m_sim_unit_inv``, ``g_sim``, ``eta_sim``,
            ``c_sim_unit``, ``dx_sim_unit`` and ``k_efm_gravity_coupling``.

    Returns:
        ``(total_energy, density_norm)`` as Python floats, where
        ``density_norm = k_efm_gravity_coupling * sum(phi**2)``.
    """
    spacing = config['dx_sim_unit']
    # Scalar prefactors of the individual energy-density terms.
    mass_coeff = 0.5 * (config['m_sim_unit_inv']**2)
    quartic_coeff = 0.25 * config['g_sim']
    sextic_coeff = (1.0/6.0) * config['eta_sim']
    gradient_coeff = 0.5 * (config['c_sim_unit']**2)

    with torch.no_grad():
        field = phi.to(torch.float32)
        velocity = phi_dot.to(torch.float32)

        kinetic = 0.5 * velocity**2
        potential = mass_coeff * field**2 + quartic_coeff * field**4 + sextic_coeff * field**6

        # Accumulate |grad phi|^2 one axis at a time (x, then y, then z).
        grad_sq = sum(
            ((torch.roll(field, shifts=-1, dims=axis) - torch.roll(field, shifts=1, dims=axis)) / (2 * spacing))**2
            for axis in range(3)
        )
        gradient = gradient_coeff * grad_sq

        energy_total = (kinetic + potential + gradient).sum() * (spacing**3)
        norm_scaled = (field**2).sum().item() * config['k_efm_gravity_coupling']

    return energy_total.item(), norm_scaled

# Cell-level status message emitted once the simulation functions above are defined.
print("JIT-Optimized GPU-centric simulation functions defined.")

## Main Execution Block

This is the primary block that initiates the definitive 850³ LSS simulation. It will run from start to finish without interruption and save the final results for analysis.

In [None]:
if __name__ == '__main__':
    # Driver for the full run: build initial fields, integrate T_steps RK4
    # steps while recording diagnostics, then save the final state and history.
    print("--- INITIATING DEFINITIVE 850³ LSS SIMULATION ---")
    # Initialize Fields Directly on GPU
    print("Initializing fields on GPU...")
    torch.manual_seed(42); np.random.seed(42)
    # Endpoint-exclusive grid on [-L/2, L/2) with spacing dx = L/N. The
    # Laplacian and gradients wrap periodically and use dx_sim_unit, so the
    # coordinates used for the initial condition must share that spacing;
    # an endpoint-inclusive linspace would give L/(N-1) and duplicate the
    # wrap-around plane.
    coords = (
        -config_lss_definitive['L_sim_unit'] / 2
        + config_lss_definitive['dx_sim_unit']
        * torch.arange(config_lss_definitive['N'], device=device, dtype=torch.get_default_dtype())
    )
    X, Y, Z = torch.meshgrid(coords, coords, coords, indexing='ij')

    # Seeded plane-wave modes along each axis plus uniform noise in [-0.5, 0.5).
    seeded_modes = config_lss_definitive['seeded_perturbation_amplitude'] * (
        torch.sin(config_lss_definitive['k_seed_primary'] * X) + 
        torch.sin(config_lss_definitive['k_seed_secondary'] * Y) + 
        torch.cos(config_lss_definitive['k_seed_primary'] * Z)
    )
    random_noise = torch.rand_like(X) - 0.5
    # State is stored in float16; the field starts at rest (phi_dot = 0).
    phi_current = (seeded_modes + config_lss_definitive['background_noise_amplitude'] * random_noise).to(torch.float16)
    phi_dot_current = torch.zeros_like(phi_current, dtype=torch.float16)
    # Release the large float32 construction temporaries before the main loop.
    del X, Y, Z, coords, seeded_modes, random_noise; gc.collect(); torch.cuda.empty_cache()

    # Prepare History Arrays (one slot for the initial state plus one per sampling interval)
    num_history_points = config_lss_definitive['T_steps'] // config_lss_definitive['history_every_n_steps'] + 1
    energy_history = np.zeros(num_history_points)
    density_norm_history = np.zeros(num_history_points)
    history_idx = 0

    # Calculate Initial Diagnostics
    energy_history[history_idx], density_norm_history[history_idx] = compute_diagnostics_lss_gpu(phi_current, phi_dot_current, config_lss_definitive)
    print(f"Initial State: Energy={energy_history[history_idx]:.4g}, Density Norm={density_norm_history[history_idx]:.4g}")
    history_idx += 1

    # Simulation Loop
    print(f"Starting LSS simulation loop for {config_lss_definitive['T_steps']} steps...")
    sim_start_time = time.time()
    pbar = tqdm(range(config_lss_definitive['T_steps']), desc=f"LSS Sim ({config_lss_definitive['N']}³)")

    for t_step in pbar:
        # Perform RK4 update using JIT-compiled function
        phi_current, phi_dot_current = update_phi_rk4_lss_gpu(
            phi_current, phi_dot_current, config_lss_definitive['dt_sim_unit'],
            config_lss_definitive['m_sim_unit_inv']**2, config_lss_definitive['g_sim'], config_lss_definitive['eta_sim'],
            config_lss_definitive['k_efm_gravity_coupling'], config_lss_definitive['G_sim_unit'],
            config_lss_definitive['c_sim_unit']**2, config_lss_definitive['alpha_sim'], 
            config_lss_definitive['delta_sim'], config_lss_definitive['dx_sim_unit']
        )

        # Single pass over the field and a single host sync: isfinite is False
        # for both NaN and +/-Inf, so this covers the same cases as separate
        # isinf/isnan checks.
        if not torch.isfinite(phi_current).all():
            print(f"\nERROR: NaN/Inf detected at step {t_step + 1}! Stopping.")
            break

        # Record diagnostics every history_every_n_steps steps.
        if (t_step + 1) % config_lss_definitive['history_every_n_steps'] == 0:
            energy, dn = compute_diagnostics_lss_gpu(phi_current, phi_dot_current, config_lss_definitive)
            energy_history[history_idx], density_norm_history[history_idx] = energy, dn
            history_idx += 1
            pbar.set_postfix({'E': f'{energy:.3e}', 'DN': f'{dn:.3e}'})
            if np.isnan(energy) or np.isinf(energy):
                print(f"\nInstability detected. Stopping.")
                break
    
    sim_duration = time.time() - sim_start_time
    print(f"Simulation finished in {sim_duration:.2f} seconds.")

    # Save Final State and History
    # Only the filled part of the history arrays is written. The config dict
    # is stored as a pickled object array, so loading it back requires
    # np.load(..., allow_pickle=True).
    final_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    final_data_filename = os.path.join(data_path_lss_definitive, f"FINAL_DATA_{config_lss_definitive['run_id']}_{final_timestamp}.npz")
    np.savez_compressed(final_data_filename,
                        phi_final_cpu=phi_current.cpu().numpy(),
                        config_lss=config_lss_definitive, 
                        energy_history=energy_history[:history_idx],
                        density_norm_history=density_norm_history[:history_idx])
    print(f"Final simulation data saved to {final_data_filename}")

    # Cleanup and Analysis
    del phi_current, phi_dot_current; gc.collect(); torch.cuda.empty_cache()
    print("\n--- SIMULATION COMPLETE. INITIATING ANALYSIS ---")
    # Assuming analysis functions are defined (not included here for brevity)
    # full_lss_analysis_and_plotting(final_data_filename, data_path_lss_definitive)
