# Interactive Data Explorer

This notebook provides an interactive interface to:
1. **Select a variable** to load using a text input box
2. **Choose experiment, branch, and run** from dropdowns
3. **Select parameters** dynamically from sweep.yaml and analysis_config.yaml
4. **Load and display datacube data** for verification

## Setup

Import required libraries and set up the project path.

In [None]:
import sys
from pathlib import Path
import numpy as np
import yaml
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

# Resolve the project root: the parent directory when the notebook is started
# from inside a "notebooks" folder, otherwise the working directory itself.
project_root = Path.cwd()
if project_root.name == 'notebooks':
    project_root = project_root.parent

# Put the root at the front of sys.path (only once) so the project's `src`
# package can be imported below.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print(f"Project root: {project_root}")

from src.core.constants import DIRS
from src.workflows.analysis_pipeline import load_all_var_files, get_analytical_solution

# The pencil-code Python package is looked up in a "pencil-code" directory that
# sits next to the project root; make it importable (added only once).
PENCIL_CODE_PYTHON_PATH = project_root.parent.joinpath("pencil-code", "python")
if str(PENCIL_CODE_PYTHON_PATH) not in sys.path:
    sys.path.insert(0, str(PENCIL_CODE_PYTHON_PATH))

print("✓ Setup complete")

## Cell 1: Select Variable, Experiment, Branch and Run

In [None]:
def get_available_experiments():
    """Return the sorted names of experiments that have a sweep plan.

    An experiment is any non-hidden directory directly under ``DIRS.config``
    that contains ``<DIRS.plan_subdir>/sweep.yaml``.
    """
    return sorted(
        entry.name
        for entry in DIRS.config.iterdir()
        if entry.is_dir()
        and not entry.name.startswith('.')
        and (entry / DIRS.plan_subdir / "sweep.yaml").exists()
    )

def get_runs_for_experiment(experiment_name):
    """Return the run names listed in an experiment's ``run_manifest.txt``.

    Args:
        experiment_name: Name of the experiment directory under ``DIRS.runs``.

    Returns:
        list: One stripped entry per non-blank manifest line, in file order;
        an empty list when the manifest file does not exist.
    """
    manifest_file = DIRS.runs / experiment_name / "run_manifest.txt"
    if manifest_file.exists():
        run_names = []
        with open(manifest_file, 'r') as manifest:
            for raw_line in manifest:
                name = raw_line.strip()
                if name:
                    run_names.append(name)
        return run_names
    return []

def organize_runs_by_branch(runs, sweep_config):
    """Group run names by the sweep branch whose name they contain.

    Args:
        runs: Iterable of run-name strings.
        sweep_config: Parsed sweep.yaml mapping. Its optional ``branches``
            entry is a list of mappings that each carry a ``name`` key.
            ``None`` (an empty YAML document) is treated as an empty mapping.

    Returns:
        dict: One key per branch name (config order; ``'default'`` when no
        branches are declared) mapping to the runs assigned to that branch,
        followed by an ``'all'`` key holding a copy of every run. A run is
        assigned to at most one branch; runs matching no branch appear only
        under ``'all'``. A branch literally named ``'all'`` is shadowed by
        the catch-all entry.
    """
    branches = (sweep_config or {}).get('branches') or []
    branch_names = [b['name'] for b in branches] if branches else ['default']
    all_runs = list(runs)
    runs_per_branch = {branch: [] for branch in branch_names}

    # Matching is by substring, so test the longest branch names first:
    # otherwise a branch such as "hll" would also capture the runs of "hllc".
    match_order = sorted(branch_names, key=len, reverse=True)
    for run in all_runs:
        for branch_name in match_order:
            if branch_name in run:
                runs_per_branch[branch_name].append(run)
                break

    # Added last, and as a copy, so the catch-all list is never the object
    # being iterated or appended to above.
    runs_per_branch['all'] = list(all_runs)
    return runs_per_branch

# Experiments that have a sweep plan on disk; they populate the first dropdown.
experiments = get_available_experiments()

# Free-text name of the variable to load.
var_text = widgets.Text(
    description='Variable:',
    value='rho',
    placeholder='Enter variable name (e.g., rho, ux, pp, ee)',
    layout=widgets.Layout(width='400px'),
    style={'description_width': 'initial'},
)

# Experiment chooser.
experiment_dropdown = widgets.Dropdown(
    description='Experiment:',
    options=experiments,
    layout=widgets.Layout(width='600px'),
    style={'description_width': 'initial'},
)

# Branch chooser; starts with only the catch-all entry.
branch_dropdown = widgets.Dropdown(
    description='Branch:',
    options=['all'],
    layout=widgets.Layout(width='400px'),
    style={'description_width': 'initial'},
)

# Run chooser; starts empty.
run_dropdown = widgets.Dropdown(
    description='Run:',
    options=[],
    layout=widgets.Layout(width='800px'),
    style={'description_width': 'initial'},
)

def update_runs(change):
    """Refresh the branch and run dropdowns for a newly selected experiment.

    Args:
        change: Change dictionary as passed by ``observe``; only
            ``change['new']`` (the experiment name) is read.
    """
    experiment = change['new']
    plan_file = DIRS.config / experiment / DIRS.plan_subdir / "sweep.yaml"
    with open(plan_file, 'r') as f:
        # safe_load returns None for an empty document; fall back to a mapping.
        sweep_config = yaml.safe_load(f) or {}
    runs = get_runs_for_experiment(experiment)
    runs_per_branch = organize_runs_by_branch(runs, sweep_config)

    # Store the new grouping BEFORE touching branch_dropdown: changing its
    # options/value can fire update_run_list, which reads this attribute and
    # would otherwise see the previous experiment's runs (or none at all).
    experiment_dropdown._runs_per_branch = runs_per_branch

    branch_options = list(runs_per_branch.keys())
    branch_dropdown.options = branch_options
    branch_dropdown.value = 'all' if 'all' in branch_options else branch_options[0]

    # No change event is emitted when the selected branch name ends up the
    # same as before, so refresh the run list explicitly.
    update_run_list({'new': branch_dropdown.value})

def update_run_list(change):
    """Show the runs of the selected branch in the run dropdown.

    Args:
        change: Change dictionary as passed by ``observe``; only
            ``change['new']`` (the branch name) is read.

    Does nothing when the branch is not present in the grouping stored on
    ``experiment_dropdown._runs_per_branch``.
    """
    selected_branch = change['new']
    grouped_runs = getattr(experiment_dropdown, '_runs_per_branch', {})
    if selected_branch not in grouped_runs:
        return
    branch_runs = grouped_runs[selected_branch]
    run_dropdown.options = branch_runs
    if branch_runs:
        run_dropdown.value = branch_runs[0]

# Keep the dependent dropdowns in sync: a new experiment refreshes the branch
# list, a new branch refreshes the run list.
experiment_dropdown.observe(update_runs, names='value')
branch_dropdown.observe(update_run_list, names='value')

# The observers were registered after the widgets were created, so seed the
# dependent dropdowns manually for the first experiment.
if experiments:
    update_runs({'new': experiments[0]})

# Render the Step 1 controls stacked vertically under a heading.
display(widgets.VBox([
    widgets.HTML("<h3>Step 1: Select Variable and Experiment</h3>"),
    var_text,
    experiment_dropdown,
    branch_dropdown,
    run_dropdown
]))

print(f"Found {len(experiments)} experiments")

## Cell 2: Select Parameters from Configuration Files

In [None]:
def load_sweep_parameters(experiment_name):
    """Read an experiment's sweep.yaml and index its parameter sweeps by variable.

    Args:
        experiment_name: Name of the experiment directory under ``DIRS.config``.

    Returns:
        tuple: ``(parameters, sweep_config)``. ``parameters`` maps each swept
        variable name to ``{'type': 'linked' | 'independent', 'values': [...]}``.
        Every variable of a linked sweep receives that sweep's full ``values``
        list. ``sweep_config`` is the parsed YAML mapping (``{}`` for an
        empty file).
    """
    plan_file = DIRS.config / experiment_name / DIRS.plan_subdir / "sweep.yaml"
    with open(plan_file, 'r') as f:
        # safe_load returns None for an empty document; normalise to a mapping.
        sweep_config = yaml.safe_load(f) or {}

    parameters = {}
    # `or []` also covers keys that are present but set to null in the YAML.
    for sweep in sweep_config.get('parameter_sweeps') or []:
        values = sweep.get('values') or []
        if sweep.get('type') == 'linked':
            for var in sweep.get('variables') or []:
                parameters[var] = {'type': 'linked', 'values': values}
        else:
            variable = sweep.get('variable')
            if not variable:
                # A sweep without a variable name has nothing to key on; skip
                # it rather than registering a parameter under ''.
                continue
            parameters[variable] = {'type': 'independent', 'values': values}
    return parameters, sweep_config

def load_analysis_config(experiment_name):
    """Return the parsed ``analysis_config.yaml`` of an experiment.

    Args:
        experiment_name: Name of the experiment directory under ``DIRS.config``.

    Returns:
        The parsed YAML content, or an empty dict when the file is missing
        or empty.
    """
    config_file = DIRS.config / experiment_name / "analysis_config.yaml"
    if not config_file.exists():
        return {}
    with open(config_file, 'r') as f:
        # safe_load returns None for an empty document; callers expect a
        # mapping they can call .get() on.
        return yaml.safe_load(f) or {}

# Output area that receives everything create_parameter_widgets prints/displays.
param_output = widgets.Output()
# Widgets built from the most recently loaded configuration, keyed by name.
sweep_param_widgets = {}
analysis_param_widgets = {}

def create_parameter_widgets():
    """Build and show selection widgets for the chosen experiment's configuration.

    Reads the experiment from ``experiment_dropdown``, loads its sweep
    parameters and analysis configuration, prints a summary and displays one
    widget per sweep parameter plus widgets for the analysis variables and
    error metric (when configured). The created widgets are stored in the
    module-level ``sweep_param_widgets`` and ``analysis_param_widgets`` dicts.
    All output goes to ``param_output``.
    """
    with param_output:
        clear_output()
        # Forget widgets from a previously loaded experiment; otherwise they
        # would linger in the dicts and be displayed again below.
        sweep_param_widgets.clear()
        analysis_param_widgets.clear()

        experiment = experiment_dropdown.value
        if not experiment:
            print("Please select an experiment first")
            return

        sweep_params, _ = load_sweep_parameters(experiment)
        # Guard against a None result so the .get() calls below are safe.
        analysis_config = load_analysis_config(experiment) or {}

        print(f"Configuration for: {experiment}\n" + "="*80)
        print("\nSWEEP PARAMETERS (from sweep.yaml):")

        if sweep_params:
            for param_name, param_info in sweep_params.items():
                print(f"  • {param_name}: {param_info['values']}")
                # Values are offered as strings in the dropdown.
                widget = widgets.Dropdown(
                    options=[str(v) for v in param_info['values']],
                    description=f'{param_name}:',
                    style={'description_width': 'initial'},
                    layout=widgets.Layout(width='400px')
                )
                sweep_param_widgets[param_name] = widget

        print("\nANALYSIS CONFIGURATION (from analysis_config.yaml):")
        variables = analysis_config.get('variables', {})
        if variables:
            var_options = list(variables.keys())
            print("  Variables:")
            for var_name in var_options:
                print(f"    • {var_name}")
            # Multi-select with the first variable pre-selected.
            var_widget = widgets.SelectMultiple(
                options=var_options,
                value=[var_options[0]] if var_options else [],
                description='Variables:',
                style={'description_width': 'initial'},
                layout=widgets.Layout(width='400px', height='100px')
            )
            analysis_param_widgets['variables'] = var_widget

        error_analysis = analysis_config.get('error_analysis', {})
        if error_analysis:
            metrics = error_analysis.get('metrics', [])
            print(f"  Metrics: {metrics}")
            if metrics:
                metric_widget = widgets.Dropdown(
                    options=metrics,
                    description='Metric:',
                    style={'description_width': 'initial'},
                    layout=widgets.Layout(width='300px')
                )
                analysis_param_widgets['metric'] = metric_widget

        print("\n" + "="*80 + "\n")
        if sweep_param_widgets:
            print("Sweep Parameters:")
            for widget in sweep_param_widgets.values():
                display(widget)
        if analysis_param_widgets:
            print("\nAnalysis Parameters:")
            for widget in analysis_param_widgets.values():
                display(widget)

# Button that (re)builds the parameter widgets for the selected experiment.
load_config_button = widgets.Button(
    icon='refresh',
    button_style='info',
    description='Load Configuration',
)


def _on_load_config_clicked(_button):
    """Click handler: the clicked button itself is not needed."""
    create_parameter_widgets()


load_config_button.on_click(_on_load_config_clicked)

# Render the Step 2 controls: heading, button, then the output area.
step2_children = [
    widgets.HTML("<h3>Step 2: Load and Select Parameters</h3>"),
    load_config_button,
    param_output,
]
display(widgets.VBox(step2_children))

## Cell 3: Load and Display Datacube

In [None]:
# Output area that captures everything load_and_display_data prints and plots.
data_output = widgets.Output()
# Result of the most recent load, kept at module level for later cells.
loaded_data = {'sim_data': None, 'analytical_data': None, 'experiment': None, 'run': None, 'variable': None}

def load_and_display_data():
    """Load the selected run, compare it with the analytical solution, and plot.

    Reads the experiment, run and variable from the selection widgets, loads
    every VAR file of the run from ``<hpc.run_base_dir>/<run>`` (path taken
    from the experiment's sweep.yaml), requests an analytical solution for
    each snapshot and stores the results in the module-level ``loaded_data``
    dict. It then prints min/max/mean statistics and absolute errors for the
    first and last snapshots that have an analytical counterpart, and plots
    simulation vs. analytical profiles for those two snapshots. All output
    goes to ``data_output``.
    """
    with data_output:
        clear_output(wait=True)
        experiment = experiment_dropdown.value
        run_name = run_dropdown.value
        # Stray whitespace typed into the text box would make the lookup miss.
        variable = (var_text.value or '').strip()

        if not experiment or not run_name or not variable:
            print("⚠️ Please select experiment, run, and variable")
            return

        print(f"{'='*80}\nLOADING DATA\n{'='*80}")
        print(f"Experiment: {experiment}\nRun: {run_name}\nVariable: {variable}\n")

        plan_file = DIRS.config / experiment / DIRS.plan_subdir / "sweep.yaml"
        with open(plan_file, 'r') as f:
            plan = yaml.safe_load(f)

        hpc_run_base_dir = Path(plan['hpc']['run_base_dir'])
        run_path = hpc_run_base_dir / run_name

        print(f"Loading from: {run_path}\n")
        all_sim_data = load_all_var_files(run_path)

        if not all_sim_data:
            print("❌ Failed to load VAR files")
            return

        print(f"✓ Loaded {len(all_sim_data)} VAR files")

        all_analytical_data = []
        # (simulation, analytical) pairs, so the comparison below always uses
        # matching snapshots even if some snapshots yield no analytical solution.
        compared = []
        for sim_data in all_sim_data:
            analytical_data = get_analytical_solution(sim_data['params'], sim_data['x'], sim_data['t'])
            if analytical_data:
                all_analytical_data.append(analytical_data)
                compared.append((sim_data, analytical_data))

        print(f"✓ Generated {len(all_analytical_data)} analytical solutions\n")

        loaded_data['sim_data'] = all_sim_data
        loaded_data['analytical_data'] = all_analytical_data
        loaded_data['experiment'] = experiment
        loaded_data['run'] = run_name
        loaded_data['variable'] = variable

        print(f"{'='*80}\nDATA VERIFICATION\n{'='*80}\n")

        if variable not in all_sim_data[0]:
            print(f"⚠️ Variable '{variable}' not found")
            print(f"Available: {list(all_sim_data[0].keys())}")
            return

        if not compared:
            # Nothing to compare against; the simulation data stays available
            # in loaded_data.
            print("⚠️ No analytical solutions were generated; skipping comparison")
            return

        first_sim, first_ana = compared[0]
        last_sim, last_ana = compared[-1]
        try:
            sim_first = first_sim[variable]
            sim_last = last_sim[variable]
            anal_first = first_ana[variable]
            anal_last = last_ana[variable]
        except KeyError:
            print(f"⚠️ Variable '{variable}' is missing from a compared snapshot or its analytical solution")
            return

        print(f"Variable: {variable}")
        print(f"\nFirst timestep (t={first_sim['t']:.6e}):")
        print(f"  Sim: min={np.min(sim_first):.6e}, max={np.max(sim_first):.6e}, mean={np.mean(sim_first):.6e}")
        print(f"  Ana: min={np.min(anal_first):.6e}, max={np.max(anal_first):.6e}, mean={np.mean(anal_first):.6e}")

        print(f"\nLast timestep (t={last_sim['t']:.6e}):")
        print(f"  Sim: min={np.min(sim_last):.6e}, max={np.max(sim_last):.6e}, mean={np.mean(sim_last):.6e}")
        print(f"  Ana: min={np.min(anal_last):.6e}, max={np.max(anal_last):.6e}, mean={np.mean(anal_last):.6e}")

        error_first = np.abs(sim_first - anal_first)
        error_last = np.abs(sim_last - anal_last)
        print(f"\nAbsolute Error:")
        print(f"  First: mean={np.mean(error_first):.6e}, max={np.max(error_first):.6e}")
        print(f"  Last: mean={np.mean(error_last):.6e}, max={np.max(error_last):.6e}")

        # Grid and time summary use the first snapshot's x and every snapshot's t.
        x_coords = all_sim_data[0]['x']
        timesteps = [s['t'] for s in all_sim_data]
        print(f"\nSpatial Grid: {len(x_coords)} points, dx={np.mean(np.diff(x_coords)):.6e}")
        print(f"Time: {len(timesteps)} snapshots, range=[{timesteps[0]:.6e}, {timesteps[-1]:.6e}]")

        print(f"\n{'='*80}\n✅ DATA LOADED SUCCESSFULLY\n{'='*80}\n")

        # Side-by-side profiles: first compared snapshot (left), last (right).
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
        ax1.plot(x_coords, sim_first, 'b-', label='Simulation', linewidth=2)
        ax1.plot(x_coords, anal_first, 'r--', label='Analytical', linewidth=2)
        ax1.set_xlabel('Position (x)')
        ax1.set_ylabel(variable)
        ax1.set_title(f'First Timestep (t={first_sim["t"]:.6e})')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        ax2.plot(x_coords, sim_last, 'b-', label='Simulation', linewidth=2)
        ax2.plot(x_coords, anal_last, 'r--', label='Analytical', linewidth=2)
        ax2.set_xlabel('Position (x)')
        ax2.set_ylabel(variable)
        ax2.set_title(f'Last Timestep (t={last_sim["t"]:.6e})')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

# Button that loads the selected run and shows the verification output.
load_data_button = widgets.Button(
    icon='download',
    button_style='success',
    description='Load Data',
)


def _on_load_data_clicked(_button):
    """Click handler: the clicked button itself is not needed."""
    load_and_display_data()


load_data_button.on_click(_on_load_data_clicked)

# Render the Step 3 controls: heading, button, then the output area.
step3_children = [
    widgets.HTML("<h3>Step 3: Load and Verify Data</h3>"),
    load_data_button,
    data_output,
]
display(widgets.VBox(step3_children))

## Cell 4: Access Loaded Data

After loading, you can access the data in the `loaded_data` dictionary:

In [None]:
# Example: summarise whatever the "Load Data" step stored in loaded_data.
if not loaded_data['sim_data']:
    print("No data loaded yet. Please load data using the button above.")
else:
    print(f"Experiment: {loaded_data['experiment']}")
    print(f"Run: {loaded_data['run']}")
    print(f"Variable: {loaded_data['variable']}")
    print(f"Number of timesteps: {len(loaded_data['sim_data'])}")
    print(f"\nYou can now use loaded_data['sim_data'] and loaded_data['analytical_data'] for further analysis")