This notebook prepares the data for the subsequent notebook `10-Step-Analyze.ipynb`, which generates figures illustrating the multi-step prediction accuracy of other model families, as described in Supplementary Material Section 5.

In [None]:
import os
# Set before the tokenizer utilities are imported further down.
# NOTE(review): presumably silences the Hugging Face tokenizers parallelism warning - confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import torch

# Report every CUDA device visible to this process.
num_devices = torch.cuda.device_count()
print("Number of visible GPUs:", num_devices)

for i in range(num_devices):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

# torch.cuda.current_device() initializes CUDA and raises when no device is
# visible, so only query it when at least one GPU was found. current_device is
# still defined (as None) in that case so the name is always bound.
if num_devices > 0:
    current_device = torch.cuda.current_device()
    print("Current device index:", current_device)
    print("Current device name:", torch.cuda.get_device_name(current_device))
else:
    current_device = None
    print("No CUDA device is visible to PyTorch.")

import random
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

from tqdm import tqdm
from data_processing import (
    SimpleSerializerSettings, scale_2d_array, unscale_2d_array,
    serialize_2d_integers, deserialize_2d_integers
)

from allen_cahn_equation import (
    compute_exact_solution_random_ic_vary_Nx,
    finite_difference_multi_predictions,
    visualize_spline_ic,
    plot_both_grids
)

from smollm_utils import load_model_and_tokenizer, llm_multi_predictions
#from phi_utils import load_model_and_tokenizer, generate_text_multiple
#from llama_utils import load_model_and_tokenizer, generate_text_multiple

# Checkpoint name handed to load_model_and_tokenizer; alternatives are left commented out.
MODEL_NAME = "HuggingFaceTB/SmolLM3-3B-Base"
# MODEL_NAME = "microsoft/phi-4"
# MODEL_NAME = "meta-llama/Llama-3.2-3B"

# Seed the Python, NumPy and PyTorch generators (in that order) for reproducibility
seed = 42
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(seed)

# The CUDA generators are seeded only when a GPU is actually available
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

In [None]:
# Load the model and tokenizer for the checkpoint named by MODEL_NAME, using the
# helper imported from smollm_utils; both objects are reused by every prediction below.
model, tokenizer = load_model_and_tokenizer(MODEL_NAME)

In [None]:
# Example: Demonstrating the process of generating and visualizing a random initial condition
L = 2
Nx = 14
# One uniform sample in [-0.5, 0.5) per grid point, drawn from the global NumPy RNG
init_cond_random = np.random.uniform(-0.5, 0.5, size=Nx)
# visualize_spline_ic returns the figure together with a second object, cs.
# NOTE(review): cs is presumably the spline fitted through the samples - confirm in allen_cahn_equation.
fig, cs = visualize_spline_ic(L, Nx, init_cond_random)
plt.tight_layout()
plt.show()

# Example: Demonstrating how to resample spatial points from an underlying random initial condition
Nx_original = Nx
# Nx_new is set to the same value as Nx_original here, so both grids have the same number of points
Nx_new = 14
# Note: fig and cs from the first example are overwritten by this call
fig, cs, init_cond_random_new = plot_both_grids(L, Nx_original, Nx_new, init_cond_random)
plt.tight_layout()
plt.show()

In [None]:
# Define parameters for the Allen-Cahn equation
L = 2       # Length of the spatial domain
k = 0.001   # Thermal diffusivity
T = 0.5     # Total simulation time
Nt = 25     # Number of time steps
dt = T/Nt   # Size of one time step
Nx = 14     # Number of spatial steps (excluding boundary points)
dx = L/(Nx+1)  # Grid spacing implied by Nx interior points
# Separators used when the solution grid is serialized to / parsed from text
settings = SimpleSerializerSettings(space_sep=",", time_sep=";")
# NOTE(review): input_time_steps + number_of_future_predictions = 26 = Nt + 1;
# presumably the context rows and forecast rows together cover every time level - confirm.
input_time_steps = 16
number_of_future_predictions = 10
n_ics = 20  # Number of random initial conditions to evaluate
n_runs_per_ic = 20  # Passed as n_seeds to llm_multi_predictions

# Build one random initial condition (and the object returned alongside its
# figure by visualize_spline_ic) for each index in range(n_ics)
stored_initial_conditions = []
stored_spline_objects = []
for ic_seed in range(n_ics):
    # Re-seed Python, NumPy and PyTorch (in that order) with the index itself
    for _reseed in (random.seed, np.random.seed, torch.manual_seed):
        _reseed(ic_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(ic_seed)

    # Uniform samples in [-0.5, 0.5), one per grid point; a copy is archived
    init_cond_random = np.random.uniform(low=-0.5, high=0.5, size=Nx)
    stored_initial_conditions.append(init_cond_random.copy())

    # Only cs is kept; the figure is closed straight away so it is not displayed
    fig, cs = visualize_spline_ic(L, Nx, init_cond_random)
    plt.close(fig)
    stored_spline_objects.append(cs)

# Array form of the archived initial conditions (saved with the results later)
stored_initial_conditions_array = np.array(stored_initial_conditions)
# Accumulators holding one entry per initial condition
all_llm_max_diffs, all_llm_rmses, all_fd_results = [], [], []

for ic_seed in tqdm(range(n_ics)):
    # Fetch the initial condition and spline generated for this index
    init_cond_random = stored_initial_conditions[ic_seed]
    cs = stored_spline_objects[ic_seed]

    # Exact solution for this initial condition, then scale -> serialize
    u_exact = compute_exact_solution_random_ic_vary_Nx(L, k, T, Nx, Nt, spline_obj=cs)
    u_exact_scaled, vmin_exact, vmax_exact = scale_2d_array(u_exact)
    u_exact_serialized = serialize_2d_integers(u_exact_scaled, settings)

    # Keyword arguments that both predictors receive unchanged
    shared_kwargs = dict(
        full_serialized_data=u_exact_serialized,
        input_time_steps=input_time_steps,
        number_of_future_predictions=number_of_future_predictions,
        Nx=Nx,
        settings=settings,
        vmin=vmin_exact,
        vmax=vmax_exact,
    )

    # LLM prediction for this initial condition (n_seeds = n_runs_per_ic);
    # only the first two of the five returned values are used
    llm_outputs = llm_multi_predictions(
        model=model,
        tokenizer=tokenizer,
        n_seeds=n_runs_per_ic,
        **shared_kwargs,
    )
    llm_max_diffs, llm_rmses, _, _, _ = llm_outputs

    # Finite-difference predictions on the same serialized data
    fd_results = finite_difference_multi_predictions(
        L=L,
        k=k,
        Nt=Nt,
        T=T,
        **shared_kwargs,
    )

    all_llm_max_diffs.append(llm_max_diffs)
    all_llm_rmses.append(llm_rmses)
    all_fd_results.append(fd_results)

# Gather each (scheme, metric) series across initial conditions
ftcs_max_diffs, ftcs_rmses, imex_max_diffs, imex_rmses = (
    [res[scheme][metric] for res in all_fd_results]
    for scheme in ('ftcs', 'imex')
    for metric in ('max_diff', 'rmse')
)

# Average every series over the initial conditions (axis 0)
avg_ftcs_max_diff, avg_ftcs_rmse, avg_imex_max_diff, avg_imex_rmse = (
    np.mean(series, axis=0)
    for series in (ftcs_max_diffs, ftcs_rmses, imex_max_diffs, imex_rmses)
)

# Same nested layout as a single fd_results entry, with plain Python lists
averaged_fd_results = {
    'ftcs': {
        'max_diff': avg_ftcs_max_diff.tolist(),
        'rmse': avg_ftcs_rmse.tolist(),
    },
    'imex': {
        'max_diff': avg_imex_max_diff.tolist(),
        'rmse': avg_imex_rmse.tolist(),
    },
}

# Compute quant floor: error introduced by the scale -> serialize ->
# deserialize -> unscale round trip alone, per time step and initial condition
all_baseline_max_errors_per_step = []
all_baseline_rmse_errors_per_step = []
for ic_seed in range(n_ics):
    # Fetch the initial condition and spline generated for this index
    init_cond_random = stored_initial_conditions[ic_seed]
    cs = stored_spline_objects[ic_seed]

    # Exact solution for this initial condition
    u_exact = compute_exact_solution_random_ic_vary_Nx(L, k, T, Nx, Nt, spline_obj=cs)

    # Quantization round trip
    u_exact_scaled, vmin_exact, vmax_exact = scale_2d_array(u_exact)
    u_exact_serialized = serialize_2d_integers(u_exact_scaled, settings)
    u_exact_parsed = deserialize_2d_integers(u_exact_serialized, settings)
    u_exact_unscaled = unscale_2d_array(u_exact_parsed, vmin_exact, vmax_exact)

    # Row-by-row difference between the exact and the round-tripped solution
    round_trip_residuals = [
        u_exact[t] - u_exact_unscaled[t] for t in range(u_exact.shape[0])
    ]
    all_baseline_max_errors_per_step.append(
        [np.max(np.abs(residual)) for residual in round_trip_residuals]
    )
    all_baseline_rmse_errors_per_step.append(
        [np.sqrt(np.mean(residual**2)) for residual in round_trip_residuals]
    )

all_baseline_max_errors_per_step = np.array(all_baseline_max_errors_per_step)
all_baseline_rmse_errors_per_step = np.array(all_baseline_rmse_errors_per_step)

# Average the per-step quantization errors over the initial conditions, then
# keep only the steps from index input_time_steps onwards
avg_baseline_max_errors_per_step = all_baseline_max_errors_per_step.mean(axis=0)
avg_baseline_rmse_errors_per_step = all_baseline_rmse_errors_per_step.mean(axis=0)
avg_baseline_max_errors_prediction = avg_baseline_max_errors_per_step[input_time_steps:]
avg_baseline_rmse_errors_prediction = avg_baseline_rmse_errors_per_step[input_time_steps:]

# Mean and sample standard deviation (ddof=1) of the LLM errors across initial conditions
avg_llm_max_diffs, std_llm_max_diffs = (
    np.mean(all_llm_max_diffs, axis=0),
    np.std(all_llm_max_diffs, axis=0, ddof=1),
)
avg_llm_rmses, std_llm_rmses = (
    np.mean(all_llm_rmses, axis=0),
    np.std(all_llm_rmses, axis=0, ddof=1),
)

def log_ci(mean, std, n, tcrit):
    """
    Confidence interval for a mean, symmetric on a log10 axis.

    The standard error of the mean is carried into log10 space with the
    delta method, the interval mean_log +/- tcrit * se_log is formed there,
    and both ends are mapped back to linear units.

    mean : arithmetic mean of the n samples (must be positive for log10)
    std : sample std of the n samples
    n : number of samples
    tcrit: two-sided t critical value; it determines the confidence level
           (e.g. the 0.975 quantile gives a 95% CI)

    Returns the tuple (lower, upper) in linear units.
    """
    center_log = np.log10(mean)
    # tcrit * [ (std / sqrt(n)) / (mean * ln 10) ]: SE in linear space, converted
    # to log10 space by the delta method, then scaled by the critical value
    half_width_log = tcrit * ((std / np.sqrt(n)) / (mean * np.log(10)))
    lower_log = center_log - half_width_log
    upper_log = center_log + half_width_log
    return 10**lower_log, 10**upper_log

# Calculate confidence intervals
# Two-sided 95% critical value of Student's t with n_ics - 1 degrees of freedom
t_critical = stats.t.ppf(0.975, n_ics - 1)

# (lower, upper) pair for every prediction step of the max-difference metric
max_diff_bounds = [
    log_ci(step_mean, step_std, n_ics, t_critical)
    for step_mean, step_std in zip(avg_llm_max_diffs, std_llm_max_diffs)
]
ci_lower_max_diffs_3BHF = [lower for lower, _ in max_diff_bounds]
ci_upper_max_diffs_3BHF = [upper for _, upper in max_diff_bounds]

# Same for the RMSE metric
rmse_bounds = [
    log_ci(step_mean, step_std, n_ics, t_critical)
    for step_mean, step_std in zip(avg_llm_rmses, std_llm_rmses)
]
ci_lower_rmses_3BHF = [lower for lower, _ in rmse_bounds]
ci_upper_rmses_3BHF = [upper for _, upper in rmse_bounds]

# Save all results
# Each key below becomes the name of one array inside the .npz archive.
# NOTE(review): fd_results and all_fd_results hold Python dicts, which NumPy
# stores as pickled object arrays - loading them presumably requires
# np.load(..., allow_pickle=True); confirm in the analysis notebook.
results_to_save = {
    # Averaged LLM metrics
    "llm_max_diffs_3BHF": avg_llm_max_diffs,
    "llm_rmses_3BHF": avg_llm_rmses,
    "std_max_diffs_3BHF": std_llm_max_diffs,
    "std_rmses_3BHF": std_llm_rmses,
    # LLM confidence intervals
    "ci_lower_max_diffs_3BHF": ci_lower_max_diffs_3BHF,
    "ci_upper_max_diffs_3BHF": ci_upper_max_diffs_3BHF,
    "ci_lower_rmses_3BHF": ci_lower_rmses_3BHF,
    "ci_upper_rmses_3BHF": ci_upper_rmses_3BHF,
    # Averaged finite difference metrics
    "fd_results": averaged_fd_results,
    # Raw results for all initial conditions
    "all_llm_max_diffs": all_llm_max_diffs,
    "all_llm_rmses": all_llm_rmses,
    "all_fd_results": all_fd_results,
    # Quant Floor
    "avg_baseline_max_errors_prediction": avg_baseline_max_errors_prediction,
    "avg_baseline_rmse_errors_prediction": avg_baseline_rmse_errors_prediction,
    "stored_initial_conditions": stored_initial_conditions_array,
}
np.savez_compressed("3BHF_10_step.npz", **results_to_save)