This notebook prepares the data for the subsequent notebook `10-Step-Analyze.ipynb`, which generates figures illustrating the bias analysis of the multi-step prediction rollout, as described in Supplementary Material Section 9.

In [None]:
import os

# Set before any tokenizer is created. NOTE(review): presumably this silences
# the Hugging Face tokenizers fork/parallelism warning — confirm against the
# tokenizers documentation.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import torch

# Report every CUDA device PyTorch can see.
num_devices = torch.cuda.device_count()
print("Number of visible GPUs:", num_devices)

for i in range(num_devices):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

# torch.cuda.current_device() raises when no CUDA device is usable, so only
# query it when CUDA is actually available. `current_device` stays defined
# (as None) in the CPU-only case so the name always exists after this cell.
if torch.cuda.is_available():
    current_device = torch.cuda.current_device()
    print("Current device index:", current_device)
    print("Current device name:", torch.cuda.get_device_name(current_device))
else:
    current_device = None
    print("No CUDA device is visible to PyTorch.")

In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt

from data_processing import (
    SimpleSerializerSettings, scale_2d_array, unscale_2d_array,
    serialize_2d_integers, deserialize_2d_integers
)

from allen_cahn_equation import (
    compute_exact_solution_random_ic_vary_Nx,
    finite_difference_multi_predictions,
    visualize_spline_ic,
    plot_both_grids
)

from llama_utils import load_model_and_tokenizer, llm_multi_predictions

# Checkpoint identifier handed to load_model_and_tokenizer; the commented
# lines are the alternative checkpoints that can be swapped in.
MODEL_NAME = "meta-llama/Llama-3.1-8B"
# MODEL_NAME = "meta-llama/Llama-3.2-3B"
# MODEL_NAME = "meta-llama/Llama-3.2-1B"

# Seed the Python, NumPy and PyTorch generators (in that order) with a single
# value so repeated runs of the notebook draw the same random numbers.
seed = 1
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(seed)

# Also seed every CUDA device explicitly when CUDA is present.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

In [None]:
# Load the model/tokenizer pair for the checkpoint selected in MODEL_NAME.
_loaded = load_model_and_tokenizer(MODEL_NAME)
model, tokenizer = _loaded

In [None]:
# Demo 1: draw a random initial condition (Nx values, uniform in [-0.5, 0.5))
# and visualize it. `cs` is reused later as the `spline_obj` argument of the
# exact-solution solver.
L = 2
Nx = 14
init_cond_random = np.random.uniform(low=-0.5, high=0.5, size=Nx)
fig, cs = visualize_spline_ic(L, Nx, init_cond_random)
plt.tight_layout()
plt.show()

# Demo 2: plot the same underlying initial condition on an original grid and
# on a new grid of Nx_new points.
# NOTE(review): Nx_new currently equals Nx_original, so both grids have the
# same size — confirm this is intended for the resampling demo.
Nx_original, Nx_new = Nx, 14
fig, cs, init_cond_random_new = plot_both_grids(
    L, Nx_original, Nx_new, init_cond_random
)
plt.tight_layout()
plt.show()

In [None]:
# --- Allen-Cahn problem configuration -------------------------------------
# Spatial grid
L = 2                # Length of the spatial domain
Nx = 14              # Number of spatial steps (excluding boundary points)
dx = L / (Nx + 1)

# Temporal grid
T = 0.5              # Total simulation time
Nt = 25              # Number of time steps
dt = T / Nt

# Coefficient passed to the solver. The original notebook labels it
# "thermal diffusivity" — NOTE(review): confirm the intended name for the
# Allen-Cahn setting.
k = 0.001

# --- Serialization / prediction configuration ----------------------------
settings = SimpleSerializerSettings(space_sep=",", time_sep=";")
input_time_steps = 16               # forwarded to the prediction helpers
number_of_future_predictions = 10   # forwarded to the prediction helpers
n_seeds = 20                        # forwarded to llm_multi_predictions

# Exact solution for the spline initial condition `cs`.
# NOTE: `cs` is not defined in this cell; it comes from the preceding demo
# cell, which must have been executed first.
u_exact = compute_exact_solution_random_ic_vary_Nx(
    L, k, T, Nx, Nt, spline_obj=cs
)

# Scale the full exact solution and serialize it to text ...
u_exact_scaled, vmin_exact, vmax_exact = scale_2d_array(u_exact)
u_exact_serialized = serialize_2d_integers(u_exact_scaled, settings)

# ... then parse the text back and undo the scaling (round trip).
u_exact_parsed = deserialize_2d_integers(u_exact_serialized, settings)
u_exact_unscaled = unscale_2d_array(u_exact_parsed, vmin_exact, vmax_exact)



In [None]:
# Keyword arguments common to the LLM rollout and the finite-difference
# baseline: the serialized exact solution, the context/rollout lengths, the
# serializer settings and the scaling bounds.
shared_kwargs = dict(
    full_serialized_data=u_exact_serialized,
    input_time_steps=input_time_steps,
    number_of_future_predictions=number_of_future_predictions,
    settings=settings,
    vmin=vmin_exact,
    vmax=vmax_exact,
)

# Multi-step LLM predictions, run with n_seeds seeds.
(
    llm_max_diffs,
    llm_rmses,
    llm_predictions,
    std_max_diffs,
    std_rmses,
) = llm_multi_predictions(
    model=model,
    tokenizer=tokenizer,
    Nx=Nx,
    n_seeds=n_seeds,
    **shared_kwargs,
)

# Finite-difference baseline on the same serialized data.
fd_results = finite_difference_multi_predictions(
    L=L,
    k=k,
    Nt=Nt,
    Nx=Nx,
    T=T,
    **shared_kwargs,
)

# Persist all results for the follow-up analysis notebook.
# NOTE(review): the file name and the "_8B" key suffix are hard-coded and do
# not follow MODEL_NAME — update them by hand when switching checkpoints,
# otherwise results from another model are stored under the 8B label.
arrays_to_save = {
    # LLM metrics
    "llm_max_diffs_8B": llm_max_diffs,
    "llm_rmses_8B": llm_rmses,
    "llm_predictions_8B": llm_predictions,
    "std_max_diffs_8B": std_max_diffs,
    "std_rmses_8B": std_rmses,
    # Finite difference metrics
    "fd_results": fd_results,
}
np.savez_compressed("8B_10_step.npz", **arrays_to_save)