Cell 1: Builds a `metadata.json` file by scanning the dataset folder for simulation outputs (`xi_series_*.npy`) corresponding to runs 0–300 and recording each run's parameters and array shapes. This keeps every entry traceable to the parameter combination that produced it and consistent with the simulation's input-output structure.

Cell 2: Loads the metadata, keeps only the simulations whose final frame has the expected shape (250 × 250), and constructs input-output pairs in which the input is the vector of simulation parameters and the output is the final dendritic morphology. It then splits the data into training and testing sets (80/20) and saves them as `.npy` files for use in surrogate-model training.

In [None]:
# Cell 1 

import os
import numpy as np
import json
import itertools

# Dataset folder. Set the DENDRITE_DATASET_DIR environment variable to run the
# notebook on another machine; the default is the original location.
dataset_dir = os.environ.get(
    "DENDRITE_DATASET_DIR",
    r"C:\Users\Georges\.vscode\798 final project\dataset",
)

# Inclusive range of run ids to scan.
first_run_id = 0
last_run_id = 300

# Parameter grid. A run_id is the index of its parameter combination in
# itertools.product(*param_grid.values()), where the LAST key varies fastest,
# so both the key order and the value order are significant.
# NOTE(review): presumably this is the same order the simulation sweep used
# (the original comment said "same order") -- confirm before editing.
param_grid = {
    "dT0": [-0.2, -0.4, -0.6, -0.8],
    "c": [0.005, 0.02, 0.05],
    "N": [4, 6, 8],
    "theta_deg": [0, 15, 30, 45],
    "seed_radius": [0.08, 0.1, 0.15]
}

# All parameter combinations (4 * 3 * 3 * 4 * 3 = 432), indexed by run_id.
param_combinations = list(itertools.product(*param_grid.values()))


def read_npy_shape(path):
    """Return the shape of the array stored in the .npy file `path` as a list.

    The file is memory-mapped instead of being read into RAM, and the map is
    released as soon as the shape has been extracted.
    """
    return list(np.load(path, mmap_mode="r").shape)


def build_metadata(dataset_dir, param_grid, run_ids):
    """Collect one metadata entry per run whose xi series exists on disk.

    Parameters
    ----------
    dataset_dir : str
        Folder containing `xi_series_XXX.npy` / `dT_series_XXX.npy` files.
    param_grid : dict
        Maps parameter name -> list of values. The combination for a run is
        `list(itertools.product(*param_grid.values()))[run_id]`.
    run_ids : iterable of int
        Run ids to look for.

    Returns
    -------
    list of dict
        JSON-serialisable entries with keys `run_id`, `filename_xi`,
        `filename_dT`, one key per parameter, `xi_shape` and `dT_shape`.
        `dT_shape` is the shape actually stored in the dT file, or None when
        that file does not exist.

    Raises
    ------
    IndexError
        If a run id has no corresponding parameter combination.
    """
    param_names = list(param_grid)
    combinations = list(itertools.product(*param_grid.values()))

    entries = []
    for run_id in run_ids:
        # Reject negative ids explicitly: plain list indexing would silently
        # wrap around and attach the wrong parameters to the run.
        if not 0 <= run_id < len(combinations):
            raise IndexError(
                f"run_id {run_id} has no parameter combination "
                f"(grid has {len(combinations)} combinations)"
            )

        xi_filename = f"xi_series_{run_id:03d}.npy"
        dT_filename = f"dT_series_{run_id:03d}.npy"
        xi_path = os.path.join(dataset_dir, xi_filename)
        dT_path = os.path.join(dataset_dir, dT_filename)

        if not os.path.exists(xi_path):
            print(f"⚠ Warning: {xi_filename} missing!")
            continue

        # Record the dT shape that is really on disk instead of assuming it
        # equals the xi shape; None (JSON null) marks a missing dT file.
        dT_shape = read_npy_shape(dT_path) if os.path.exists(dT_path) else None

        entry = {
            "run_id": f"{run_id:03d}",
            "filename_xi": xi_filename,
            "filename_dT": dT_filename,
        }
        entry.update(zip(param_names, combinations[run_id]))
        entry["xi_shape"] = read_npy_shape(xi_path)
        entry["dT_shape"] = dT_shape
        entries.append(entry)

    return entries


# In a notebook kernel __name__ is "__main__", so this runs whenever the cell
# is executed; the guard only keeps the helpers above importable without
# touching the file system.
if __name__ == "__main__":
    # Make sure the dataset folder exists
    os.makedirs(dataset_dir, exist_ok=True)

    meta_log = build_metadata(
        dataset_dir, param_grid, range(first_run_id, last_run_id + 1)
    )

    # One summary line rather than one warning per run.
    n_missing_dT = sum(entry["dT_shape"] is None for entry in meta_log)
    if n_missing_dT:
        print(
            f"⚠ Warning: {n_missing_dT} entries have no dT_series file; "
            "their dT_shape is null."
        )

    # Save metadata
    metadata_path = os.path.join(dataset_dir, "metadata.json")
    with open(metadata_path, "w", encoding="utf-8") as f:
        json.dump(meta_log, f, indent=2)

    print(
        f"✅ New metadata.json created with {len(meta_log)} entries "
        f"from runs {first_run_id} to {last_run_id}."
    )


✅ New metadata.json created with 253 entries from runs 0 to 300.


In [None]:
# Cell 2 
import numpy as np
import os
import json
from sklearn.model_selection import train_test_split

# Dataset folder. Set the DENDRITE_DATASET_DIR environment variable to run the
# notebook on another machine; the default is the original location.
dataset_dir = os.environ.get(
    "DENDRITE_DATASET_DIR",
    r"C:\Users\Georges\.vscode\798 final project\dataset",
)
train_fraction = 0.8
split_seed = 42  # random_state passed to train_test_split

# Only runs whose final frame has exactly this (ny, nx) shape are kept.
expected_frame_shape = (250, 250)

# Metadata keys that form the input vector, in column order of X.
param_keys = ("dT0", "c", "N", "theta_deg", "seed_radius")


def build_dataset(
    metadata,
    dataset_dir,
    frame_shape=(250, 250),
    param_keys=("dT0", "c", "N", "theta_deg", "seed_radius"),
):
    """Build (X, Y) arrays from metadata entries and their xi series files.

    Parameters
    ----------
    metadata : iterable of dict
        Entries providing `filename_xi` and every key in `param_keys`.
    dataset_dir : str
        Folder containing the files named by `filename_xi`.
    frame_shape : tuple of int
        Required shape of the final frame; other runs are skipped with a
        printed message.
    param_keys : sequence of str
        Metadata keys copied, in this order, into each row of X.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, len(param_keys))
    Y : numpy.ndarray, shape (n_samples, *frame_shape)

    Raises
    ------
    ValueError
        If no run has a final frame of the required shape.
    """
    frame_shape = tuple(frame_shape)
    inputs = []
    outputs = []

    for entry in metadata:
        xi_filename = entry["filename_xi"]

        # Memory-map the series: only the final frame is needed, so there is
        # no reason to read the whole time series into RAM.
        xi_series = np.load(os.path.join(dataset_dir, xi_filename), mmap_mode="r")

        # Indexing [-1] on a series with zero frames would raise IndexError.
        if xi_series.ndim < 1 or xi_series.shape[0] == 0:
            print(f"⚠ Skipping {xi_filename} because it contains no frames")
            continue

        # Take final frame only
        xi_final = xi_series[-1]  # (ny, nx)

        if xi_final.shape != frame_shape:
            print(f"⚠ Skipping {xi_filename} because shape is {xi_final.shape}")
            continue  # skip this sample

        inputs.append([entry[key] for key in param_keys])
        # Copy the frame out of the memory map so the file can be released.
        outputs.append(np.array(xi_final))

    if not inputs:
        raise ValueError(
            f"No simulation with final-frame shape {frame_shape} found in "
            f"{dataset_dir!r}; nothing to split."
        )

    X = np.array(inputs)  # (n_samples, n_params)
    Y = np.stack(outputs, axis=0)  # (n_samples, ny, nx)
    return X, Y


# In a notebook kernel __name__ is "__main__", so this runs whenever the cell
# is executed; the guard only keeps build_dataset importable without touching
# the file system.
if __name__ == "__main__":
    # Load metadata
    metadata_path = os.path.join(dataset_dir, "metadata.json")
    with open(metadata_path, "r", encoding="utf-8") as f:
        metadata = json.load(f)

    X, Y = build_dataset(metadata, dataset_dir, expected_frame_shape, param_keys)

    # Train/test split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, train_size=train_fraction, random_state=split_seed
    )

    print("✅ Train/test split done:")
    print(f"  - X_train: {X_train.shape}, Y_train: {Y_train.shape}")
    print(f"  - X_test: {X_test.shape}, Y_test: {Y_test.shape}")

    # Save results
    np.save(os.path.join(dataset_dir, "X_train.npy"), X_train)
    np.save(os.path.join(dataset_dir, "Y_train.npy"), Y_train)
    np.save(os.path.join(dataset_dir, "X_test.npy"), X_test)
    np.save(os.path.join(dataset_dir, "Y_test.npy"), Y_test)

    print("✅ Train/test .npy files saved successfully.")


⚠ Skipping xi_series_000.npy because shape is (100, 100)
⚠ Skipping xi_series_001.npy because shape is (100, 100)
✅ Train/test split done:
  - X_train: (200, 5), Y_train: (200, 250, 250)
  - X_test: (51, 5), Y_test: (51, 250, 250)
✅ Train/test .npy files saved successfully.
