**DATA PROCESSING**: Only the first 30 frames

**Prepare data (first 30 frames of every run)**: generates the arrays X and Y, where X holds the inputs, $X \in \mathbb{R}^{N_c \times 5}$, and Y holds the outputs $\xi(t,x,y)$, $Y \in \mathbb{R}^{N_c \times 30 \times 250 \times 250}$ (run, frame, then the two spatial axes), and $N_c$ is the number of parameter combinations available.

In [1]:
import numpy as np
import os
import json
from tqdm import tqdm

# === Configuration ===
DATASET_DIR = r"C:\Users\Ali\Desktop\798 Project\dataset500"
SAVE_DIR = DATASET_DIR

# Number of leading frames of each ξ series kept as the regression target.
N_FRAMES = 30

# Metadata keys that make up one input vector, in column order of X.
input_keys = ["dT0", "c", "N", "theta_deg", "seed_radius"]

# === Load metadata ===
metadata_path = os.path.join(DATASET_DIR, "metadata.json")
# Explicit encoding: open() otherwise falls back to a platform-dependent
# default, which is not guaranteed to decode a JSON file.
with open(metadata_path, "r", encoding="utf-8") as f:
    metadata = json.load(f)

X_list = []
Y_list = []

print("📚 Preparing full evolution data...")

for entry in tqdm(metadata):
    # 1. Build input vector
    x_vec = [entry[k] for k in input_keys]
    X_list.append(x_vec)

    # 2. Load the ξ series and keep only the first N_FRAMES frames
    xi_path = os.path.join(DATASET_DIR, entry["filename_xi"])
    xi_full = np.load(xi_path)
    if xi_full.shape[0] < N_FRAMES:
        # Fail here with the offending file name instead of producing a
        # short target or an opaque shape mismatch in np.stack below.
        raise ValueError(
            f"{xi_path} has only {xi_full.shape[0]} frames; "
            f"expected at least {N_FRAMES}"
        )
    # Copy the slice: a plain slice is a view that would keep every full
    # series in memory until the final stack.
    xi_series = xi_full[:N_FRAMES].copy()
    Y_list.append(xi_series)

# === Stack into arrays ===
X = np.array(X_list)  # shape (N_c, 5), with N_c = len(metadata)
# shape (N_c, N_FRAMES, ...); trailing axes are whatever the ξ files store
# (250 x 250 according to the notebook description — TODO confirm)
Y = np.stack(Y_list)

print(f"✅ X shape: {X.shape}")
print(f"✅ Y shape: {Y.shape}")

# === Save ===
np.save(os.path.join(SAVE_DIR, "X_full.npy"), X)
np.save(os.path.join(SAVE_DIR, "Y_full.npy"), Y)

print(f"✅ Full evolution data saved to {SAVE_DIR}")


ModuleNotFoundError: No module named 'tqdm'

**DATA SPLITTING**

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# === Load full dataset ===
# NOTE(review): this folder differs from the one the preparation cell saves
# to (the dataset500 root) — presumably the files are moved by hand; confirm.
full_dir = r"C:\Users\Ali\Desktop\798 Project\dataset500\Full Data Evolution 30"
X = np.load(full_dir + "\\X_full.npy")
Y = np.load(full_dir + "\\Y_full.npy")

# === First split: hold out 15% of all samples as the test set ===
X_trainval, X_test, Y_trainval, Y_test = train_test_split(
    X,
    Y,
    test_size=0.15,
    random_state=42,
)

# === Second split: 17.65% of the remaining 85% becomes validation ===
# 0.1765 * 0.85 ≈ 0.15, so the overall ratio is roughly 70/15/15.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_trainval,
    Y_trainval,
    test_size=0.1765,
    random_state=42,
)

# === Save splits ===
# NOTE(review): the loader cell reads from a "Splitting 30 Frames" subfolder,
# not from this directory — confirm where the split files are expected.
out_dir = r"C:\Users\Ali\Desktop\798 Project"
split_arrays = (
    ("X_train", X_train),
    ("Y_train", Y_train),
    ("X_val", X_val),
    ("Y_val", Y_val),
    ("X_test", X_test),
    ("Y_test", Y_test),
)
for name, arr in split_arrays:
    np.save(out_dir + "\\" + name + ".npy", arr)

# === Report the number of samples in each split ===
print("✅ Done! Split sizes:")
for label, part in (("Training", X_train), ("Validation", X_val), ("Test", X_test)):
    print(f"{label} set: {part.shape[0]}")


**DATASET LOADER:** loads the split data for training. The next file is the CNN training file.

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os

# === Configuration ===
# Directory containing the X_{split}.npy / Y_{split}.npy files that
# DendriteDataset loads. Adjust if needed.
DATASET_DIR = r"C:\Users\Ali\Desktop\798 Project\Splitting 30 Frames"

class DendriteDataset(Dataset):
    """Dataset of (input vector, ξ evolution) pairs for one data split.

    Loads ``X_{split}.npy`` and ``Y_{split}.npy`` from a directory and keeps
    both fully in memory as float32 tensors.

    Args:
        split: Which split to load: ``"train"``, ``"val"`` or ``"test"``.
        data_dir: Directory containing the ``.npy`` files. When ``None``
            (the default) the module-level ``DATASET_DIR`` is used.

    Raises:
        ValueError: If ``split`` is not a known split name, or if the X and Y
            files do not contain the same number of samples.
    """

    _SPLITS = ("train", "val", "test")

    def __init__(self, split="train", data_dir=None):
        # Raise explicitly instead of asserting: assert statements are
        # stripped when Python runs with -O.
        if split not in self._SPLITS:
            raise ValueError("Split must be 'train', 'val', or 'test'.")

        root = DATASET_DIR if data_dir is None else data_dir
        inputs = np.load(os.path.join(root, f"X_{split}.npy"))
        targets = np.load(os.path.join(root, f"Y_{split}.npy"))

        # __len__ is driven by X alone, so a mismatch would otherwise only
        # surface later as an out-of-range index on Y.
        if len(inputs) != len(targets):
            raise ValueError(
                f"X_{split}.npy has {len(inputs)} samples but "
                f"Y_{split}.npy has {len(targets)}."
            )

        # as_tensor converts to float32 and skips the extra copy when the
        # stored array already has that dtype.
        self.X = torch.as_tensor(inputs, dtype=torch.float32)
        self.Y = torch.as_tensor(targets, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples in this split."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair at position ``idx``."""
        x = self.X[idx]  # presumably shape (5,): one value per input key
        y = self.Y[idx]  # presumably (30, 250, 250) for the 30-frame data — TODO confirm
        return x, y

# === Usage example ===
if __name__ == "__main__":
    # One in-memory dataset per split.
    train_dataset = DendriteDataset(split="train")
    val_dataset = DendriteDataset(split="val")
    test_dataset = DendriteDataset(split="test")

    # Only the training loader shuffles; validation and test keep file order.
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

    # Quick check: print the shapes of the first training batch only.
    for inputs, targets in train_loader:
        # Expected (batch_size, 5)
        print(f"Input batch shape: {inputs.shape}")
        # Expected (batch_size, 30, 250, 250) for the 30-frame data — TODO confirm
        print(f"Target batch shape: {targets.shape}")
        break
