# Create Dataset Notebook
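This notebook generates a new experiment run and its datasets for normal mixture experiments. Every execution creates a new timestamped run directory under `./data/` and writes the DGP specification (`dgp.json`) plus one sample file per configured sample size into it. The random seed is fixed in the configuration, so re-running with unchanged settings is intended to reproduce the same sample values in a fresh run directory.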

In [1]:
import json
import os
from datetime import datetime

import numpy as np
from scipy_extensions import normal_mixture

# Define DGP and generate new experiment run
# Parameters of the data-generating process (DGP). They are persisted
# verbatim in dgp.json below so a run can be traced back to its settings.
means = [0, 1.5]
stds = [1, 1]
weights = [0.25, 0.75]
sample_sizes = [50, 200, 5000]
random_seed = 2026

base_dir = "./data/"

# The run id is a timestamp with one-second resolution, so two executions
# started within the same second would map to the same directory. Create the
# directory with exist_ok=False and, on a collision, retry with a numeric
# suffix ("_1", "_2", ...) so an existing run is never silently overwritten.
# Without a collision the id keeps the plain "run_YYYYmmdd_HHMMSS" format.
run_stamp = datetime.now().strftime("run_%Y%m%d_%H%M%S")
run_id = run_stamp
collision = 0
while True:
    run_path = os.path.join(base_dir, run_id)
    try:
        os.makedirs(run_path, exist_ok=False)
        break
    except FileExistsError:
        collision += 1
        run_id = f"{run_stamp}_{collision}"

# Save DGP
DGP = {
    "means": means,
    "stds": stds,
    "weights": weights,
    "sample_sizes": sample_sizes,
    "random_seed": random_seed,
}
dgp_path = os.path.join(run_path, "dgp.json")
# Explicit encoding keeps the file independent of the platform default.
with open(dgp_path, "w", encoding="utf-8") as f:
    json.dump(DGP, f, indent=2)
print(f"Created and saved DGP to {dgp_path}")

# Generate and save data
# A single generator, seeded from random_seed, is created once and handed to
# every sampling call below.
# NOTE(review): presumably each call advances this shared generator, so the
# draws for a given n depend on the order of sample_sizes — confirm against
# normal_mixture.sample.
rng = np.random.default_rng(random_seed)

for n in sample_sizes:
    # Draw n observations; the second return value is kept under the name
    # `components` but is not written to disk here.
    samples, components = normal_mixture.sample(
        n, weights, means, stds, random_state=rng
    )

    # Both output files share one stem and differ only in their extension.
    file_stem = os.path.join(run_path, f"mixture_samples_n{n}")
    npy_path = file_stem + ".npy"
    csv_path = file_stem + ".csv"

    # Write the binary copy first, then the text copy.
    np.save(npy_path, samples)
    np.savetxt(csv_path, samples, delimiter=",")
    print(f"Saved data for n={n} to {csv_path} and {npy_path}")

ModuleNotFoundError: No module named 'scipy_extensions'