In [None]:
import os
import numpy as np

# ── 1. Load every batch from the small_batches folder ──
batch_dir = "./small_batches"

# List the directory once and derive both file lists from the same snapshot,
# so they cannot disagree because the folder changed between two listings.
batch_names = sorted(os.listdir(batch_dir))
wave_files = [os.path.join(batch_dir, f) for f in batch_names if f.startswith("waves")]
vel_files = [os.path.join(batch_dir, f) for f in batch_names if f.startswith("vels")]

# np.concatenate rejects an empty list with a generic error; fail early with a
# message that names the folder instead.
if not wave_files or not vel_files:
    raise FileNotFoundError(
        f"No 'waves*'/'vels*' batch files found in {batch_dir!r} "
        f"(waves: {len(wave_files)}, vels: {len(vel_files)})"
    )

# Concatenate all batches into one large array along the first (sample) axis.
# Both lists are sorted lexicographically and samples are paired by position.
# NOTE(review): assumes wave/vel file names sort into the same batch order
# -- confirm against the naming scheme of the batch writer.
# Shapes per the original author's note: waves (10000,5,1000,70),
# vels (10000,1,70,70).
all_waves = np.concatenate([np.load(p) for p in wave_files], axis=0)
all_vels = np.concatenate([np.load(p) for p in vel_files], axis=0)

# Explicit raise rather than `assert`, which is stripped under `python -O`.
if all_waves.shape[0] != all_vels.shape[0]:
    raise ValueError(
        f"Sample count mismatch: {all_waves.shape[0]} waves "
        f"vs {all_vels.shape[0]} vels"
    )
N = all_waves.shape[0]

# ── 2. Randomly split the sample indices into train / val ──
from sklearn.model_selection import train_test_split

VAL_FRACTION = 0.2  # hold out 20% for validation; the remaining 80% is train
SPLIT_SEED = 42     # fixed seed passed as random_state for the shuffle

idx = np.arange(N)
train_idx, val_idx = train_test_split(
    idx,
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
    shuffle=True,
)

# ── 3. Extract each split from the full arrays and save it ──
os.makedirs("./dataset", exist_ok=True)

# Indexing with an index array copies the selected rows, so each split is
# materialised exactly once and reused for both np.save and the shape report
# (the arrays are large; indexing again just to print the shape is wasteful).
for split_name, split_idx, label in (
    ("train", train_idx, "Train set:"),
    ("val", val_idx, "Val set:  "),
):
    split_waves = all_waves[split_idx]
    split_vels = all_vels[split_idx]
    np.save(f"./dataset/{split_name}_waves.npy", split_waves)
    np.save(f"./dataset/{split_name}_vels.npy", split_vels)
    print(label, split_waves.shape, split_vels.shape)
    # Drop the copies before building the next split.
    del split_waves, split_vels
