In [1]:
import numpy as np
import os 
import matplotlib.pyplot as plt

In [2]:
# Root directory of the sample data; the directory walk in the next cell starts here.
data_dir = "./waveform-inversion/train_samples" 

In [3]:
# Print the directory tree under data_dir as an indented outline.
for current_dir, _subdirs, filenames in os.walk(data_dir):
    # Nesting level = path separators left once the data_dir prefix is removed.
    remainder = current_dir.replace(data_dir, "")
    pad = "  " * remainder.count(os.sep)
    # basename is empty when the path ends with a separator; fall back to data_dir.
    label = os.path.basename(current_dir) or data_dir
    print(f"{pad}{label}:")
    # List only the .npy files of this directory, one level deeper than its name.
    npy_names = [name for name in filenames if name.endswith(".npy")]
    for name in npy_names:
        print(f"{pad}  └ {name}")

train_samples:
  CurveFault_A:
    └ seis2_1_0.npy
    └ seis4_1_0.npy
    └ vel2_1_0.npy
    └ vel4_1_0.npy
  CurveFault_B:
    └ seis6_1_0.npy
    └ seis8_1_0.npy
    └ vel6_1_0.npy
    └ vel8_1_0.npy
  CurveVel_A:
    data:
      └ data1.npy
      └ data2.npy
    model:
      └ model1.npy
      └ model2.npy
  CurveVel_B:
    data:
      └ data1.npy
      └ data2.npy
    model:
      └ model1.npy
      └ model2.npy
  FlatFault_A:
    └ seis2_1_0.npy
    └ seis4_1_0.npy
    └ vel2_1_0.npy
    └ vel4_1_0.npy
  FlatFault_B:
    └ seis6_1_0.npy
    └ seis8_1_0.npy
    └ vel6_1_0.npy
    └ vel8_1_0.npy
  FlatVel_A:
    data:
      └ data1.npy
      └ data2.npy
    model:
      └ model1.npy
      └ model2.npy
  FlatVel_B:
    data:
      └ data1.npy
      └ data2.npy
    model:
      └ model1.npy
      └ model2.npy
  Style_A:
    data:
      └ data1.npy
      └ data2.npy
    model:
      └ model1.npy
      └ model2.npy
  Style_B:
    data:
      └ data1.npy
      └ data2.npy
    model:
   

In [4]:
import numpy as np
import os
from tqdm import tqdm

data_dir = "./waveform-inversion/train_samples"
out_dir = "./small_batches"
os.makedirs(out_dir, exist_ok=True)

SEED = 42          # fixed so the saved batches are identical across re-runs
batch_size = 1000  # number of *files* combined into one saved batch


def _strip_prefix(fname, prefix):
    """Return fname without its leading prefix (unchanged if the prefix is absent)."""
    return fname[len(prefix):] if fname.startswith(prefix) else fname


def _collect_entries(root):
    """Scan every family directory under root for waveform / velocity files.

    Two layouts are recognised inside a family directory:
      * "seis*.npy" (waveform) and "vel*.npy" (velocity) side by side;
      * "data/*.npy" (waveform) and "model/*.npy" (velocity) sub-directories.

    Returns:
        (wave_entries, vel_entries): two lists of (pair_key, path) tuples,
        each sorted by pair_key. pair_key is (family, layout, file name
        without its seis/vel/data/model prefix), so a waveform file and its
        velocity file share the same key.
    """
    waves, vels = [], []
    for family in os.listdir(root):
        fam_path = os.path.join(root, family)
        if not os.path.isdir(fam_path):
            continue

        # Layout 1: files directly inside the family directory.
        for fname in os.listdir(fam_path):
            if not fname.endswith(".npy"):
                continue
            for prefix, bucket in (("seis", waves), ("vel", vels)):
                if fname.startswith(prefix):
                    key = (family, "flat", fname[len(prefix):])
                    bucket.append((key, os.path.join(fam_path, fname)))

        # Layout 2: every .npy file inside the data/ and model/ sub-directories.
        for sub, bucket in (("data", waves), ("model", vels)):
            subp = os.path.join(fam_path, sub)
            if not os.path.isdir(subp):
                continue
            for fname in os.listdir(subp):
                if fname.endswith(".npy"):
                    key = (family, "sub", _strip_prefix(fname, sub))
                    bucket.append((key, os.path.join(subp, fname)))

    waves.sort()
    vels.sort()
    return waves, vels


# (1) Collect the file lists, ordered so that position k in both lists is one pair.
wave_entries, vel_entries = _collect_entries(data_dir)
wave_paths = [path for _, path in wave_entries]
vel_paths = [path for _, path in vel_entries]

# (2) Validate the pairing: equal counts AND matching keys at every position.
# Equal counts alone would not notice one missing file on each side.
assert len(wave_paths) == len(vel_paths), "波形と速度マップの数が合いません"
for (wave_key, wave_path), (vel_key, vel_path) in zip(wave_entries, vel_entries):
    if wave_key != vel_key:
        raise ValueError(
            f"Waveform/velocity files are not paired: {wave_path} vs {vel_path}"
        )

# (3) Shuffled order of the file pairs (seeded for reproducibility).
N = len(wave_paths)
rng = np.random.default_rng(SEED)
perm = rng.permutation(N)

# (4) Load and save batch by batch.
# Ceiling division: the last, possibly smaller, batch is written too, so no
# file is dropped and at least one batch exists whenever N > 0.
num_batches = (N + batch_size - 1) // batch_size

for i in range(num_batches):
    batch_ids = perm[i * batch_size:(i + 1) * batch_size]

    # NOTE(review): np.stack adds a new leading axis with one entry per *file*.
    # If each .npy file already holds many samples along axis 0 — TODO confirm —
    # the result has one axis more than a per-sample batch and batch_size counts
    # files rather than samples; np.concatenate(..., axis=0) would be needed then.
    wave_batch = np.stack([np.load(wave_paths[j]) for j in batch_ids])
    vel_batch = np.stack([np.load(vel_paths[j]) for j in batch_ids])

    np.save(os.path.join(out_dir, f"waves_batch_{i+1:02d}.npy"), wave_batch)
    np.save(os.path.join(out_dir, f"vels_batch_{i+1:02d}.npy"),  vel_batch)
    print(f"Saved batch {i+1}")


In [5]:
# Load every listed file in list order and join the arrays along axis 0.
all_waves = np.concatenate(list(map(np.load, wave_paths)), axis=0)
all_vels = np.concatenate(list(map(np.load, vel_paths)), axis=0)

NameError: name 'wave_files' is not defined

In [None]:
# Requires all_waves and all_vels to be defined already.
# One shared random ordering is applied to both arrays so that entry i of the
# shuffled waveforms still belongs to entry i of the shuffled velocity maps.
N = len(all_waves)  # length along axis 0
idx = np.random.permutation(N)

# Reorder both arrays with the same index vector.
waves_shuffled, vels_shuffled = all_waves[idx], all_vels[idx]


In [None]:

# Number of images to show and the width of the grid.
n = 30
cols = 6
rows = -(-n // cols)  # ceiling division: enough rows to hold n panels

fig, axes = plt.subplots(rows, cols, figsize=(cols*3, rows*3))
axes = axes.flatten()

# One panel per shuffled entry; the title maps the new position to the old one.
for i, ax in enumerate(axes[:n]):
    orig_i = idx[i]  # index this entry had before shuffling
    ax.imshow(vels_shuffled[i, 0], cmap='jet', aspect='auto')
    ax.set_title(f"New {i} ← Orig {orig_i}", fontsize=8)
    ax.axis('off')

# Switch off the axes of any panels that received no image.
for unused_ax in axes[n:]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class WaveformDataset(Dataset):
    """Dataset of (waveform, velocity map) pairs read from two ``.npy`` files.

    Both files are loaded completely into memory with ``np.load``; entry ``i``
    of the first array is paired with entry ``i`` of the second.

    Args:
        waves_path: Path of the ``.npy`` file holding the inputs.
        vels_path: Path of the ``.npy`` file holding the targets.

    Raises:
        ValueError: If the two arrays differ in length along axis 0. Without
            this check a mismatch would only show up later as an IndexError
            or as silently ignored trailing targets.
    """

    def __init__(self, waves_path, vels_path):
        # Shapes below are the author's expectation; they are not enforced here.
        self.waves = np.load(waves_path)  # expected (N, 5, 1000, 70)
        self.vels = np.load(vels_path)    # expected (N, 1, 70, 70)
        if len(self.waves) != len(self.vels):
            raise ValueError(
                f"waves and vels differ in length: "
                f"{len(self.waves)} != {len(self.vels)}"
            )

    def __len__(self):
        """Return the number of pairs (length of the waveform array)."""
        return len(self.waves)

    def __getitem__(self, idx):
        """Return pair ``idx`` as two float32 tensors ``(x, y)``."""
        x = torch.from_numpy(self.waves[idx]).float()
        y = torch.from_numpy(self.vels[idx]).float()
        return x, y

# Example: wrap the first saved batch in a dataset and a shuffling loader.
ds = WaveformDataset(
    "small_batches/waves_batch_01.npy",
    "small_batches/vels_batch_01.npy",
)
loader = DataLoader(ds, batch_size=32, shuffle=True)

# Pull a single mini-batch (if there is one) and show its tensor shapes.
# Shapes the author expects: torch.Size([32,5,1000,70]), torch.Size([32,1,70,70])
first_batch = next(iter(loader), None)
if first_batch is not None:
    xb, yb = first_batch
    print(xb.shape, yb.shape)
