In [None]:
#!/usr/bin/env python3
"""
move_val_segments.py

Move validation segments listed in val.lst from train-0 to val directory,
using os.path.join for path construction and adapting to val.lst’s “train-0/…” entries.

Folder structure example:
  data/Valentini/valentini/
      ├ train-0/
      │    ├ clean/
      │    │    └ p226/p226_001_000.wav
      │    │    └ ...
      │    └ noisy/
      │         └ p226/p226_001_000.wav
      │         └ ...
      ├ val.lst   (lines like "train-0/noisy/p226/p226_001_000.wav")
      └ (other splits)

When run, for each entry in val.lst (“train-0/clean/…” or “train-0/noisy/…”),
the script will:
  - move the corresponding clean file from train-0/clean/... to val/clean/...
  - move the corresponding noisy file from train-0/noisy/... to val/noisy/...
"""

def train2val(data_root=None):
    """Move the validation segments listed in ``val.lst`` from ``train-0`` to ``val``.

    Each non-empty line of ``<data_root>/val.lst`` must have the form
    ``train-0/<clean|noisy>/<speaker>/<file>``.  Whichever category a line
    names, BOTH files of that segment are moved:

        train-0/clean/<speaker>/<file>  ->  val/clean/<speaker>/<file>
        train-0/noisy/<speaker>/<file>  ->  val/noisy/<speaker>/<file>

    A segment is processed once even if ``val.lst`` lists both its clean and
    its noisy path, and a file that already sits at its destination is skipped
    silently, so the function can be re-run safely.  Malformed lines and
    files missing from both source and destination are reported with a
    ``[WARN]`` message and skipped.

    Args:
        data_root: Directory containing ``train-0/`` and ``val.lst`` (absolute
            or relative to the current working directory).  Defaults to
            ``data/Valentini/valentini``.

    Raises:
        FileNotFoundError: If ``val.lst`` does not exist under ``data_root``.
    """
    import os
    import shutil

    if data_root is None:
        data_root = os.path.join("data", "Valentini", "valentini")

    # Validate the input first so that a wrong data_root does not leave a
    # freshly created, empty val/ tree behind.
    val_list_path = os.path.join(data_root, "val.lst")
    if not os.path.exists(val_list_path):
        raise FileNotFoundError(f"val.lst not found at {val_list_path}")

    categories = ("clean", "noisy")
    for category in categories:
        os.makedirs(os.path.join(data_root, "val", category), exist_ok=True)

    # (speaker, filename) pairs already handled during this run.
    handled = set()

    with open(val_list_path, "r", encoding="utf-8") as f:
        for line in f:
            rel_path = line.strip()  # e.g., "train-0/noisy/p226/p226_001_000.wav"
            if not rel_path:
                continue

            # val.lst always uses forward slashes, regardless of the host OS.
            parts = rel_path.split("/")
            # Require exactly train-0/<category>/<speaker>/<file>.  An empty
            # component (e.g. a trailing slash) would make the source path
            # resolve to a whole speaker directory, and a deeper path would be
            # silently truncated, so both are rejected.
            if (
                len(parts) != 4
                or not all(parts)
                or parts[0] != "train-0"
                or parts[1] not in categories
            ):
                print(f"[WARN] Unexpected entry in val.lst: {rel_path}")
                continue

            _, _, spk, fname = parts
            if (spk, fname) in handled:
                # The paired clean/noisy line of a segment we already moved.
                continue
            handled.add((spk, fname))

            for category in categories:
                src = os.path.join(data_root, "train-0", category, spk, fname)
                dst = os.path.join(data_root, "val", category, spk, fname)
                if os.path.exists(src):
                    os.makedirs(os.path.dirname(dst), exist_ok=True)
                    shutil.move(src, dst)
                elif not os.path.exists(dst):
                    # Missing from train-0 AND from val: genuinely absent,
                    # not just moved by an earlier run.
                    print(f"[WARN] {category.capitalize()} file not found: {src}")

    print("Finished moving all validation segments.")

In [None]:
def testaudio():
    import soundfile as sf
    audio, sr = sf.read('data/Valentini/valentini/train-0/clean/p237/p237_001_000.wav')
    print(audio.shape, sr)

In [1]:
def count_wav_files(directory):
    """Return how many ``.wav`` files live under ``directory``, at any depth.

    The extension check is case-insensitive, so ``.WAV`` counts as well.
    Only regular files are counted; a sub-directory whose name happens to
    end in ``.wav`` is descended into, not counted.
    """
    import os

    total = 0
    # Explicit work stack of directories still to be scanned.
    pending = [directory]
    while pending:
        current = pending.pop()
        for item in os.scandir(current):
            if item.is_file():
                if item.name.lower().endswith('.wav'):
                    total += 1
            elif item.is_dir():
                pending.append(item.path)
    return total

Total .wav files: 234270


## Execute the code you want below

In [7]:
# Recursively count the .wav files under the train-0 directory.
count_wav_files("data/Valentini/valentini/train-0")

234270