### Start with clean Data Folder Structure setup

In [16]:
import os
import shutil
import hashlib

# Pfade
source_dir = "data/TEST"
target_dir = "TEST_CLEAN"
os.makedirs(target_dir, exist_ok=True)

# Alle WAV-Dateien finden
for root, _, files in os.walk(source_dir):
    for file in files:
        if file.lower().endswith(".wav"):
            wav_path = os.path.join(root, file)
            phn_path = wav_path.rsplit(".", 1)[0] + ".PHN"

            if not os.path.exists(phn_path):
                print(f"⚠️  Keine passende PHN-Datei für: {wav_path}")
                continue

            # Eindeutigen Hash aus Pfad erzeugen
            unique_id = hashlib.sha1(wav_path.encode()).hexdigest()[:8]

            # Zielpfade
            new_wav = os.path.join(target_dir, f"{unique_id}.wav")
            new_phn = os.path.join(target_dir, f"{unique_id}.PHN")

            # Dateien kopieren
            shutil.copyfile(wav_path, new_wav)
            shutil.copyfile(phn_path, new_phn)

            print(f"✅ Kopiert: {new_wav}, {new_phn}")


✅ Kopiert: TEST_CLEAN\6d07cee2.wav, TEST_CLEAN\6d07cee2.PHN
⚠️  Keine passende PHN-Datei für: data/TEST\DR1\FAKS0\SA1.WAV.wav
✅ Kopiert: TEST_CLEAN\71ef50b4.wav, TEST_CLEAN\71ef50b4.PHN
⚠️  Keine passende PHN-Datei für: data/TEST\DR1\FAKS0\SA2.WAV.wav
✅ Kopiert: TEST_CLEAN\63da8953.wav, TEST_CLEAN\63da8953.PHN
⚠️  Keine passende PHN-Datei für: data/TEST\DR1\FAKS0\SI1573.WAV.wav
✅ Kopiert: TEST_CLEAN\a8425cf3.wav, TEST_CLEAN\a8425cf3.PHN
⚠️  Keine passende PHN-Datei für: data/TEST\DR1\FAKS0\SI2203.WAV.wav
✅ Kopiert: TEST_CLEAN\c996ef30.wav, TEST_CLEAN\c996ef30.PHN
⚠️  Keine passende PHN-Datei für: data/TEST\DR1\FAKS0\SI943.WAV.wav
✅ Kopiert: TEST_CLEAN\78f1529e.wav, TEST_CLEAN\78f1529e.PHN
⚠️  Keine passende PHN-Datei für: data/TEST\DR1\FAKS0\SX133.WAV.wav
✅ Kopiert: TEST_CLEAN\2978a1a7.wav, TEST_CLEAN\2978a1a7.PHN
⚠️  Keine passende PHN-Datei für: data/TEST\DR1\FAKS0\SX223.WAV.wav
✅ Kopiert: TEST_CLEAN\1a4bbaaa.wav, TEST_CLEAN\1a4bbaaa.PHN
⚠️  Keine passende PHN-Datei für: data/TEST\DR

### Phoneme to Vismen MAP

In [17]:
import os
import re

# Mapping von Phonemen → Visemen
phoneme_to_viseme = {
    # MPB (Lippen geschlossen)
    "m": "MPB", "p": "MPB", "b": "MPB", "em": "MPB", "en": "MPB",

    # FV (Zähne auf Lippe)
    "f": "FV", "v": "FV",

    # AE (offener Mund, Vokale)
    "ae": "AE","axr": "AE", "aa": "AE","aw": "AE","ay": "AE", "ah": "AE", "ao": "AE", "eh": "AE", "er": "AE", "ax": "AE", "ix": "AE",

    # YIL (Zunge vorn, schmaler Mund)
    "iy": "YIL", "ih": "YIL", "ey": "YIL", "el": "YIL", "l": "YIL", "y": "YIL",

    # WURO (rund, Lippen nach vorn)
    "w": "WURO","oy": "WURO","ux": "WURO", "uw": "WURO", "uh": "WURO", "r": "WURO", "ow": "WURO", "ax-h": "WURO",

    # SK (Zunge oben oder hinten)
    "s": "SK", "eng": "SK", "z": "SK", "t": "SK", "d": "SK","dx": "SK", "n": "SK", "nx": "SK", "ng": "SK","k": "SK", "g": "SK", "ch": "SK", "jh": "SK", "sh": "SK", "zh": "SK", "th": "SK", "dh": "SK",

    # SIL (alle Pausen, Geräusche etc.)
    "sil": "SIL", "pau": "SIL", "h#": "SIL", "epi": "SIL", "pcl": "SIL", "tcl": "SIL", "kcl": "SIL", "bcl": "SIL", "dcl": "SIL", "gcl": "SIL", "q": "SIL", "hh": "SIL", "hv": "SIL"
}

def clean_phoneme(phoneme):
    # Entferne Stressmarker (z. B. oy1 → oy)
    return re.sub(r"\d$", "", phoneme.lower())

def map_phn_file_to_viseme(phn_path, output_path):
    with open(phn_path, "r") as f_in, open(output_path, "w") as f_out:
        for line in f_in:
            parts = line.strip().split()
            if len(parts) != 3:
                continue  # Ungültige Zeile
            start, end, phoneme = parts
            phoneme_clean = clean_phoneme(phoneme)
            viseme = phoneme_to_viseme.get(phoneme_clean, "SIL")
            f_out.write(f"{start} {end} {viseme}\n")

def process_all_phns(phn_dir):
    for file in os.listdir(phn_dir):
        if not file.lower().endswith(".phn"):
            continue
        full_path = os.path.join(phn_dir, file)
        output_path = full_path.replace(".PHN", ".viseme.PHN").replace(".phn", ".viseme.PHN")
        map_phn_file_to_viseme(full_path, output_path)
        print(f"✅ Gemappt: {file} → {os.path.basename(output_path)}")


In [22]:
import os
import librosa
import numpy as np

# Feature-Parameter
sr = 16000
n_fft = 400           # 25ms Fenster
hop_length = 160      # 10ms Hop
n_mfcc = 13           # MFCC-Koeffizienten

# Verzeichnisse
data_dir = "TEST_CLEAN"
features_out = "mfcc_features_TEST"
labels_out = "labels"
os.makedirs(features_out, exist_ok=True)
os.makedirs(labels_out, exist_ok=True)

def get_viseme_labels(phn_path, num_frames, frame_hop_sec):
    labels = ["SIL"] * num_frames

    with open(phn_path, "r") as f:
        for line in f:
            start_s, end_s, viseme = line.strip().split()
            start = int(start_s) / sr
            end = int(end_s) / sr
            start_idx = int(start / frame_hop_sec)
            end_idx = int(end / frame_hop_sec)
            for i in range(start_idx, min(end_idx, num_frames)):
                labels[i] = viseme
    return labels

def process_file(wav_path):
    base = os.path.splitext(os.path.basename(wav_path))[0]
    viseme_path = os.path.join(data_dir, f"{base}.viseme.PHN")

    if not os.path.exists(viseme_path):
        print(f"⚠️ Keine viseme.PHN für {base}")
        return

    # Lade Audio
    y, _ = librosa.load(wav_path, sr=sr)

    # MFCC + Deltas extrahieren
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    delta = librosa.feature.delta(mfcc)
    delta2 = librosa.feature.delta(mfcc, order=2)

    # Stacken und transponieren → [T, 39]
    features_combined = np.vstack([mfcc, delta, delta2]).T
    num_frames = features_combined.shape[0]

    # Viseme-Labels laden
    frame_labels = get_viseme_labels(viseme_path, num_frames, hop_length / sr)

    # Save
    np.save(os.path.join(features_out, f"{base}.npy"), features_combined)
    with open(os.path.join(labels_out, f"{base}.txt"), "w") as f:
        f.write("\n".join(frame_labels))

    print(f"✅ {base}: {num_frames} Frames, {len(set(frame_labels))} Visemen")

# Alle WAVs verarbeiten
for file in os.listdir(data_dir):
    if file.endswith(".wav"):
        process_file(os.path.join(data_dir, file))


✅ 00036d5d: 249 Frames, 4 Visemen
✅ 0025b872: 315 Frames, 5 Visemen
✅ 00621de9: 255 Frames, 7 Visemen
✅ 007cddbc: 235 Frames, 7 Visemen
✅ 007d6a63: 151 Frames, 7 Visemen
✅ 00c91d71: 317 Frames, 5 Visemen
✅ 00d6dc92: 301 Frames, 7 Visemen
✅ 01224766: 220 Frames, 6 Visemen
✅ 013e4cd1: 531 Frames, 7 Visemen
✅ 014893ce: 347 Frames, 7 Visemen
✅ 015ac00c: 271 Frames, 7 Visemen
✅ 015bb7c5: 351 Frames, 6 Visemen
✅ 018401ae: 321 Frames, 7 Visemen
✅ 019a4fa0: 252 Frames, 7 Visemen
✅ 01a3321a: 289 Frames, 7 Visemen
✅ 01bf5ebf: 261 Frames, 6 Visemen
✅ 01c7a206: 214 Frames, 5 Visemen
✅ 01e19abb: 363 Frames, 5 Visemen
✅ 01f2cf57: 214 Frames, 6 Visemen
✅ 01fe6439: 255 Frames, 6 Visemen
✅ 022e2f85: 261 Frames, 7 Visemen
✅ 02370ceb: 335 Frames, 7 Visemen
✅ 02f72248: 351 Frames, 5 Visemen
✅ 032e7bae: 283 Frames, 6 Visemen
✅ 0356458b: 368 Frames, 5 Visemen
✅ 03b07d6d: 452 Frames, 7 Visemen
✅ 03f9abbd: 380 Frames, 7 Visemen
✅ 041ba21c: 270 Frames, 5 Visemen
✅ 0449fafd: 315 Frames, 7 Visemen
✅ 0474918b: 27

In [19]:
process_all_phns("TEST_CLEAN")

✅ Gemappt: 00036d5d.PHN → 00036d5d.viseme.PHN
✅ Gemappt: 0025b872.PHN → 0025b872.viseme.PHN
✅ Gemappt: 00621de9.PHN → 00621de9.viseme.PHN
✅ Gemappt: 007cddbc.PHN → 007cddbc.viseme.PHN
✅ Gemappt: 007d6a63.PHN → 007d6a63.viseme.PHN
✅ Gemappt: 00c91d71.PHN → 00c91d71.viseme.PHN
✅ Gemappt: 00d6dc92.PHN → 00d6dc92.viseme.PHN
✅ Gemappt: 01224766.PHN → 01224766.viseme.PHN
✅ Gemappt: 013e4cd1.PHN → 013e4cd1.viseme.PHN
✅ Gemappt: 014893ce.PHN → 014893ce.viseme.PHN
✅ Gemappt: 015ac00c.PHN → 015ac00c.viseme.PHN
✅ Gemappt: 015bb7c5.PHN → 015bb7c5.viseme.PHN
✅ Gemappt: 018401ae.PHN → 018401ae.viseme.PHN
✅ Gemappt: 019a4fa0.PHN → 019a4fa0.viseme.PHN
✅ Gemappt: 01a3321a.PHN → 01a3321a.viseme.PHN
✅ Gemappt: 01bf5ebf.PHN → 01bf5ebf.viseme.PHN
✅ Gemappt: 01c7a206.PHN → 01c7a206.viseme.PHN
✅ Gemappt: 01e19abb.PHN → 01e19abb.viseme.PHN
✅ Gemappt: 01f2cf57.PHN → 01f2cf57.viseme.PHN
✅ Gemappt: 01fe6439.PHN → 01fe6439.viseme.PHN
✅ Gemappt: 022e2f85.PHN → 022e2f85.viseme.PHN
✅ Gemappt: 02370ceb.PHN → 02370ceb

In [10]:
import numpy as np
import matplotlib.pyplot as plt
import os

# Pfad zur gespeicherten Feature-Datei
feature_path = "TEST_CLEAN/features/00c91d71.npy"  # <– ändere zu deiner Datei

# Lade die Log-Mel Features
features = np.load(feature_path)  # Shape: [frames, n_mels], z. B. [300, 40]

# Transponieren für Darstellung (Mel-Bänder auf Y-Achse)
features_T = features.T  # [n_mels, frames]

# Plotten
plt.figure(figsize=(10, 4))
plt.imshow(features_T, origin='lower', aspect='auto', cmap='viridis')
plt.title("Log-Mel Spectrogram")
plt.xlabel("Frame")
plt.ylabel("Mel-Band")
plt.colorbar(label="dB")
plt.tight_layout()
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'TEST_CLEAN/features/00c91d71.npy'