# Alignment Checker

This notebook allows you to interactively check the audio-to-phoneme alignment for segmented recordings.

In [None]:
import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import Audio, display, clear_output

# Add project root to sys.path
sys.path.append(os.path.abspath('..'))

from data_prepare.audio_utils import get_sampling_rate

In [None]:
# CONFIGURATION
# Directory containing the segmented recordings. The cells below look here
# for the <stem>.wav, <stem>.txt and <stem>.csv files of every segment.
DATA_DIR = '../datalocal/v260210_24kHz/readtext_split'

# Only warn when the directory is missing; nothing is raised at this point.
if not Path(DATA_DIR).exists():
    print(f"Warning: DATA_DIR {DATA_DIR} not found. Please run the segmentation script first.")

In [None]:
def get_speaker_data(data_dir):
    """Index the segment recordings in a directory by speaker.

    Every ``*.wav`` file directly inside ``data_dir`` counts as one segment.
    The speaker id is the part of the file stem before the first underscore,
    and the segment text is the content of the sibling ``.txt`` file (an
    empty string when that file does not exist).

    Args:
        data_dir: Directory to scan (string or path-like).

    Returns:
        A dict mapping each speaker id to a list of
        ``{'stem': ..., 'text': ...}`` dicts, in sorted file order.
    """
    by_speaker = {}
    for wav_file in sorted(Path(data_dir).glob("*.wav")):
        speaker_id, _, _ = wav_file.stem.partition('_')

        # The transcript is optional; a missing file yields an empty text.
        transcript_path = wav_file.with_suffix('.txt')
        transcript = (
            transcript_path.read_text(encoding='utf-8')
            if transcript_path.exists()
            else ""
        )

        by_speaker.setdefault(speaker_id, []).append(
            {'stem': wav_file.stem, 'text': transcript}
        )

    return by_speaker

# Scan DATA_DIR once; the widgets below read from these module-level values.
SPEAKER_DATA = get_speaker_data(DATA_DIR)
# Iterating a dict yields its keys, so this is the sorted list of speaker ids.
SPEAKER_LIST = sorted(SPEAKER_DATA)

In [None]:
def plot_alignment(stem):
    """Plot one segment's waveform and spectrogram with its alignment overlaid.

    Loads ``<stem>.wav`` and the semicolon-separated ``<stem>.csv`` from
    ``DATA_DIR``. Every CSV row is drawn as a dashed boundary with its
    ``MAU`` label. Consecutive rows that share a ``TOKEN`` value are merged
    into one span, drawn as a solid boundary and labelled with ``ORT``
    (or ``<p:>`` when ``TOKEN`` is negative). An audio player for the segment
    is displayed after the figure.

    ``BEGIN`` and ``DURATION`` are divided by the sampling rate before
    plotting, i.e. they are treated as sample counts.

    Args:
        stem: File name (without extension) of the segment to show.

    Returns:
        None. If the ``.wav`` or ``.csv`` file is missing, an error message
        is printed and nothing is plotted.
    """
    base_dir = Path(DATA_DIR)
    wav_path = base_dir / f"{stem}.wav"
    csv_path = base_dir / f"{stem}.csv"

    if not (wav_path.exists() and csv_path.exists()):
        print(f"Error: Missing files for {stem}")
        return

    # sr=None keeps the file's native sampling rate.
    signal, rate = librosa.load(wav_path, sr=None)
    alignment = pd.read_csv(csv_path, sep=';')

    fig, (ax_wave, ax_spec) = plt.subplots(2, 1, figsize=(15, 8), sharex=True)

    # Top panel: waveform.
    librosa.display.waveshow(signal, sr=rate, ax=ax_wave, alpha=0.5)
    ax_wave.set_title(f"Alignment for {stem}")
    ax_wave.set_ylabel("Amplitude")

    # Bottom panel: dB-scaled magnitude spectrogram.
    magnitude = np.abs(librosa.stft(signal))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=rate, x_axis='time', y_axis='hz', ax=ax_spec)
    ax_spec.set_ylabel("Hz")

    # Line style shared by the per-row boundaries and the end-of-audio marker.
    dashed = {'linestyle': '--', 'alpha': 0.3}

    # One dashed boundary and one red MAU label per CSV row.
    for _, segment in alignment.iterrows():
        onset_sec = segment['BEGIN'] / rate
        ax_wave.axvline(onset_sec, color='gray', **dashed)
        ax_spec.axvline(onset_sec, color='white', **dashed)
        center_sec = onset_sec + (segment['DURATION'] / (2 * rate))
        ax_wave.text(center_sec, -0.6, segment['MAU'], color='red', fontsize=9, horizontalalignment='center')

    # Number each run of consecutive rows with the same TOKEN so that a TOKEN
    # value recurring later in the file starts a new group.
    alignment['block'] = (alignment['TOKEN'] != alignment['TOKEN'].shift()).cumsum()
    span_aggregation = {
        'BEGIN': 'min',
        'DURATION': 'sum',
        'ORT': 'first'
    }
    spans = alignment.groupby(['block', 'TOKEN']).agg(span_aggregation).reset_index()

    # One solid boundary and one bold label per merged span.
    for _, span in spans.iterrows():
        onset_sec = span['BEGIN'] / rate
        if span['TOKEN'] >= 0:
            label = str(span['ORT'])
            wave_color = 'blue'
            spec_color = 'cyan'
        else:
            label = '<p:>'
            wave_color = 'green'
            spec_color = 'lightgreen'

        ax_wave.axvline(onset_sec, color=wave_color, linewidth=2, alpha=0.6)
        ax_spec.axvline(onset_sec, color=spec_color, linewidth=2, alpha=0.6)

        center_sec = onset_sec + (span['DURATION'] / (2 * rate))
        ax_wave.text(center_sec, -0.9, label, color=wave_color, fontsize=12, fontweight='bold', horizontalalignment='center')

    # Mark the end of the audio on both panels.
    end_sec = len(signal) / rate
    ax_wave.axvline(end_sec, color='gray', **dashed)
    ax_spec.axvline(end_sec, color='white', **dashed)

    plt.tight_layout()
    plt.show()
    plt.close(fig)  # Prevent double display
    display(Audio(signal, rate=rate))

In [None]:
# INTERACTIVE WIDGETS
# Dropdowns for choosing a speaker and then one of that speaker's segments.
# The segment options are filled in by update_segments().
speaker_select = widgets.Dropdown(
    options=SPEAKER_LIST,
    description='Speaker:',
)
segment_select = widgets.Dropdown(
    description='Segment:',
)

# Separate output areas for the alignment plot and the segment summary table.
output_plot = widgets.Output()
output_table = widgets.Output()

def update_segments(change):
    """Refresh the segment dropdown and summary table for the chosen speaker.

    Registered as the observer of ``speaker_select`` and also called directly
    once to populate the UI.

    Args:
        change: Change notification supplied by the widget observer (``None``
            when called directly). It is not inspected; the current value of
            ``speaker_select`` is read instead.
    """
    speaker_id = speaker_select.value
    # With no speakers found (e.g. DATA_DIR missing or empty) the dropdown
    # has no options and its value is None. Fall back to an empty list rather
    # than raising KeyError, so the UI still renders.
    segments = SPEAKER_DATA.get(speaker_id, [])
    segment_select.options = [s['stem'] for s in segments]

    with output_table:
        output_table.clear_output()
        if segments:
            df_summary = pd.DataFrame(segments)
            display(df_summary)
        else:
            print("No segments available.")

def on_selection_change(change):
    """Redraw the alignment plot for the currently selected segment.

    Args:
        change: Change notification supplied by the widget observer. It is
            not inspected; the current value of ``segment_select`` is read
            instead.
    """
    with output_plot:
        # wait=True defers clearing until new output arrives.
        output_plot.clear_output(wait=True)
        selected_stem = segment_select.value
        if not selected_stem:
            return
        plot_alignment(selected_stem)

# Re-run the handlers whenever the value of the matching dropdown changes.
speaker_select.observe(update_segments, names='value')
segment_select.observe(on_selection_change, names='value')

# Display UI
display(
    widgets.VBox(
        children=[speaker_select, segment_select, output_table, output_plot],
    )
)

# Initial population
update_segments(None)