In [1]:
import tgt
import os
import time


In [2]:
# Root directory that get_audio_files() walks to find the .wav recordings.
PATH_AUDIO = "../app/static/wav/test/wav"
# Directory holding the TextGrid alignment files read with tgt.io.read_textgrid.
PATH_ALIGNMENTS = "../app/static/alignments/en"

In [3]:
# Load one alignment TextGrid by name and display its "phones" tier to see
# what the interval data looks like.
tg = tgt.io.read_textgrid(f"{PATH_ALIGNMENTS}/id10309_XMLMvfrgdzY_00009.TextGrid") 
tg.get_tier_by_name("phones")


IntervalTier(start_time=0.0, end_time=5.960063, name="phones", objects=[Interval(0.0, 0.05, "L"), Interval(0.05, 0.11, "AY1"), Interval(0.11, 0.48, "K"), Interval(0.48, 0.53, "G"), Interval(0.53, 0.68, "ER1"), Interval(0.68, 0.76, "L"), Interval(0.76, 0.83, "Z"), Interval(0.83, 0.89, "W"), Interval(0.89, 0.94, "IH0"), Interval(0.94, 0.99, "DH"), Interval(0.99, 1.04, "AA0"), Interval(1.04, 1.11, "R"), Interval(1.11, 1.23, "T"), Interval(1.23, 1.28, "IH1"), Interval(1.28, 1.33, "S"), Interval(1.33, 1.4, "T"), Interval(1.4, 1.44, "IH0"), Interval(1.44, 1.5, "K"), Interval(1.5, 1.59, "S"), Interval(1.59, 1.64, "EH2"), Interval(1.64, 1.72, "N"), Interval(1.72, 1.77, "S"), Interval(1.77, 1.8, "IH0"), Interval(1.8, 1.9, "B"), Interval(1.9, 1.99, "IH1"), Interval(1.99, 2.09, "L"), Interval(2.09, 2.12, "IH0"), Interval(2.12, 2.16, "T"), Interval(2.16, 2.34, "IY0"), Interval(2.34, 2.4, "AH0"), Interval(2.4, 2.47, "N"), Interval(2.47, 2.5, "D"), Interval(2.5, 2.51, "HH"), Interval(2.51, 2.6, "UW1

In [4]:
def get_audio_files(path_audio):
    """Collect the path of every ``.wav`` file found under ``path_audio``.

    The directory tree is walked recursively with ``os.walk``. A file is kept
    when its name ends with the lowercase ``.wav`` suffix (the check is
    case-sensitive).

    Args:
        path_audio: Root directory to search.

    Returns:
        list[str]: One entry per matching file, each built by joining the
        directory that contains the file with the file name, in the order
        ``os.walk`` yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(path_audio)
        for name in names
        if name.endswith('.wav')
    ]

In [5]:
# Number of .wav files found under PATH_AUDIO (4874 when this cell was last run).
# The directory tree is walked once only: a bare call placed before the last
# expression of a cell is never displayed, so it would just repeat the scan.
len(get_audio_files(PATH_AUDIO))

4874

In [6]:
def get_alignment_file(audio_file, path_alignments=None):
    """Get the corresponding TextGrid alignment file for a given audio file.

    The last three components of ``audio_file`` are read as
    ``<person id>/<identifier>/<audio number>.<ext>`` and mapped to
    ``<path_alignments>/<person id>_<identifier>_<audio number>.TextGrid``.

    Args:
        audio_file: Path of the audio file. Must contain at least three
            components (two directory levels plus the file name).
        path_alignments: Directory containing the TextGrid files. Defaults to
            the module-level ``PATH_ALIGNMENTS`` when omitted.

    Returns:
        str: Path of the expected alignment file. The file is not checked for
        existence.

    Raises:
        ValueError: If ``audio_file`` has fewer than three path components.
    """
    if path_alignments is None:
        path_alignments = PATH_ALIGNMENTS

    # normpath collapses duplicate separators and "." segments and, on Windows,
    # converts "/" to "\\", so paths with mixed separators split reliably.
    parts = os.path.normpath(audio_file).split(os.sep)
    if len(parts) < 3:
        raise ValueError(
            f"Expected a path ending in <person>/<identifier>/<file>, got: {audio_file!r}"
        )

    person_id, identifier = parts[-3], parts[-2]
    audio_number = os.path.splitext(parts[-1])[0]
    return os.path.join(path_alignments, f"{person_id}_{identifier}_{audio_number}.TextGrid")

In [7]:
# Spot-check the audio -> alignment mapping on one hard-coded path
# (the literal uses Windows-style "\\" separators).
get_alignment_file("../app/static/wav/test/wav\\id10278\\Pp-rAswo4Xg\\00027.wav")

'../app/static/alignments/en\\id10278_Pp-rAswo4Xg_00027.TextGrid'

In [8]:
# Read the TextGrid that get_alignment_file() maps this audio path to, then
# display its "phones" tier. Note that this rebinds `tg`.
tg = tgt.io.read_textgrid(get_alignment_file("../app/static/wav/test/wav\\id10278\\Pp-rAswo4Xg\\00027.wav"))
tg.get_tier_by_name("phones")

IntervalTier(start_time=0.0, end_time=5.200062, name="phones", objects=[Interval(0.43, 0.53, "L"), Interval(0.53, 0.61, "UH1"), Interval(0.61, 0.71, "K"), Interval(0.71, 0.83, "IH0"), Interval(0.83, 1.14, "NG"), Interval(1.41, 1.77, "W"), Interval(1.77, 1.82, "IH1"), Interval(1.82, 1.93, "T"), Interval(1.93, 1.97, "N"), Interval(1.97, 2.04, "AH0"), Interval(2.04, 2.15, "S"), Interval(2.15, 2.21, "IH0"), Interval(2.21, 2.27, "N"), Interval(2.27, 2.37, "AH1"), Interval(2.37, 2.48, "F"), Interval(2.48, 2.53, "T"), Interval(2.53, 2.65, "AH0"), Interval(2.65, 2.7, "HH"), Interval(2.7, 2.83, "IH1"), Interval(2.83, 2.94, "S"), Interval(2.94, 3.02, "T"), Interval(3.02, 3.14, "ER0"), Interval(3.14, 3.28, "IY0"), Interval(3.67, 3.72, "T"), Interval(3.72, 3.82, "AH0"), Interval(3.82, 3.99, "S"), Interval(3.99, 4.19, "IY1"), Interval(4.19, 4.34, "DH"), Interval(4.34, 4.46, "AH0"), Interval(4.46, 4.52, "T"), Interval(4.52, 4.68, "W"), Interval(4.68, 4.86, "IY1")])

In [9]:
def calculate_phonemic_flow(alignment_file):
    """Return the phoneme rate (phonemes per second) of one TextGrid alignment.

    The rate is the number of intervals in the "phones" tier divided by the
    summed length of those intervals, so any time not covered by an interval
    of that tier does not count towards the duration.

    Args:
        alignment_file: Path of the TextGrid file to read.

    Returns:
        The rate, or 0 when the summed duration is not positive or when
        reading/processing the file raises any exception (the error is printed
        instead of being propagated).
    """
    try:
        phones = tgt.io.read_textgrid(alignment_file).get_tier_by_name("phones")
        # One duration per interval; computed inside the try so a failure here
        # takes the same fallback path as an unreadable file.
        durations = [segment.end_time - segment.start_time for segment in phones]
        spoken_time = sum(durations)
        if spoken_time > 0:
            return len(durations) / spoken_time
        return 0
    except Exception as error:
        print(f"Error processing {alignment_file}: {error}")
        return 0

In [10]:
# Try the rate computation on a single recording before running it on every file.
calculate_phonemic_flow(get_alignment_file("../app/static/wav/test/wav\\id10270\\5r0dWxy17C8\\00001.wav"))

13.210445468509983

In [35]:
audio_files = get_audio_files(PATH_AUDIO)

# Keep every result (audio path -> phonemes per second) so it can be reused
# without re-reading each TextGrid; calculate_phonemic_flow returns 0 for an
# alignment that failed to load or has no duration.
phonemic_flows = {}

# perf_counter() is a monotonic clock intended for measuring elapsed time;
# unlike time.time() it is unaffected by system clock adjustments.
start_time = time.perf_counter()
for audio_file in audio_files:
    alignment_file = get_alignment_file(audio_file)
    flow_phonemic = calculate_phonemic_flow(alignment_file)
    phonemic_flows[audio_file] = flow_phonemic
    print(f"{audio_file} flow_phonemic : {flow_phonemic}")

end_time = time.perf_counter()

print(f"Processing completed in {(end_time - start_time) / 60:.2f} minutes")


../app/static/wav/test/wav\id10270\5r0dWxy17C8\00001.wav flow_phonemic : 13.210445468509983
../app/static/wav/test/wav\id10270\5r0dWxy17C8\00002.wav flow_phonemic : 11.111111111111112
../app/static/wav/test/wav\id10270\5r0dWxy17C8\00003.wav flow_phonemic : 16.569767441860467
../app/static/wav/test/wav\id10270\5r0dWxy17C8\00004.wav flow_phonemic : 10.07751937984496
Error processing ../app/static/alignments/en\id10270_5r0dWxy17C8_00005.TextGrid: [Errno 2] No such file or directory: '../app/static/alignments/en\\id10270_5r0dWxy17C8_00005.TextGrid'
../app/static/wav/test/wav\id10270\5r0dWxy17C8\00005.wav flow_phonemic : 0
../app/static/wav/test/wav\id10270\5r0dWxy17C8\00006.wav flow_phonemic : 12.082262210796914
../app/static/wav/test/wav\id10270\5r0dWxy17C8\00007.wav flow_phonemic : 13.466550825369247
../app/static/wav/test/wav\id10270\5r0dWxy17C8\00008.wav flow_phonemic : 11.74129353233831
../app/static/wav/test/wav\id10270\5r0dWxy17C8\00009.wav flow_phonemic : 14.6875
../app/static/wav/