In [None]:
import os
import wave
import pympi.Elan as Elan
import numpy as np

def normalize_audio(audio_data):
    """Scale audio samples so that the largest absolute amplitude is 1.0.

    Args:
        audio_data: Array-like of samples (integer PCM or floating point).

    Returns:
        A new floating-point NumPy array. Integer input is converted to
        float64; floating-point input keeps its dtype. Empty input and
        all-zero input (digital silence) are returned as empty / all-zero
        arrays instead of raising or producing NaN.
    """
    samples = np.asarray(audio_data)
    # Work in floating point: np.abs on int16 overflows for -32768 (it stays
    # negative), which would give a wrong peak and values outside [-1, 1].
    if not np.issubdtype(samples.dtype, np.floating):
        samples = samples.astype(np.float64)

    if samples.size == 0:
        return samples.copy()

    peak = np.max(np.abs(samples))
    if peak == 0:
        # Digital silence: nothing to scale, and dividing would yield NaN.
        return np.zeros_like(samples)
    return samples / peak

def detect_ipus(audio_path, min_sil_dur, min_ipu_dur, silence_percentile):
    """Detect inter-pausal units (IPUs) in a 16-bit PCM WAV file.

    A sample is "silent" when its peak-normalised absolute amplitude is below
    the ``silence_percentile``-th percentile of all samples in the file. Runs
    of non-silent samples separated by less than ``min_sil_dur`` seconds of
    silence are merged into one IPU.

    Args:
        audio_path: Path of the WAV file to analyse.
        min_sil_dur: Minimum pause length, in seconds, that splits two IPUs.
        min_ipu_dur: Minimum IPU length in seconds; shorter ones are dropped.
        silence_percentile: Percentile (0-100) of the normalised absolute
            amplitude used as the silence threshold.

    Returns:
        A list of ``(start, end)`` tuples in seconds, in chronological order.
        ``start`` is the time of the first non-silent sample of the unit and
        ``end`` is the time of the first silent sample after it. Empty or
        all-zero files yield an empty list.

    Raises:
        ValueError: If the file is not 16-bit PCM.
    """
    with wave.open(audio_path, 'rb') as wf:
        framerate = wf.getframerate()
        n_channels = wf.getnchannels()
        sample_width = wf.getsampwidth()
        raw = wf.readframes(wf.getnframes())

    if sample_width != 2:
        raise ValueError(
            f"Unsupported sample width ({8 * sample_width}-bit) in {audio_path}; "
            "only 16-bit PCM is supported"
        )

    # WAV PCM data is little-endian; samples of different channels are
    # interleaved, so de-interleave before computing per-frame timing.
    samples = np.frombuffer(raw, dtype='<i2')
    n_frames = samples.size // n_channels
    if n_frames == 0:
        return []
    frames = samples[:n_frames * n_channels].astype(np.float64)
    amplitude = np.abs(frames.reshape(n_frames, n_channels).mean(axis=1))

    # Normalise inline (rather than via a helper) so that digital silence can
    # be handled before dividing by the peak.
    peak = amplitude.max()
    if peak == 0:
        return []
    amplitude /= peak

    # Set threshold based on percentile
    threshold = np.percentile(amplitude, silence_percentile)
    print(f"Silence threshold for {audio_path}: {threshold:.4f}")

    voiced = np.flatnonzero(amplitude >= threshold)
    if voiced.size == 0:
        return []

    # Number of silent frames between consecutive non-silent frames; a gap of
    # at least min_sil_dur closes the current IPU and opens the next one.
    gap_frames = np.diff(voiced) - 1
    breaks = np.flatnonzero(gap_frames / framerate >= min_sil_dur)

    # The first IPU opens at the first non-silent frame (even when the file
    # starts with speech) and the last one closes right after the last
    # non-silent frame (so trailing silence is not counted as speech).
    start_frames = voiced[np.concatenate(([0], breaks + 1))]
    end_frames = voiced[np.concatenate((breaks, [voiced.size - 1]))] + 1

    # Filter out short IPUs
    return [
        (start / framerate, end / framerate)
        for start, end in zip(start_frames.tolist(), end_frames.tolist())
        if (end - start) / framerate >= min_ipu_dur
    ]

def segment_audio_to_ipu(input_folder, output_folder, min_sil_dur, min_ipu_dur, silence_percentile):
    """Write one ELAN ``.eaf`` file with an "IPUs" tier per WAV file in a folder.

    Every file in ``input_folder`` whose name ends in ``.wav`` (case
    insensitive) is passed to ``detect_ipus``; the detected units are stored
    as annotations ``IPU_1``, ``IPU_2``, ... and saved to ``output_folder``
    under the same base name with an ``.eaf`` extension. A failure on one
    file is printed and does not stop the remaining files.

    Args:
        input_folder: Directory containing the WAV files.
        output_folder: Directory for the generated files; created if missing.
        min_sil_dur: Forwarded to ``detect_ipus``.
        min_ipu_dur: Forwarded to ``detect_ipus``.
        silence_percentile: Forwarded to ``detect_ipus``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir order is arbitrary; sort so runs are reproducible.
    for file_name in sorted(os.listdir(input_folder)):
        if not file_name.lower().endswith('.wav'):
            continue

        input_path = os.path.join(input_folder, file_name)
        output_eaf_path = os.path.join(output_folder, os.path.splitext(file_name)[0] + '.eaf')

        try:
            print(f"\nProcessing {file_name}...")

            # Detect IPUs
            tracks = detect_ipus(input_path, min_sil_dur, min_ipu_dur, silence_percentile)
            print(f"Tracks found for {file_name}: {tracks}")
            print(f"Nb IPUs: {len(tracks)}")

            # Create an EAF file
            eaf_file = Elan.Eaf()
            eaf_file.add_tier("IPUs")
            # NOTE(review): the media is linked by bare file name, not by
            # input_path -- confirm ELAN can resolve it from the output folder.
            eaf_file.add_linked_file(file_name)

            for index, (start, end) in enumerate(tracks, start=1):
                # Round rather than truncate: float error (e.g. 1.001 * 1000
                # == 1000.9999...) would otherwise shift a boundary by 1 ms.
                start_ms = int(round(start * 1000))
                end_ms = int(round(end * 1000))
                eaf_file.add_annotation("IPUs", start_ms, end_ms, f"IPU_{index}")

            # Save the EAF file
            eaf_file.to_file(output_eaf_path)
            print(f"Saved: {output_eaf_path}")

        except Exception as e:
            # Top-level batch boundary: report the failure and move on.
            print(f"Error processing {file_name}: {e}")

# Segmentation settings
silence_percentile = 70  # percentile of normalised |amplitude| used as silence threshold
min_ipu_dur = 0.3  # shortest IPU that is kept, in seconds
min_sil_dur = 0.25  # shortest pause that separates two IPUs, in seconds

# Notebook-friendly execution
output_folder = "/home/or-llsh-156-l01/projets/these/output2"
input_folder = "/home/or-llsh-156-l01/projets/these/audios"
segment_audio_to_ipu(
    input_folder=input_folder,
    output_folder=output_folder,
    min_sil_dur=min_sil_dur,
    min_ipu_dur=min_ipu_dur,
    silence_percentile=silence_percentile,
)



Processing BEMW_HH19.wav...
Silence threshold for /home/or-llsh-156-l01/projets/these/audios/BEMW_HH19.wav: 0.0454
[ True  True  True ...  True  True  True]
Tracks found for BEMW_HH19.wav: [(2.6041875, 6.608125), (7.094375, 11.1841875)]
Nb IPUs: 2
Saved: /home/or-llsh-156-l01/projets/these/output2/BEMW_HH19.eaf

Processing interview_foot.wav...
Silence threshold for /home/or-llsh-156-l01/projets/these/audios/interview_foot.wav: 0.0162
[ True  True  True ...  True  True  True]
Tracks found for interview_foot.wav: [(22.172333333333334, 22.5735), (22.883458333333333, 30.280125), (31.125, 40.244875), (41.689708333333336, 48.93025), (50.50595833333333, 59.70758333333333), (60.033458333333336, 62.61120833333333), (62.91654166666667, 65.78225), (66.07220833333334, 66.40625), (66.86708333333333, 67.19325), (67.64508333333333, 69.497375), (70.40554166666666, 85.20895833333333), (85.57854166666667, 94.69691666666667), (95.13766666666666, 101.44145833333333), (101.97629166666667, 108.22179166666