# Libraries and Imports

In [1]:
%pip install pydub


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [7]:
import sys
import os
import subprocess

# Make asr/asr_pipeline.py importable by adding its directory to the Python path.
# The path is resolved against the current working directory. The membership check
# keeps the cell idempotent: re-running it no longer stacks duplicate sys.path entries.
asr_dir = os.path.abspath("../asr")
if asr_dir not in sys.path:
    sys.path.append(asr_dir)

In [3]:
from pydub import AudioSegment
from asr_pipeline import asr_infer_pipeline

# ASR
This section loads an audio file from disk, converts it to .wav, and obtains its transcript.

In [4]:
def convert_to_wav(input_file: str, output_file: str) -> str:
    """
    Re-encode an audio file as .wav and report where it was written.

    Args:
        input_file (str): Location of the audio file to read.
        output_file (str): Destination path for the .wav file.

    Returns:
        str: ``output_file``, returned unchanged so the call can be chained
        into the next processing step.
    """
    # Decode the source with pydub, then write it straight back out as wav
    AudioSegment.from_file(input_file).export(output_file, format="wav")
    return output_file

Convert the audio file to .wav format

In [5]:
# Source recording and the .wav path it is converted to
input_file_path = "../ltl/test_samples/test01.m4a"
output_file_path = "../ltl/test_samples/test01.wav"

# convert_to_wav returns the output path; later cells read it from converted_file
converted_file = convert_to_wav(
    input_file=input_file_path,
    output_file=output_file_path,
)



In [9]:
# Transcribe the converted .wav file with asr_infer_pipeline (imported from asr_pipeline).
# NOTE(review): the result is later split with .split(), so it is presumably a str — confirm.
transcript = asr_infer_pipeline(converted_file)

# Create TextGrid File

In [20]:
def call_aeneas_dynamic(audio_file_path: str, text_transcript: str) -> str:
    """
    Call Aeneas to align a transcript with an audio file and write the result to disk.

    The transcript is split on whitespace and written one word per line to a
    temporary text file, which is passed to ``aeneas.tools.execute_task``. The
    output goes to ``text_grid_files/<audio file stem>.TextGrid`` under the current
    working directory; that folder is created if it does not exist. The temporary
    text file is removed afterwards, whether or not Aeneas succeeded.

    Args:
        audio_file_path (str): The path to the audio file. A relative path is
            resolved against the current working directory.
        text_transcript (str): The transcription text.

    Returns:
        str: The path to the generated file, relative to the current working directory.

    Raises:
        subprocess.CalledProcessError: If the command to run Aeneas fails.
    """
    # Define directories based on the current working directory
    current_dir = os.getcwd()
    PROJECT_DIR = os.path.abspath(os.path.join(current_dir, '..'))
    AENEAS_DIR = os.path.join(PROJECT_DIR, 'aeneas')
    TEMP_TEXT_FILE_PATH = os.path.join(PROJECT_DIR, 'temp_transcription.txt')
    TEXT_GRID_DIR = os.path.join(current_dir, 'text_grid_files')

    # Aeneas is launched with AENEAS_DIR as its working directory, so a relative
    # audio path must be anchored to the caller's directory before it is handed over.
    audio_abs_path = os.path.abspath(audio_file_path)

    # Ensure the output directory exists
    if not os.path.exists(TEXT_GRID_DIR):
        os.makedirs(TEXT_GRID_DIR, exist_ok=True)
        print(f"Created directory {TEXT_GRID_DIR}")

    # Define the output file path
    audio_stem = os.path.splitext(os.path.basename(audio_file_path))[0]
    output_file_path = os.path.join(TEXT_GRID_DIR, f"{audio_stem}.TextGrid")

    # NOTE(review): 'aud' is Aeneas's Audacity-label output format even though the file
    # is named .TextGrid; Aeneas also offers a 'textgrid' format. Left unchanged here —
    # confirm which format downstream consumers expect.
    command = [
        'python3', '-m', 'aeneas.tools.execute_task',
        audio_abs_path,
        TEMP_TEXT_FILE_PATH,
        'task_language=eng|is_text_type=plain|os_task_file_format=aud',
        output_file_path
    ]

    try:
        # Write the transcription one word per line; done inside the try block so the
        # cleanup below also runs if writing fails part-way.
        with open(TEMP_TEXT_FILE_PATH, 'w', encoding='utf-8') as temp_text_file:
            temp_text_file.writelines(word + '\n' for word in text_transcript.split())

        # Run Aeneas from its own directory without changing this process's working
        # directory (os.chdir is process-wide and would leak into other notebook cells).
        subprocess.run(command, check=True, cwd=AENEAS_DIR)
        print(f"Generated TextGrid for {audio_file_path} to {output_file_path}")
    except subprocess.CalledProcessError as e:
        print(f"Failed to generate TextGrid for {audio_file_path}: {e}")
        # Propagate as documented: returning normally would hand the caller a path
        # to a file that was never written.
        raise
    finally:
        # Clean up the temporary text file
        if os.path.exists(TEMP_TEXT_FILE_PATH):
            os.remove(TEMP_TEXT_FILE_PATH)

    return os.path.relpath(output_file_path, start=current_dir)


In [21]:
# Example usage:
# Align the file that was actually transcribed: reuse converted_file (the .wav returned
# by convert_to_wav) instead of repeating its path, so the audio and the transcript
# cannot drift apart. To process a different recording, change the paths in the
# conversion cell and re-run from there.
audio_file_path = converted_file
text_transcript = transcript

output_text_grid = call_aeneas_dynamic(audio_file_path, text_transcript)
print(f"TextGrid saved to: {output_text_grid}")

Created directory /Users/kayems/Library/CloudStorage/OneDrive-AshesiUniversity/IUDvPP/ltl/text_grid_files
[INFO] Validating config string (specify --skip-validator to bypass)...
[INFO] Validating config string... done
[INFO] Creating task...
[INFO] Creating task... done
[INFO] Executing task...
[INFO] Executing task... done
[INFO] Creating output sync map file...
[INFO] Creating output sync map file... done
[92m[INFO] Created file '/Users/kayems/Library/CloudStorage/OneDrive-AshesiUniversity/IUDvPP/ltl/text_grid_files/test01.TextGrid'[0m
Generated TextGrid for ../ltl/test_samples/test01.wav to /Users/kayems/Library/CloudStorage/OneDrive-AshesiUniversity/IUDvPP/ltl/text_grid_files/test01.TextGrid
TextGrid saved to: text_grid_files/test01.TextGrid
