<a href="https://colab.research.google.com/github/Sourasky-DHLAB/Whisper/blob/main/OtherASRs/ASR_SpeechBrain_Amharic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Library Installations

In [None]:
# Install the third-party packages for this notebook; of these, the cells below
# import speechbrain and pydub directly.
# The leading "!" hands the line to the notebook's shell instead of Python.
!pip install speechbrain transformers librosa numpy soundfile pydub

# Library Imports

In [None]:
# Importing necessary libraries for audio processing, model inference, and utility functions.
import os
import time

import torch
from pydub import AudioSegment
from pydub.silence import split_on_silence
# NOTE(review): newer SpeechBrain releases appear to expose this class under
# `speechbrain.inference`; confirm against the installed version.
from speechbrain.pretrained import EncoderASR
from tqdm import tqdm

# Define Path to Audio File

In [None]:
# Path of the WAV recording to transcribe; edit this to point at your own file.
# The file name (without extension) is reused later to name the transcript file.
# NOTE(review): the path lives under /content/drive, but no cell visible in this
# notebook mounts Google Drive — confirm Drive is mounted before running.
AUDIO_FILE_PATH: str = "/content/drive/MyDrive/Whisper/Audio/test_2.wav"

# Configuration and Model Loading

In [None]:
# Pick the inference device: use the GPU when one is available, otherwise fall
# back to the CPU so the notebook still runs on a CPU-only runtime instead of
# failing while loading the model.
run_opts = {"device": "cuda" if torch.cuda.is_available() else "cpu"}

# Load the pre-trained Amharic wav2vec2 ASR model through SpeechBrain.
#   source   - identifier of the pre-trained model to fetch
#   savedir  - local directory that receives the model files
#   run_opts - runtime options; here only the device chosen above
asr_model = EncoderASR.from_hparams(
    source="speechbrain/asr-wav2vec2-dvoice-amharic",
    savedir="pretrained_models/asr-wav2vec2-dvoice-amharic",
    run_opts=run_opts
)

# Load and Split Audio

In [None]:
# Load the WAV recording from the configured path.
audio = AudioSegment.from_wav(AUDIO_FILE_PATH)

# Upper bound on the duration of any single chunk handed to the ASR model.
chunk_length_ms = 30 * 1000  # 30 seconds in milliseconds

# First pass: cut the recording at pauses. A pause is a stretch of at least
# 1000 ms that stays below the -40 dBFS threshold.
silence_chunks = split_on_silence(audio, min_silence_len=1000, silence_thresh=-40)

# Second pass: enforce the length limit. Without it, a recording that contains
# no long pause would reach the model as one arbitrarily long chunk.
audio_chunks = []
for segment in silence_chunks:
    segment_ms = len(segment)  # len() of an AudioSegment is its duration in ms
    if segment_ms <= chunk_length_ms:
        # Short enough already: keep the segment exactly as produced above.
        audio_chunks.append(segment)
        continue

    # Split into the smallest number of near-equal parts that respect the
    # limit, rather than fixed 30 s slices, so the last part is never a tiny
    # leftover fragment. -(-a // b) is ceiling division on integers.
    part_count = -(-segment_ms // chunk_length_ms)
    part_ms = -(-segment_ms // part_count)
    audio_chunks.extend(
        segment[start:start + part_ms]  # AudioSegment slices are in ms
        for start in range(0, segment_ms, part_ms)
    )

# Ensure Temporary Directory Exists

In [None]:
# Make sure the scratch directory for the exported chunk files exists.
# exist_ok=True turns "check, then create" into one call that is safe to re-run
# and cannot fail because the directory appeared between the check and the create.
os.makedirs("/content/temp_dir", exist_ok=True)

# Process Audio Chunks and Transcribe

In [None]:
# Transcriptions, one entry per audio chunk, in chunk order.
transcripts = []

# The progress bar is used as a context manager so that it is closed even when
# exporting or transcribing a chunk raises part-way through the loop.
with tqdm(total=len(audio_chunks), desc="Processing Chunks") as progress_bar:
    for i, chunk in enumerate(audio_chunks):
        # transcribe_file() is given a path, so the in-memory chunk is first
        # written to its own WAV file in the scratch directory.
        chunk_path = f"/content/temp_dir/chunk_{i}.wav"
        chunk.export(chunk_path, format="wav")

        # Run the ASR model on the exported chunk and keep its output.
        transcription = asr_model.transcribe_file(chunk_path)
        transcripts.append(transcription)

        # One step per processed chunk.
        progress_bar.update(1)

# Compile Results and Display

In [None]:
# Stitch the per-chunk transcriptions into one string, with a single space
# between consecutive chunks.
full_transcript = str.join(" ", transcripts)

# Show the heading and the complete transcription, each on its own line.
print("Full Transcript:", full_transcript, sep="\n")

# Save Output

In [None]:
# Name the transcript after the audio file: strip the directory and extension.
audio_base_name = os.path.basename(AUDIO_FILE_PATH)  # e.g., "test_2.wav"
audio_name_without_ext = os.path.splitext(audio_base_name)[0]  # e.g., "test_2"

# Destination of the transcript: a .txt file carrying the audio file's name.
txt_file_path = f"/content/drive/MyDrive/Whisper/Transcriptions/{audio_name_without_ext}.txt"

# Create the output folder if it is missing so the write below does not fail
# after all the transcription work has already been done.
os.makedirs(os.path.dirname(txt_file_path), exist_ok=True)

# Write with an explicit UTF-8 encoding: the transcript is expected to contain
# non-ASCII (Amharic) text, and the platform default encoding is not guaranteed
# to be able to represent it.
with open(txt_file_path, 'w', encoding="utf-8") as f:
    f.write(full_transcript)