<a href="https://colab.research.google.com/github/Sourasky-DHLAB/Whisper/blob/main/speech_brain_amharic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the third-party packages used below: pydub (audio slicing) and
# speechbrain (ASR model). transformers is not imported directly in this
# notebook; presumably the wav2vec2-based model needs it — confirm.
!pip install pydub speechbrain transformers

In [None]:
import os
from pydub import AudioSegment
from speechbrain.inference.ASR import EncoderASR

In [None]:
# Mount Google Drive at /content/drive so files stored there can be
# read and written by ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Path to the large source recording (WAV) on the mounted Google Drive
audio_path = "/content/drive/MyDrive/Whisper/Audio/מפגש השלמה אשקלון קייס אביו חומר גלם.wav"

# Length of each chunk in milliseconds (here: 5 minutes)
chunk_length_ms = 5 * 60 * 1000  # 5 minutes * 60 seconds * 1000 ms

# Directory to save chunks (relative to the current working directory)
output_dir = "chunks"
# exist_ok=True keeps this cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

# Path to output transcription file (relative to the current working directory)
transcription_output_path = "full_transcription.txt"

In [None]:
# Read the source recording from its WAV file
audio = AudioSegment.from_wav(audio_path)

# Cut the recording into consecutive pieces of chunk_length_ms each
# (the last piece may be shorter) and export every piece as its own WAV.
chunk_files = []
for chunk_index, start_ms in enumerate(range(0, len(audio), chunk_length_ms)):
    segment_path = os.path.join(output_dir, f"chunk_{chunk_index}.wav")
    audio[start_ms:start_ms + chunk_length_ms].export(segment_path, format="wav")
    chunk_files.append(segment_path)

print(f"Created {len(chunk_files)} chunks.")

In [None]:
import torch

# Use the GPU when one is available; otherwise fall back to the CPU so this
# cell still works on a runtime without CUDA instead of raising.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pretrained SpeechBrain Amharic ASR model. The files are fetched
# from `source` and stored under `savedir`.
asr_model = EncoderASR.from_hparams(
    source="speechbrain/asr-wav2vec2-dvoice-amharic",
    savedir="pretrained_models/asr-wav2vec2-dvoice-amharic",
    run_opts={"device": device},
)

In [None]:
# Transcribe every chunk in order, keeping one result per chunk
all_transcriptions = []

for wav_path in chunk_files:
    print(f"Transcribing {wav_path}...")
    text = asr_model.transcribe_file(wav_path)
    print(f"Transcription of {wav_path}:", text)
    all_transcriptions.append(text)

print("All chunks transcribed.")

In [None]:
# Join the per-chunk results into one text, one chunk per line
full_transcription = "\n".join(all_transcriptions)

# Write the combined text out as UTF-8
with open(transcription_output_path, mode="w", encoding="utf-8") as out_file:
    out_file.write(full_transcription)

print(f"Full transcription saved to {transcription_output_path}")

In [None]:
# Show a short preview: at most the first 500 characters of the transcription
preview = full_transcription[:500]
print(preview)

In [None]:
# Destination file on Google Drive (rebinds transcription_output_path)
transcription_output_path = "/content/drive/MyDrive/Whisper/Transcriptions/full_transcription.txt"

# Make sure the destination folder exists before writing
target_dir = os.path.dirname(transcription_output_path)
os.makedirs(target_dir, exist_ok=True)

# Write each chunk's text under its own header, separated by a blank line
with open(transcription_output_path, mode="w", encoding="utf-8") as out_file:
    for chunk_no, text in enumerate(all_transcriptions):
        header = f"--- Transcription of chunk {chunk_no} ---\n"
        out_file.write(header)
        out_file.write(text + "\n\n")

print(f"Transcription saved to {transcription_output_path}")
