<a href="https://colab.research.google.com/github/Sourasky-DHLAB/Whisper/blob/main/fairseq_meta_mms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# clone fairseq
import os

!git clone https://github.com/pytorch/fairseq

# Get the current working directory
current_dir = os.getcwd()

# Create the directory paths
audio_samples_dir = os.path.join(current_dir, "audio_samples")
temp_dir = os.path.join(current_dir, "temp_dir")

# Create the directories if they don't exist
os.makedirs(audio_samples_dir, exist_ok=True)
os.makedirs(temp_dir, exist_ok=True)


# Change current working directory
os.chdir('fairseq')

!pwd

# to install the latest stable release (0.10.x)
# pip install fairseq

In [None]:
# Install requirements and build

!pip install --editable ./

In [None]:
# Install tensorboardX
!pip install tensorboardX

In [None]:
# Sanity check: show the current working directory. The inference cells use
# paths relative to it (PYTHONPATH="." and examples/mms/asr/infer/mms_infer.py).
!pwd

In [None]:
# Download Model (uncomment)

# # MMS-1B:FL102 model - 102 Languages - FLEURS Dataset
#!wget -P ./models_new 'https://dl.fbaipublicfiles.com/mms/asr/mms1b_fl102.pt'

# # MMS-1B:L1107 - 1107 Languages - MMS-lab Dataset
#!wget -P ./models_new 'https://dl.fbaipublicfiles.com/mms/asr/mms1b_l1107.pt'

# MMS-1B-all - 1162 Languages - MMS-lab + FLEURS + CV + VP + MLS
!wget -P ./models_new 'https://dl.fbaipublicfiles.com/mms/asr/mms1b_all.pt'


In [None]:
# Run Inference for a short audio file only

import os
import time

os.environ["TMPDIR"] = '/content/temp_dir'
os.environ["PYTHONPATH"] = "."
os.environ["PREFIX"] = "INFER"
os.environ["HYDRA_FULL_ERROR"] = "1"
os.environ["USER"] = "micro"
start_time = time.time()  # Start the timer

!python examples/mms/asr/infer/mms_infer.py --model "/content/fairseq/models_new/mms1b_fl102.pt" --lang "heb" --audio "/content/audio_samples/t0102.wav"

end_time = time.time()  # Stop the timer
elapsed_time = end_time - start_time

print("Elapsed Time:", elapsed_time, "seconds")

In [36]:
!pip install pydub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
# Transcribe a large audio file:
#   1. split the recording into chunks at silences,
#   2. run the MMS inference script on each chunk in a child process,
#   3. join the per-chunk outputs into one transcript.
import os
import subprocess
import time

from pydub import AudioSegment
from pydub.silence import split_on_silence
from tqdm import tqdm

# Environment variables set before launching the inference script; child
# processes started with subprocess inherit them.
os.environ["TMPDIR"] = '/content/temp_dir'
os.environ["PYTHONPATH"] = "."
os.environ["PREFIX"] = "INFER"
os.environ["HYDRA_FULL_ERROR"] = "1"
os.environ["USER"] = "micro"

start_time = time.time()  # Start the timer

audio_path = "/content/audio_samples/t0102.wav"
audio = AudioSegment.from_wav(audio_path)

# NOTE: this value is not applied anywhere in this cell. Chunk boundaries come
# only from split_on_silence below, so chunks are not capped at 30 seconds.
chunk_length_ms = 30 * 1000  # 30 seconds in milliseconds

model_path = "/content/fairseq/models_new/mms1b_fl102.pt"
language = "heb"

# Split audio based on silence
audio_chunks = split_on_silence(audio, min_silence_len=1000, silence_thresh=-40)

transcripts = []  # Store the transcripts of each chunk

# Wrapping the iterable lets tqdm manage the progress bar itself, so it is not
# left open if inference on one of the chunks raises.
for i, chunk in enumerate(tqdm(audio_chunks, desc="Processing Chunks")):
    # Export the current chunk to a file
    chunk_path = f"/content/temp_dir/chunk_{i}.wav"
    chunk.export(chunk_path, format="wav")

    # Perform inference on the current chunk. The command is passed as an
    # argument list (no shell), so paths containing spaces or shell
    # metacharacters reach the script unchanged.
    inference_command = [
        "python",
        "examples/mms/asr/infer/mms_infer.py",
        "--model", model_path,
        "--lang", language,
        "--audio", chunk_path,
    ]
    # check_output raises CalledProcessError if the script exits non-zero.
    output = subprocess.check_output(inference_command, text=True)

    # Strip surrounding whitespace so joining with a single space below does
    # not leave stray blank padding between chunk outputs.
    transcripts.append(output.strip())

full_transcript = " ".join(transcripts)  # Concatenate the transcripts

end_time = time.time()  # Stop the timer
elapsed_time = end_time - start_time

print("Full Transcript:")
print(full_transcript)
print("Elapsed Time:", elapsed_time, "seconds")
