In [1]:
# Install pytube for YouTube video downloading
!pip install pytube
# Install pydub for audio manipulation
!pip install pydub


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytube
Successfully installed pytube-15.0.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [2]:
import os

from pytube import YouTube

#class from the pydub package is imported to manipulate audio files, such as slicing and exporting.
from pydub import AudioSegment

# Download the audio track of a YouTube video, keep its first minute, and save
# that minute as /content/chunk2/audio.wav.

# URL of the video
url = 'https://www.youtube.com/watch?v=_NdA1kNxuJ4&t=1s'

youtube = YouTube(url)

# Select the first audio-only stream. first() returns None when the filter
# matches nothing, so fail with a clear message instead of an AttributeError.
video = youtube.streams.filter(only_audio=True).first()
if video is None:
    raise RuntimeError(f"No audio-only stream available for {url}")
out_file = video.download(output_path="/content/")

# Load and slice the audio
audio = AudioSegment.from_file(out_file, format="mp4")
sliced_audio = audio[:1*60*1000]  # first 1 minute; pydub indexes in milliseconds

# Create the directory if it doesn't exist
os.makedirs('/content/chunk2', exist_ok=True)

# Export the audio to the directory. export() hands back the open file object;
# close it so the handle is not leaked (and is not echoed as the cell output).
sliced_audio.export("/content/chunk2/audio.wav", format="wav").close()


<_io.BufferedRandom name='/content/chunk2/audio.wav'>

In [3]:
import subprocess

# Resample the exported WAV file to 16 kHz with FFmpeg.
input_filename = "/content/chunk2/audio.wav"
output_filename = "/content/chunk2/urdu_10_16k.wav"  # Updated filename

# FFmpeg argument list:
#   -y          overwrite the output file without prompting
#   -i <file>   input file
#   -ar 16000   output audio sample rate of 16000 Hz
command = ['ffmpeg', '-y', '-i', input_filename, '-ar', '16000', output_filename]

# check=True raises subprocess.CalledProcessError when FFmpeg exits with a
# non-zero status, so a failed conversion cannot go unnoticed by later cells.
# The CompletedProcess is still the cell's displayed result on success.
subprocess.run(command, check=True)


CompletedProcess(args=['ffmpeg', '-y', '-i', '/content/chunk2/audio.wav', '-ar', '16000', '/content/chunk2/urdu_10_16k.wav'], returncode=0)

In [4]:
!mkdir "temp_dir"

#git clone command. 
#It fetches the source code and creates a local copy of the repository on your machine.

!git clone https://github.com/pytorch/fairseq

# Change current working directory
#The !pwd command prints the current working directory, displaying the current path. 
!pwd

#The %cd command changes the current working directory to "/content/fairseq".

%cd "/content/fairseq"

#The --editable flag indicates that the package should be installed in editable mode, 
#allowing modifications to the package source code without needing to reinstall it.
#./ refers to the current directory
!pip install --editable ./ 

#tensorboardX is a library that provides a wrapper around TensorFlow's 
#TensorBoard, enabling visualization and logging of training progress and results.

!pip install tensorboardX


Cloning into 'fairseq'...
remote: Enumerating objects: 34724, done.[K
remote: Counting objects: 100% (181/181), done.[K
remote: Compressing objects: 100% (108/108), done.[K
remote: Total 34724 (delta 91), reused 142 (delta 68), pack-reused 34543[K
Receiving objects: 100% (34724/34724), 25.01 MiB | 23.15 MiB/s, done.
Resolving deltas: 100% (25183/25183), done.
/content
/content/fairseq
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Obtaining file:///content/fairseq
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting hydra-core<1.1,>=1.0.7 (from fairseq==0.12.2)
  Downloading hydra_core-1.0.7-py3-none-any.whl (123 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.8/123.8 kB[0m [31m9.6 MB/s[0m eta 

In [5]:
# Use this checkpoint on the free Colab tier.
# MMS-1B:FL102 model - 102 languages - FLEURS dataset
#!wget -P ./models_new 'https://dl.fbaipublicfiles.com/mms/asr/mms1b_fl102.pt'


In [6]:
# use high RAM, such as colab pro
# # MMS-1B-all - 1162 Languages - MMS-lab + FLEURS + CV + VP + MLS
!wget -P ./models_new 'https://dl.fbaipublicfiles.com/mms/asr/mms1b_all.pt'

--2023-06-11 01:49:40--  https://dl.fbaipublicfiles.com/mms/asr/mms1b_all.pt
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 13.227.219.33, 13.227.219.10, 13.227.219.70, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|13.227.219.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 14660831159 (14G) [binary/octet-stream]
Saving to: ‘./models_new/mms1b_all.pt’


2023-06-11 01:50:39 (234 MB/s) - ‘./models_new/mms1b_all.pt’ saved [14660831159/14660831159]



In [None]:
import subprocess
import os


# Run MMS speech recognition on the 16 kHz clip and save the script's standard
# output to /content/chunk2/output.txt.

# Environment variables; child processes started below inherit them.
os.environ["TMPDIR"] = '/content/temp_dir'
os.environ["PYTHONPATH"] = "."  # resolved relative to the subprocess working directory
os.environ["PREFIX"] = "INFER"
os.environ["HYDRA_FULL_ERROR"] = "1"
os.environ["USER"] = "micro"

filename = "/content/chunk2/urdu_10_16k.wav"

# Command to run MMS inference. The script path is relative to the fairseq checkout.
command = ['python', 'examples/mms/asr/infer/mms_infer.py', '--model', '/content/fairseq/models_new/mms1b_all.pt', '--lang', 'urd-script_arabic', '--audio', filename]

# Open "output.txt" in write mode and redirect the command's standard output to it.
with open('/content/chunk2/output.txt', 'w') as f:
    # cwd pins the working directory so the relative script path and PYTHONPATH="."
    # resolve inside the checkout regardless of the kernel's current directory.
    # check=True raises CalledProcessError if the inference script fails.
    subprocess.run(command, stdout=f, cwd='/content/fairseq', check=True)




In [None]:
import os
from pytube import YouTube
from pydub import AudioSegment
import subprocess

# Download the audio of a YouTube video, cut up to its first 10 minutes into
# 60-second WAV chunks under /content/chunk4, and resample each chunk to 16 kHz.

# URL of the video
url = 'https://www.youtube.com/watch?v=_NdA1kNxuJ4&t=1s'

youtube = YouTube(url)

# first() returns None when no audio-only stream exists; fail with a clear message.
video = youtube.streams.filter(only_audio=True).first()
if video is None:
    raise RuntimeError(f"No audio-only stream available for {url}")
out_file = video.download(output_path="/content/")

# Load the audio
audio = AudioSegment.from_file(out_file, format="mp4")

# Create the directory if it doesn't exist
os.makedirs('/content/chunk4', exist_ok=True)

# End time for slicing in milliseconds: 10 minutes, capped at the real audio
# length (len(audio) is in milliseconds). Without the cap, a shorter recording
# would yield empty trailing chunks that are still exported and resampled.
end_time = min(10*60*1000, len(audio))

# Chunk size in milliseconds
chunk_size = 60*1000  # 60 seconds

# Slice and export each 60-second chunk of audio
for i, start_time in enumerate(range(0, end_time, chunk_size)):
    # The last chunk may be shorter than chunk_size
    slice_end_time = min(start_time + chunk_size, end_time)

    # Slice the audio
    sliced_audio = audio[start_time:slice_end_time]

    # Export the sliced audio; close the file handle export() returns
    original_filename = f"/content/chunk4/audio_{i}.wav"
    sliced_audio.export(original_filename, format="wav").close()

    # Resample to 16 kHz with ffmpeg (-y overwrites an existing output file).
    # check=True stops the loop on the first failed conversion.
    output_filename = f"/content/chunk4/urdu_{i}_16k.wav"  # Updated filename
    command = ['ffmpeg', '-y', '-i', original_filename, '-ar', '16000', output_filename]
    subprocess.run(command, check=True)


In [None]:
import os
import subprocess

# Transcribe every 16 kHz chunk in /content/chunk4 with MMS: one
# transcription_<i>.txt per chunk, plus combined_transcription.txt holding all of them.

# Set environment variables; child processes started below inherit them.
os.environ["TMPDIR"] = '/content/temp_dir'
os.environ["PYTHONPATH"] = "."  # resolved relative to the subprocess working directory
os.environ["PREFIX"] = "INFER"
os.environ["HYDRA_FULL_ERROR"] = "1"
os.environ["USER"] = "micro"

# Define your directory
directory = '/content/chunk4/'

# Define the range of file indices
file_indices = range(10)  # This will give you the numbers 0 to 9

# Define a single output file to store all transcriptions
combined_output_filename = os.path.join(directory, 'combined_transcription.txt')

# Start from a clean combined file: remove a leftover from a previous run,
# because the loop below appends to it.
if os.path.exists(combined_output_filename):
    os.remove(combined_output_filename)

# Loop through the file indices
for i in file_indices:
    # Construct the input file name
    input_filename = os.path.join(directory, f'urdu_{i}_16k.wav')

    # Check if file exists
    if not os.path.exists(input_filename):
        print(f"File {input_filename} does not exist, skipping.")
        continue

    # Define the output filename for the transcription
    output_filename = os.path.join(directory, f'transcription_{i}.txt')

    # Run the model inference on the audio file and save its standard output to the file.
    # cwd pins the working directory so the relative script path and PYTHONPATH="."
    # resolve inside the fairseq checkout regardless of the kernel's current directory.
    with open(output_filename, 'w') as f:
        result = subprocess.run(['python', 'examples/mms/asr/infer/mms_infer.py', '--model', '/content/fairseq/models_new/mms1b_all.pt', '--lang', 'urd-script_arabic', '--audio', input_filename], stdout=f, cwd='/content/fairseq')

    # Report a failed run instead of ignoring it; keep going with the remaining chunks.
    if result.returncode != 0:
        print(f"Inference failed for {input_filename} (exit code {result.returncode}).")

    # Append the individual transcription to the combined file. The copy is done in
    # binary mode so the bytes the subprocess wrote are preserved exactly and no
    # locale-dependent decoding of the transcription is involved.
    with open(output_filename, 'rb') as f_in, open(combined_output_filename, 'ab') as f_out:
        f_out.write(f_in.read())
        # Newline to separate the transcriptions
        f_out.write(b'\n')
