In [None]:
# Install the transformers library to access pre-trained models
!pip install transformers

# Import the necessary libraries and set up the automatic speech recognition (ASR) pipeline
from glob import glob

import numpy as np  # Manages your Array operations
import pandas as pd
import torch
from google.colab import drive
from IPython.display import Audio
from tqdm import tqdm
from transformers import pipeline

# Set up the ASR pipeline using a Swahili model.
# Use the first GPU when CUDA is available; otherwise fall back to the CPU
# (device=-1) so the notebook still runs on a runtime without a GPU.
asr_device = 0 if torch.cuda.is_available() else -1
pipe = pipeline("automatic-speech-recognition", model="Akashpb13/Swahili_xlsr", device=asr_device)

# Mount Google Drive to access your files
drive.mount('/content/drive')

# Read the CSV file containing information about the audio files
test = pd.read_csv('/content/drive/MyDrive/Models/SampleSubmission.csv')

# Preview the first few rows of the DataFrame
test.head()


In [None]:
# Extract the audio files from the compressed archive
!tar xf "/content/drive/MyDrive/ASR/test0.tar.gz"

# Display an audio file using IPython's Audio widget
Audio("/content/test/common_voice_sw_27729935.mp3")


In [None]:
# Silence every warning from here on so the transcription output stays readable
import warnings
warnings.simplefilter('ignore')

# Transcribe a single clip as a quick check of the ASR pipeline
sample_clip = "/content/test/common_voice_sw_27729935.mp3"
pipe(sample_clip)


In [None]:
# Add a "my_path" column holding the full location of each clip:
# the extraction directory prepended to the file name stored in "path".
audio_dir = "/content/test/"
test["my_path"] = [audio_dir + file_name for file_name in test["path"]]

# Show the DataFrame with the new column
test


In [None]:
# Passing a list of file paths makes the pipeline transcribe several clips in one call
sample_clips = [
    "/content/test/common_voice_sw_27729935.mp3",
    "/content/test/common_voice_sw_35780884.mp3",
    "/content/test/common_voice_sw_36450168.mp3",
]
pipe(sample_clips)


In [None]:
# Transcribe every clip listed in the "my_path" column with a single pipeline call
audio_paths = test["my_path"].tolist()
results = pipe(audio_paths)

# Each result is indexed by "text"; keep only that transcription string
result_list = [prediction["text"] for prediction in results]

# Display the first 3 transcribed samples
result_list[:3]


In [None]:
# Build the submission table: one row per clip, pairing the original file name
# with its transcription
sub = pd.DataFrame(
    {
        "path": test.path.to_list(),
        "sentence": result_list,
    }
)

# Write the submission to a CSV file without the row index
sub.to_csv("Bill's Submission no 3.csv", index=False)

# Preview the first few rows of the submission DataFrame
sub.head()


In [None]:
# Transcribe the whole dataset one clip at a time, reading the audio from Google Drive.
# NOTE(review): earlier cells read the clips from /content/test/ — confirm that the
# Drive folder used here exists and holds the same files.
res = []
for path in tqdm(test.path):
    audio_file = f'/content/drive/MyDrive/asr/test_audios/{path}'
    transcription = pipe(audio_file)
    res.append(transcription['text'])

# Store the transcriptions in the "sentence" column of the test DataFrame
test['sentence'] = res

# Save the ID and transcription columns to a CSV file on Drive.
# NOTE(review): other cells identify clips by the "path" column; verify that the
# CSV really has an "audio_ID" column, otherwise this selection raises a KeyError.
test[['audio_ID', 'sentence']].to_csv('/content/drive/MyDrive/asr/res.csv', index=False)

# Display the updated test DataFrame
test
