# Arabic Speech to Text Converter

### Install Necessary Libraries


In [None]:
!pip install pydub pytube speechrecognition google-cloud-storage ffmpeg

### Import Required Libraries

In [None]:
import os
import requests
import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence
from pytube import YouTube
import pandas as pd

# Shared, module-level recognizer instance. transcribe_audio() reads it through
# the global name `r`, so this cell must run before any transcription call.
r = sr.Recognizer()

### Function to Transcribe Audio from File

In [None]:
def transcribe_audio(path, language='ar'):
    """
    Recognize the speech contained in a single audio file.

    Parameters:
    - path (str): Location of the audio file to read
    - language (str): Language code handed to the recognizer (default 'ar', Arabic)

    Returns:
    - str: The text recognized in the file

    Any exception raised by the recognizer is propagated to the caller.
    """
    with sr.AudioFile(path) as audio_file:
        # Load the entire file into memory, then submit it for recognition
        # through the module-level recognizer `r`.
        recording = r.record(audio_file)
        return r.recognize_google(recording, language=language)

### Function to Split Audio into Chunks

In [None]:
def split_audio_chunks(sound, silence_len=500, silence_thresh=-14, keep_silence=500):
    """
    Cut an audio segment into pieces at its silent stretches.

    Parameters:
    - sound (AudioSegment): The audio to cut
    - silence_len (int): Minimum length of silence that triggers a split (in ms)
    - silence_thresh (int): Offset added to the clip's own loudness (sound.dBFS)
      to obtain the silence threshold; it is relative, not an absolute level
    - keep_silence (int): Amount of silence kept at the start and end of each
      piece (in ms)

    Returns:
    - Whatever split_on_silence produces for these settings (the audio pieces)
    """
    # The threshold is derived from this clip's loudness, so the same offset
    # adapts to quiet and loud recordings.
    threshold = sound.dBFS + silence_thresh
    split_options = {
        "min_silence_len": silence_len,
        "silence_thresh": threshold,
        "keep_silence": keep_silence,
    }
    return split_on_silence(sound, **split_options)

### Function to Process Audio Chunks and Concatenate Transcriptions

In [None]:
def process_chunks(chunks, folder_name, language='ar'):
    """
    Export each audio chunk to a WAV file, transcribe it, and join the results.

    Parameters:
    - chunks (list): Audio chunks; each must provide export(filename, format=...)
    - folder_name (str): Folder where the chunk files are written. It is created
      (including missing parent folders) when it does not exist yet.
    - language (str): Language passed on to transcribe_audio

    Returns:
    - whole_text (str): "Complete Transcription: " followed by one capitalized,
      period-terminated sentence per successfully transcribed chunk. With no
      chunks (or no successful transcription) only the header is returned.

    Chunks that raise sr.UnknownValueError are reported on stdout and skipped;
    any other exception from exporting or transcribing propagates to the caller.
    """
    # makedirs copes with nested paths and with the folder already existing,
    # without the check-then-create race of isdir() + mkdir().
    os.makedirs(folder_name, exist_ok=True)

    # Collect the pieces and join once instead of growing a string in the loop.
    pieces = ["Complete Transcription: "]
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")

        try:
            text = transcribe_audio(chunk_filename, language)
        except sr.UnknownValueError:
            # Keep going with the remaining chunks, but say which one was dropped.
            print(f"Skipped {chunk_filename}: the chunk could not be transcribed.")
        else:
            pieces.append(f"{text.capitalize()}. ")

    return "".join(pieces)

### Function to Transcribe Large Audio Files

In [None]:
def get_large_audio_transcription(path, method='silence', interval=5, language='ar'):
    """
    Transcribe a large audio file by splitting it into chunks.

    Parameters:
    - path (str): Path to the audio file
    - method (str): 'silence' splits at silent stretches; any other value
      splits into fixed-length pieces
    - interval (int or float): Length of each fixed piece in minutes; only used
      when method is not 'silence'. Fractional minutes are accepted.
    - language (str): Language for transcription

    Returns:
    - Final transcribed text, as returned by process_chunks

    Raises:
    - ValueError: if fixed-length splitting is requested and interval does not
      amount to at least one millisecond (zero, negative, or too small)
    """
    sound = AudioSegment.from_file(path)  # Load the audio file

    if method == 'silence':
        chunks = split_audio_chunks(sound)
        folder_name = "audio-chunks"
    else:
        # range() only accepts integers, so convert to whole milliseconds;
        # this is what lets a fractional interval such as 0.5 work.
        chunk_length_ms = int(interval * 60 * 1000)
        if chunk_length_ms <= 0:
            raise ValueError(
                f"interval must be a positive number of minutes, got {interval!r}"
            )
        chunks = [
            sound[start:start + chunk_length_ms]
            for start in range(0, len(sound), chunk_length_ms)
        ]
        folder_name = "audio-fixed-chunks"

    return process_chunks(chunks, folder_name, language)

### Transcribe the Audio File and Save Results

In [None]:
if __name__ == "__main__":
    # Placeholder path; point this at a real audio file before running.
    downloaded_file = "/path/to/audio.wav"

    # Transcribe the audio file using silence-based chunking
    transcription = get_large_audio_transcription(downloaded_file, method='silence')
    print(transcription)  # Print the full transcription

    # Convert the transcription to a DataFrame with a single column
    transcription_df = pd.DataFrame([transcription], columns=['Transcription'])
    # A bare expression inside this `if` block is evaluated and discarded, so
    # print the DataFrame explicitly to actually display it.
    print(transcription_df)

    def count_words(text):
        """Return the number of whitespace-separated words in text."""
        return len(text.split())

    # NOTE(review): the count includes the two words of the
    # "Complete Transcription: " header that process_chunks prepends.
    word_count = count_words(transcription)
    print("Number of Words:", word_count)

    # Save the transcription. The content is CSV-formatted even though the
    # file name ends in .txt.
    transcription_df.to_csv('text.txt')