<a href="https://colab.research.google.com/github/Hozaifa1212/Python/blob/main/TranscriptionGeneratorB_Y.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

In [None]:
import urllib.request

In [None]:
import subprocess

In [None]:
import shutil

In [None]:
pip install pytube pydub SpeechRecognition

In [None]:
from pytube import YouTube
from pydub import AudioSegment

In [None]:
import moviepy.editor as mp

In [None]:
import speech_recognition as sr

In [None]:
def ensure_yt_dlp():
    """Install the ``yt-dlp`` command-line tool if it is not found on PATH.

    The executable is looked up with ``shutil.which``; when it is missing,
    the ``yt-dlp`` package is installed with pip.

    Raises:
        subprocess.CalledProcessError: If the pip installation exits with a
            non-zero status.
    """
    import sys  # local import: only this function needs it

    if shutil.which("yt-dlp"):
        return

    print("yt-dlp is not installed. Installing...")
    # Invoke pip as a module of the running interpreter so the package is
    # installed into this environment rather than into whichever "pip"
    # happens to come first on PATH.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "yt-dlp"],
        check=True,
    )

In [None]:
def download_audio(youtube_url, output_path="audio"):
    """Download the audio track of a video as an MP3 file using yt-dlp.

    Args:
        youtube_url: URL of the video to fetch.
        output_path: Directory that receives the file; created if missing.

    Returns:
        Path of the downloaded file, ``<output_path>/audio.mp3``.

    Raises:
        subprocess.CalledProcessError: If yt-dlp (or its installation via
            ``ensure_yt_dlp``) exits with a non-zero status.
        FileNotFoundError: If yt-dlp finished but the expected MP3 file is
            not present.
    """
    try:
        ensure_yt_dlp()

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(output_path, exist_ok=True)

        print("Downloading audio using yt-dlp...")
        command = [
            "yt-dlp",
            "-f",
            # Prefer an m4a stream, but fall back to the best available audio
            # so videos without an m4a rendition can still be downloaded.
            "bestaudio[ext=m4a]/bestaudio",
            "--extract-audio",
            "--audio-format",
            "mp3",
            "-o",
            os.path.join(output_path, "audio.%(ext)s"),
            # End of options: a URL beginning with "-" must not be parsed
            # as a yt-dlp flag.
            "--",
            youtube_url,
        ]
        subprocess.run(command, check=True)

        audio_file = os.path.join(output_path, "audio.mp3")

        if not os.path.exists(audio_file):
            raise FileNotFoundError("Audio file could not be downloaded.")

        print(f"Downloaded audio to {audio_file}")
        return audio_file
    except subprocess.CalledProcessError as e:
        print(f"yt-dlp command failed: {e}")
        raise
    except Exception as e:
        print(f"Error in audio download: {e}")
        raise


In [None]:
def split_audio(audio_file, chunk_length_ms=60000):
    """Split an audio file into consecutive fixed-length WAV chunks.

    The chunks are written to the current working directory as
    ``chunk_0.wav``, ``chunk_1.wav``, ... (existing files with those names
    are overwritten). The last chunk may be shorter than the others.

    Args:
        audio_file: Path of the audio file to load with pydub.
        chunk_length_ms: Length of each chunk; must be a positive integer.
            pydub measures segment length and slice offsets in milliseconds.

    Returns:
        List of the chunk file names, in playback order.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
        Exception: Any error raised while loading or exporting the audio is
            printed and re-raised unchanged.
    """
    # Validate before decoding: a zero step would only fail inside range()
    # after the whole file was loaded, and a negative step would silently
    # produce no chunks at all.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be positive, got {chunk_length_ms!r}"
        )

    try:
        audio = AudioSegment.from_file(audio_file)
        chunks = []
        offsets = range(0, len(audio), chunk_length_ms)
        for index, start in enumerate(offsets):
            chunk_filename = f"chunk_{index}.wav"
            segment = audio[start:start + chunk_length_ms]
            segment.export(chunk_filename, format="wav")
            chunks.append(chunk_filename)
        return chunks
    except Exception as e:
        print(f"Error splitting audio: {e}")
        raise


In [None]:
def transcribe_audio(audio_file, language="bn-BD"):
    """Transcribe one audio file with Google Speech Recognition.

    Args:
        audio_file: Path of the audio file to read via ``sr.AudioFile``.
        language: Language tag forwarded to ``recognize_google``.

    Returns:
        The recognized text. When the speech cannot be understood, or the
        service request fails, a descriptive message string is returned
        instead of raising.

    Raises:
        Exception: Any other error is printed and re-raised unchanged.
    """
    engine = sr.Recognizer()

    try:
        # Read the entire file into memory before sending it for recognition.
        with sr.AudioFile(audio_file) as wav_source:
            recording = engine.record(wav_source)
        return engine.recognize_google(recording, language=language)
    except sr.UnknownValueError:
        # Unintelligible audio is reported as text, not as an exception.
        return "Google Speech Recognition could not understand the audio."
    except sr.RequestError as e:
        # Service/network failures are likewise reported as text.
        return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        print(f"Error during transcription: {e}")
        raise


In [None]:
def alternative_download_audio(youtube_url, output_path="audio"):
    """Download a video's audio track as MP3 with yt-dlp (no auto-install).

    Unlike a shell command string, the arguments are passed to yt-dlp as a
    list, so characters such as ``&`` or ``;`` in the URL are never
    interpreted by a shell.

    Args:
        youtube_url: URL of the video to fetch.
        output_path: Directory that receives the file; created if missing.

    Returns:
        Path of the downloaded file, ``<output_path>/audio.mp3``.

    Raises:
        FileNotFoundError: If yt-dlp is not installed, exits with a non-zero
            status, or does not produce the expected MP3 file.
    """
    try:
        os.makedirs(output_path, exist_ok=True)

        print("Downloading audio using yt-dlp...")
        command = [
            "yt-dlp",
            "-f",
            "bestaudio[ext=m4a]",
            "--extract-audio",
            "--audio-format",
            "mp3",
            "-o",
            f"{output_path}/audio.%(ext)s",
            # End of options: the URL is always treated as a positional
            # argument, even if it begins with "-".
            "--",
            youtube_url,
        ]
        result = subprocess.run(command, check=False)

        audio_file = os.path.join(output_path, "audio.mp3")

        if result.returncode != 0:
            print(f"yt-dlp exited with status {result.returncode}")
        # A failed run must not be masked by a stale audio.mp3 left over
        # from an earlier download, so the exit status is checked as well.
        if result.returncode != 0 or not os.path.exists(audio_file):
            raise FileNotFoundError("Audio file could not be downloaded.")

        print(f"Downloaded audio to {audio_file}")
        return audio_file
    except Exception as e:
        print(f"Error in alternative audio download: {e}")
        raise


In [None]:
if __name__ == "__main__":
    # Video whose speech is transcribed.
    video_url = "https://www.youtube.com/watch?v=gbCjCtqtg_E"

    try:
        # 1. Fetch the audio track (alternative_download_audio takes the
        #    same argument and can be swapped in here).
        print("Attempting to download audio...")
        audio_path = download_audio(video_url)

        # 2. Cut the audio into fixed-length chunks.
        print("Splitting audio into chunks...")
        chunks = split_audio(audio_path)

        # 3. Transcribe chunk by chunk, one transcript line per chunk.
        transcript_lines = []
        for chunk in chunks:
            print(f"Transcribing {chunk}...")
            text = transcribe_audio(chunk, language="bn-BD")
            transcript_lines.append(text + "\n")
        transcribed_text = "".join(transcript_lines)

        # 4. Show the result.
        print("\nThe resultant text from the video is (in Bengali):\n")
        print(transcribed_text)

        # 5. Persist the transcript as UTF-8 text.
        with open("transcription.txt", "w", encoding="utf-8") as transcript_file:
            transcript_file.write(transcribed_text)

    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        print(f"An error occurred: {e}")
