<a href="https://colab.research.google.com/github/ShresthaYaju/Video-Transcriber/blob/main/VideoTranscriber.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# moviepy is pinned below 2.0 because the code imports `moviepy.editor`,
# a namespace that was removed in MoviePy 2.x.
%pip install SpeechRecognition "moviepy<2" pydub


In [None]:
import os
import tempfile

import speech_recognition as sr
from moviepy.editor import VideoFileClip
from pydub import AudioSegment

# Function to extract audio from video
def extract_audio_from_video(video_path, audio_output_path):
    """Write the audio track of a video file to ``audio_output_path``.

    Args:
        video_path: Path of the video file opened with ``VideoFileClip``.
        audio_output_path: Destination path handed to ``write_audiofile``.

    Raises:
        ValueError: If the clip exposes no audio track (``video.audio`` is
            ``None``).
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        video.audio.write_audiofile(audio_output_path)
    finally:
        # Always release the clip, even when writing the audio fails, so the
        # resources it opened for reading the video do not leak.
        video.close()

# Function to transcribe audio to text using SpeechRecognition
def transcribe_audio(audio_path, chunk_size_ms=60000):
    """Transcribe a WAV file to text, one fixed-length chunk at a time.

    The audio is sliced into chunks of ``chunk_size_ms`` milliseconds. Each
    chunk is exported to a temporary WAV file and passed to
    ``Recognizer.recognize_google``. Chunks that cannot be understood, or
    whose request fails, are reported on stdout and skipped.

    Args:
        audio_path: Path to a WAV file loaded with ``AudioSegment.from_wav``.
        chunk_size_ms: Length of each chunk in milliseconds. Defaults to
            60000 (one minute) to avoid limits on processing large files.

    Returns:
        The text of every successfully recognized chunk, joined by single
        spaces. An empty string if no chunk produced text.

    Raises:
        ValueError: If ``chunk_size_ms`` is not positive.
    """
    if chunk_size_ms <= 0:
        raise ValueError(f"chunk_size_ms must be positive, got {chunk_size_ms!r}")

    recognizer = sr.Recognizer()
    audio = AudioSegment.from_wav(audio_path)
    transcript = []

    # Chunk files go into a private temporary directory so they cannot
    # overwrite files in the working directory and are removed afterwards.
    with tempfile.TemporaryDirectory() as chunk_dir:
        for index, start_ms in enumerate(range(0, len(audio), chunk_size_ms)):
            chunk = audio[start_ms:start_ms + chunk_size_ms]
            chunk_path = os.path.join(chunk_dir, f"chunk_{index}.wav")

            # export() hands back the file object it wrote to; close it
            # explicitly rather than relying on garbage collection.
            exported = chunk.export(chunk_path, format="wav")
            exported.close()

            with sr.AudioFile(chunk_path) as source:
                audio_data = recognizer.record(source)

            # Recognition is best-effort per chunk: a failed chunk is
            # reported and skipped so the remaining chunks still run.
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                print("Speech Recognition could not understand audio")
            except sr.RequestError as e:
                print(f"Could not request results; {e}")
            else:
                transcript.append(text)

    return " ".join(transcript)

# Main function to handle video transcription
def transcribe_video(video_path, audio_output_path="audio.wav"):
    """Return the transcript of a video's audio track.

    The audio is first extracted to ``audio_output_path`` and that file is
    then transcribed.

    Args:
        video_path: Path of the video to transcribe.
        audio_output_path: Where the extracted audio is written
            (default ``"audio.wav"``).

    Returns:
        The transcript produced by ``transcribe_audio``.
    """
    extract_audio_from_video(video_path, audio_output_path)
    return transcribe_audio(audio_output_path)

# Example usage: transcribe the video at a hard-coded path and print the text.
# NOTE(review): the "/content/..." path looks like a Colab upload location —
# replace it with the path of your own video file before running.
video_path = "/content/t-2XuJdW4AnpaEWgeo1DS1Wa3NipRwETug.mp4"
transcript = transcribe_video(video_path)
print(transcript)
