# In [None]:
import os
import json
from moviepy.video.io.VideoFileClip import VideoFileClip
import speech_recognition as sr

def split_and_save_video_chunks(video_path, chunk_length, output_dir):
    """Split a video into consecutive chunks, save each one, and yield it.

    This is a generator: nothing happens (including argument validation)
    until the first item is requested. Each chunk is written to
    ``output_dir`` as ``chunk_<start>_<end>.mp4`` before it is yielded, and
    it is yielded while the source video is still open, so the consumer
    should finish with a chunk before asking for the next one.

    Args:
        video_path: Path of the video file to split.
        chunk_length: Chunk length in whole seconds; must be positive.
        output_dir: Directory that receives the chunk files. It is created
            if it does not exist yet.

    Yields:
        Tuples ``(chunk, start, end)`` where ``chunk`` is the sub-clip and
        ``start``/``end`` are the integer second labels used in the file
        name.

    Raises:
        ValueError: If ``chunk_length`` is not positive.
    """
    # A zero step makes range() fail and a negative one silently produces
    # no chunks at all; reject both up front with a clear message.
    if chunk_length <= 0:
        raise ValueError(
            f"chunk_length must be a positive number of seconds, got {chunk_length!r}"
        )

    os.makedirs(output_dir, exist_ok=True)

    with VideoFileClip(video_path) as video:
        total_duration = video.duration
        # Labels stay whole seconds because the file names use the ``04d``
        # format, which only accepts integers.
        whole_seconds = int(total_duration)
        for start in range(0, whole_seconds, chunk_length):
            end = min(start + chunk_length, whole_seconds)
            # The last chunk runs to the true end of the video so the
            # fractional tail (< 1 s) is not dropped; its label still uses
            # the truncated whole-second value.
            clip_end = total_duration if end == whole_seconds else end
            chunk = video.subclip(start, clip_end)
            chunk_path = os.path.join(output_dir, f"chunk_{start:04d}_{end:04d}.mp4")
            chunk.write_videofile(chunk_path, codec="libx264")
            yield chunk, start, end

def extract_and_transcribe(video_clip, start, end, recognizer, output_dir):
    """Write a clip's audio to a WAV file and transcribe it.

    The audio is saved in ``output_dir`` as ``chunk_<start>_<end>.wav`` and
    then sent to the recognizer's Google Web Speech backend.

    Args:
        video_clip: Clip whose ``audio`` attribute holds the audio to
            transcribe (``None`` is treated as "no audio track").
        start: Integer start label (seconds) used in the file name.
        end: Integer end label (seconds) used in the file name.
        recognizer: ``speech_recognition.Recognizer`` used for recording
            and recognition.
        output_dir: Existing directory that receives the WAV file.

    Returns:
        The recognised text, or one of the placeholder strings
        ``"No audio track"``, ``"Audio was not understood"`` or
        ``"Request failed"`` when transcription is not possible.
    """
    audio = video_clip.audio
    # Clips without an audio track would otherwise fail with an
    # AttributeError on ``None.write_audiofile``.
    if audio is None:
        return "No audio track"

    audio_path = os.path.join(output_dir, f"chunk_{start:04d}_{end:04d}.wav")
    audio.write_audiofile(audio_path)

    # Read the samples into memory, then release the file before making
    # the (potentially slow) network request.
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Audio was not understood"
    except sr.RequestError:
        return "Request failed"

def process_videos_in_directory(directory, chunk_length=10):
    """Chunk and transcribe every MP4 file found directly in ``directory``.

    For each video ``<name>.mp4`` a sibling directory ``<name>_chunks`` is
    created next to ``directory`` (one level up). It receives the chunk
    files, the extracted audio, and a ``full_transcript.json`` mapping each
    chunk label to its transcript.

    Args:
        directory: Directory to scan (not recursive).
        chunk_length: Length of each chunk in whole seconds. Defaults to
            10, the value that was previously hard-coded.
    """
    recognizer = sr.Recognizer()
    # os.listdir returns entries in arbitrary order; sort so runs are
    # reproducible.
    for filename in sorted(os.listdir(directory)):
        # Accept ".MP4" as well as ".mp4".
        if not filename.lower().endswith(".mp4"):
            continue
        video_path = os.path.join(directory, filename)
        # Skip directories that merely happen to be named like a video.
        if not os.path.isfile(video_path):
            continue

        video_name = os.path.splitext(filename)[0]
        # Creating the output directory outside the input directory
        output_dir = os.path.join(directory, '..', video_name + '_chunks')
        os.makedirs(output_dir, exist_ok=True)

        all_transcripts = {}
        chunks = split_and_save_video_chunks(video_path, chunk_length, output_dir)
        for video_clip, start, end in chunks:
            transcript = extract_and_transcribe(video_clip, start, end, recognizer, output_dir)
            all_transcripts[f"chunk_{start:04d}_{end:04d}"] = transcript

        json_path = os.path.join(output_dir, 'full_transcript.json')
        with open(json_path, 'w', encoding='utf-8') as json_file:
            json.dump(all_transcripts, json_file, indent=4)

        print(f"Transcription for {video_name} completed and saved in {output_dir}")

# Example run: transcribe every MP4 in the folder below (path is relative
# to the current working directory).
directory_path = "testingg"
process_videos_in_directory(directory=directory_path)
