In [2]:
import os
from openai import OpenAI
from pathlib import Path
from dotenv import load_dotenv
import moviepy.editor as mp
from pydub import AudioSegment
import tempfile

# Populate the process environment via python-dotenv before reading the key.
load_dotenv()

# Shared OpenAI client; the key is taken from the OPENAI_API_KEY variable
# (None is passed through if the variable is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


def chunk_audio(audio_path, chunk_duration_ms=25000):
    """Split an audio file into consecutive chunks saved as temporary MP3 files.

    Args:
        audio_path: Path of the audio file, handed to ``AudioSegment.from_file``.
        chunk_duration_ms: Maximum length of each chunk, as a positive integer
            number of milliseconds.

    Returns:
        A list of temporary ``.mp3`` file paths, one per chunk, in playback
        order. The files are created with ``delete=False``, so the caller is
        responsible for removing them. The list is empty when the audio has
        zero length.

    Raises:
        ValueError: If ``chunk_duration_ms`` is not positive.
    """
    if chunk_duration_ms <= 0:
        raise ValueError(
            f"chunk_duration_ms must be positive, got {chunk_duration_ms!r}"
        )

    print("Loading audio file...")
    audio = AudioSegment.from_file(audio_path)
    total_duration_ms = len(audio)

    # Stepping through the duration by the chunk length yields exactly
    # ceil(total / chunk) start offsets. The previous "total // chunk + 1"
    # produced an extra zero-length chunk whenever the duration was an exact
    # multiple of the chunk length (including empty audio).
    chunk_starts = range(0, total_duration_ms, chunk_duration_ms)

    print(f"Splitting audio into {len(chunk_starts)} chunks...")
    chunks = []
    for start_ms in chunk_starts:
        end_ms = min(start_ms + chunk_duration_ms, total_duration_ms)
        chunk = audio[start_ms:end_ms]

        # Reserve a unique file name, then close the handle before exporting:
        # writing to the name while the handle is still open fails on Windows.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_chunk:
            chunk_path = temp_chunk.name
        chunk.export(chunk_path, format="mp3")
        chunks.append(chunk_path)

    return chunks


def transcribe_video(video_path):
    """Transcribe the spoken audio of a video file with the "whisper-1" model.

    The audio track is written to ``temp/<video stem>.mp3``, split into chunks
    by ``chunk_audio``, and each chunk is sent to the transcription endpoint of
    the module-level ``client``. The intermediate audio file and all chunk
    files are removed before returning, including when an error occurs.

    Args:
        video_path: Path of the video file to transcribe.

    Returns:
        The chunk transcriptions joined with single spaces, in chunk order.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
        ValueError: If the video has no audio track.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    # Create temp directory
    temp_dir = Path("temp")
    temp_dir.mkdir(exist_ok=True)

    audio_path = temp_dir / f"{Path(video_path).stem}.mp3"
    chunks = []
    try:
        # Convert video to audio
        print("Converting video to audio...")
        video = mp.VideoFileClip(video_path)
        try:
            if video.audio is None:
                # Fail with a clear message instead of an AttributeError on None.
                raise ValueError(f"Video file has no audio track: {video_path}")
            video.audio.write_audiofile(str(audio_path), verbose=False)
        finally:
            # Release the clip even if extraction fails.
            video.close()

        # Split audio into chunks
        chunks = chunk_audio(audio_path)

        # Transcribe chunks
        print("Transcribing chunks...")
        full_transcription = []
        for i, chunk_path in enumerate(chunks, 1):
            print(f"Transcribing chunk {i}/{len(chunks)}...")
            with open(chunk_path, "rb") as audio_file:
                transcription = client.audio.transcriptions.create(
                    model="whisper-1", file=audio_file, response_format="text"
                )
            full_transcription.append(transcription)

            # Remove each chunk as soon as it is transcribed to limit disk use.
            os.unlink(chunk_path)
    finally:
        # Remove whatever is still on disk: chunks left over after a failure
        # and the extracted audio file. Files already deleted are skipped.
        for leftover in [*chunks, audio_path]:
            try:
                os.unlink(leftover)
            except FileNotFoundError:
                pass

    return " ".join(full_transcription)


# Example usage: transcribe one lecture recording, print the result, and save
# it next to the current working directory as "<video stem>_transcription.txt".
video_path = "/Users/cvk/Downloads/AH110 Session 17 - The Global History of Cities.mp4"
transcription = transcribe_video(video_path)
print("\nTranscription:")
print(transcription)

# Save transcription to a text file.
# The encoding is pinned to UTF-8 so non-ASCII characters in the transcript
# are written correctly regardless of the platform's default encoding.
output_file = f"{Path(video_path).stem}_transcription.txt"
with open(output_file, "w", encoding="utf-8") as f:
    f.write(transcription)
print(f"\nTranscription saved to: {output_file}")

Converting video to audio...
MoviePy - Writing audio in temp/AH110 Session 17 - The Global History of Cities.mp3


                                                                          

MoviePy - Done.
Loading audio file...
Splitting audio into 243 chunks...
Transcribing chunks...
Transcribing chunk 1/243...
Transcribing chunk 2/243...
Transcribing chunk 3/243...
Transcribing chunk 4/243...
Transcribing chunk 5/243...
Transcribing chunk 6/243...
Transcribing chunk 7/243...
Transcribing chunk 8/243...
Transcribing chunk 9/243...
Transcribing chunk 10/243...
Transcribing chunk 11/243...
Transcribing chunk 12/243...
Transcribing chunk 13/243...
Transcribing chunk 14/243...
Transcribing chunk 15/243...
Transcribing chunk 16/243...
Transcribing chunk 17/243...
Transcribing chunk 18/243...
Transcribing chunk 19/243...
Transcribing chunk 20/243...
Transcribing chunk 21/243...
Transcribing chunk 22/243...
Transcribing chunk 23/243...
Transcribing chunk 24/243...
Transcribing chunk 25/243...
Transcribing chunk 26/243...
Transcribing chunk 27/243...
Transcribing chunk 28/243...
Transcribing chunk 29/243...
Transcribing chunk 30/243...
Transcribing chunk 31/243...
Transcribing c