In [None]:
import assemblyai as aai
import os
import tempfile
from moviepy import VideoFileClip

In [None]:
# SECURITY: API keys must never be committed inside a notebook. The key is read
# from the ASSEMBLYAI_API_KEY environment variable instead. Any key that was
# ever hardcoded in this cell should be treated as leaked and rotated.
_api_key = os.environ.get("ASSEMBLYAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "AssemblyAI API key not configured: set the ASSEMBLYAI_API_KEY "
        "environment variable before running this notebook."
    )
aai.settings.api_key = _api_key

# File extensions (lower-case, including the leading dot) accepted as input.
AUDIO_EXTS = {".mp3", ".wav", ".m4a", ".flac", ".ogg"}
VIDEO_EXTS = {".mp4", ".mov", ".mkv", ".avi", ".webm"}

# Directory that receives the generated .srt and summary .txt files.
OUTPUT_DIR = "."



In [None]:
def get_audio_path_from_media(media_path: str) -> str:
    """
    Resolve a media file to the path of an audio file.

    The decision is made purely on the file extension, compared
    case-insensitively:

    - extension in AUDIO_EXTS -> media_path is returned unchanged
    - extension in VIDEO_EXTS -> the result of extract_audio_from_video(media_path)

    Raises:
        ValueError: if the extension is in neither set.
    """
    _, suffix = os.path.splitext(media_path)
    suffix = suffix.lower()

    # Reject anything that is neither a known audio nor a known video type.
    if suffix not in AUDIO_EXTS and suffix not in VIDEO_EXTS:
        raise ValueError(f"Unsupported media type: {suffix}")

    # Audio needs no conversion; video has its audio track extracted first.
    return media_path if suffix in AUDIO_EXTS else extract_audio_from_video(media_path)

In [None]:
def extract_audio_from_video(video_path: str) -> str:
    """
    Extract the audio track of a video into a temporary .wav file.

    The temporary file is created with delete=False, so it outlives this
    function; the caller is responsible for removing it when done.

    Args:
        video_path: Path to the video file to read.

    Returns:
        Path of the newly written temporary .wav file.

    Raises:
        ValueError: if the video has no audio track.
        Exception: whatever moviepy raises while opening the video or writing
            the audio; in that case the partial temp file is removed first.
    """
    clip = VideoFileClip(video_path)
    try:
        audio = clip.audio
        if audio is None:
            # Without this check the failure would surface as an opaque
            # AttributeError on `None.write_audiofile`.
            raise ValueError(f"No audio track found in video: {video_path}")

        # Reserve a unique temp path, then release our own handle before
        # moviepy writes to that path (an open handle can block re-opening
        # the file by name on some platforms).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name

        try:
            # moviepy writes the audio track to that file
            audio.write_audiofile(tmp_path)
        except Exception:
            # Do not leave a partial/empty temp file behind on failure.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise
    finally:
        # Always release the reader resources held by the clip,
        # including when extraction fails.
        clip.close()

    return tmp_path


In [None]:
def transcribe_media(media_path: str):
    """
    Transcribe an audio or video file with AssemblyAI and save the results.

    Steps:
    - Resolves media_path to an audio file (extracting audio from video if needed)
    - Sends it to AssemblyAI with speaker labels, language detection and
      bullet-point summarization enabled
    - Removes the temporary extracted audio file, if one was created
    - Saves SRT subtitles and a summary text file into OUTPUT_DIR

    Args:
        media_path: Path to an audio or video file.

    Returns:
        Tuple (transcript, srt_path, summary_path).

    Raises:
        ValueError: if the media type is unsupported (from get_audio_path_from_media).
        RuntimeError: if AssemblyAI reports the transcription as failed.
    """
    audio_file = get_audio_path_from_media(media_path)
    # get_audio_path_from_media returns the input path untouched for audio
    # files, so a different path means a temp file was extracted from a video.
    extracted_temp_audio = audio_file != media_path

    try:
        config = aai.TranscriptionConfig(
            speaker_labels=True,
            speaker_options=aai.SpeakerOptions(
                min_speakers_expected=2,
                max_speakers_expected=5,
            ),
            language_detection=True,
            language_detection_options=aai.LanguageDetectionOptions(
                expected_languages=["en", "hi", "pa", "es", "fr", "de"],
                fallback_language="auto",
                code_switching=True,
                code_switching_confidence_threshold=0.5,
            ),
            summarization=True,
            summary_model=aai.SummarizationModel.informative,
            summary_type=aai.SummarizationType.bullets,
        )

        transcriber = aai.Transcriber()
        transcript = transcriber.transcribe(audio_file, config=config)
    finally:
        # The extracted .wav is only needed for the upload; remove it whether
        # or not transcription succeeded so temp files do not accumulate.
        if extracted_temp_audio:
            try:
                os.remove(audio_file)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # transcription result or the original error.
                pass

    if transcript.status == aai.TranscriptStatus.error:
        raise RuntimeError(f"Transcription failed: {transcript.error}")

    # make sure folder exists
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # ---------- SRT ----------
    srt = transcript.export_subtitles_srt(chars_per_caption=32)
    srt_path = os.path.join(OUTPUT_DIR, f"transcript_{transcript.id}.srt")
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        srt_file.write(srt)

    # ---------- SUMMARY ----------
    summary_text = transcript.summary

    if summary_text is None:
        # No summary came back: store the start of the raw transcript instead
        # so the summary file is still written.
        print("No summary from API, falling back to transcript.text[:2000].")
        summary_text = (transcript.text or "")[:2000]

    summary_path = os.path.join(
        OUTPUT_DIR, f"transcript_{transcript.id}_summary.txt"
    )
    with open(summary_path, "w", encoding="utf-8") as summary_file:
        summary_file.write(summary_text)

    print("SRT saved at:", srt_path)
    print("Summary saved at:", summary_path)

    return transcript, srt_path, summary_path


In [None]:
# Example command-line entry point. It is kept inside a string literal so it
# does NOT run when this cell executes; move it into a script's main guard to
# use it.
# NOTE(review): transcribe_media returns a (transcript, srt_path, summary_path)
# tuple, so the `transcript` name assigned in the snippet holds the whole
# tuple rather than just the transcript object - unpack it when activating.
'''
if __name__ == "__main__":
    media_path = input("Enter path to your audio/video file: ").strip()

    if not os.path.isfile(media_path):
        print("File not found. Check the path.")
        raise SystemExit

    print(f"[*] Selected: {media_path}")
    transcript = transcribe_media(media_path)
'''