In [None]:
import os
import json
from pytubefix import YouTube # Using pytubefix
# from pytubefix.exceptions import PytubeFixError # Check actual exception name if needed for specific pytubefix errors
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from pydantic import BaseModel, Field, RootModel # For Pydantic V2
from langchain_openai.chat_models import ChatOpenAI
import subprocess
from typing import List, Optional
from dotenv import load_dotenv
import logging # For more structured logging

# Configure the root logger once for the whole notebook: records at INFO and
# above are emitted, each rendered as "<timestamp> - <level> - <message>".
# NOTE(review): logging.basicConfig is a no-op when the root logger already has
# handlers attached (some notebook front-ends install one) — confirm that log
# lines actually appear; passing force=True is the usual remedy if they do not.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [None]:
# Pull variables from a local .env file into the process environment, then
# read the OpenAI credential that the LLM cells of this notebook rely on.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

if OPENAI_API_KEY:
    logging.info("OpenAI API key loaded successfully.")
else:
    logging.error("OpenAI API key not found. Please set it in your .env file.")
    # Execution deliberately continues here. Raising instead, e.g.
    #     raise ValueError("OpenAI API key not found.")
    # is an option if a missing key should stop the notebook outright.

In [None]:
# --- User-Defined Configuration ---
# Source video and the folders that receive the download and the clips.
youtube_url = "https://youtu.be/nvuAt8sl7Ag?si=6x3KLf63BttTX_qx&utm_source=ZTQxO"
video_download_directory, clips_output_directory = "downloaded_video", "generated_clips"

# Preferred progressive stream resolution, e.g. "720p" or "360p";
# None means "take the best progressive stream available".
desired_video_resolution = "720p"

# Allowed clip length, in seconds (inclusive bounds handed to the LLM prompt).
segment_duration_min_seconds, segment_duration_max_seconds = 50, 59

# Number of topics requested from the first (topic identification) LLM call.
max_topics_to_identify = 5

# --- Global State Variables (initialized) ---
# Filled in by later cells; None / empty means "that stage has not produced
# a result yet".
downloaded_video_path = video_base_title = None
raw_transcript_data = transcript_file_path = None
identified_topics: List[str] = []
all_extracted_segments: List[dict] = []  # Segment-like dictionaries

logging.info(f"Configuration loaded. Target YouTube URL: {youtube_url}")

In [None]:
# --- Ensure Directories Exist ---
# Create the download and clip folders on first use. A folder that is already
# present is left untouched and produces no log line.
for required_directory in (video_download_directory, clips_output_directory):
    if os.path.exists(required_directory):
        continue
    os.makedirs(required_directory)
    logging.info(f"Created directory: {required_directory}")

In [None]:
# --- Download Video ---
# Purpose: resolve the YouTube URL, derive a filesystem-safe base title and
# make sure an MP4 of the video exists in `video_download_directory`.
#
# Outputs (notebook globals):
#   video_base_title      - sanitized title, set as soon as metadata is read
#   downloaded_video_path - path of the MP4, set only when the file is on disk
#   yt                    - the YouTube object, reused by the transcript cell
#
# Clear the state this cell owns before doing anything else. Without this, a
# failed re-run (for example after changing `youtube_url`) would leave the
# path/title of the previous video in place and downstream cells would happily
# keep working on the wrong file.
downloaded_video_path = None
video_base_title = None

try:
    logging.info(f"Fetching video info for: {youtube_url}")
    yt = YouTube(youtube_url)

    video_title_raw = yt.title
    # Sanitize title for filename: keep alphanumerics, spaces, hyphens and
    # underscores; every other character becomes an underscore. Trailing
    # whitespace is trimmed before spaces are turned into underscores.
    video_base_title_temp = "".join(
        c if c.isalnum() or c in (' ', '-', '_') else '_' for c in video_title_raw
    ).rstrip()
    video_base_title_temp = video_base_title_temp.replace(" ", "_")
    if not video_base_title_temp:  # Title was empty (or whitespace only)
        video_base_title_temp = f"youtube_video_{yt.video_id}"
    video_base_title = video_base_title_temp  # Assign to global variable

    video_filename_mp4 = f"{video_base_title}.mp4"
    downloaded_video_path_temp = os.path.join(video_download_directory, video_filename_mp4)

    logging.info(f"Video Title: {video_title_raw}")
    logging.info(f"Sanitized base title: {video_base_title}")
    logging.info(f"Attempting to download video to: {downloaded_video_path_temp}")

    # Check if video already exists to avoid re-downloading
    if os.path.exists(downloaded_video_path_temp):
        logging.info(f"Video already exists at {downloaded_video_path_temp}. Skipping download.")
        downloaded_video_path = downloaded_video_path_temp
    else:
        # Progressive streams carry audio and video in one file, so no muxing
        # step is needed after the download.
        streams_query = yt.streams.filter(progressive=True, file_extension='mp4')
        stream_to_download = None
        if desired_video_resolution:
            stream_to_download = streams_query.filter(res=desired_video_resolution).first()
            if not stream_to_download:
                logging.warning(f"Resolution {desired_video_resolution} not found as progressive mp4. Trying best available.")
        if not stream_to_download:
            # Either no resolution was requested or the requested one is
            # unavailable: fall back to the highest-resolution progressive MP4.
            stream_to_download = streams_query.order_by('resolution').desc().first()

        if stream_to_download:
            logging.info(f"Found video stream: Resolution {stream_to_download.resolution}, MIME Type {stream_to_download.mime_type}")
            stream_to_download.download(output_path=video_download_directory, filename=video_filename_mp4)
            logging.info(f"Successfully downloaded video: {downloaded_video_path_temp}")
            downloaded_video_path = downloaded_video_path_temp  # Assign to global
        else:
            logging.error("No suitable progressive MP4 stream found for this video.")
            # downloaded_video_path stays None, which makes the later cells
            # skip their work instead of failing on a missing file.

except Exception as e:  # Broad on purpose: pytubefix raises many error types
    logging.error(f"An error occurred during video download: {e}", exc_info=True)
    # downloaded_video_path is still None here, so downstream cells skip.

In [None]:
# --- Fetch and Save Transcript ---
# Purpose: obtain the timed transcript for the downloaded video, caching it as
# JSON next to the video so re-runs do not hit the network again.
#
# Outputs (notebook globals):
#   raw_transcript_data  - list of dicts read with key 'text' below, or None
#   transcript_file_path - location of the JSON cache, or None
#   full_transcript_text - all segment texts joined by single spaces ("" if none)
#
# Reset this cell's outputs first: if the download cell failed (or this cell
# fails) on a re-run, a transcript from an earlier video must not survive and
# be fed to the LLM cells.
raw_transcript_data = None
transcript_file_path = None

if downloaded_video_path and video_base_title:  # Proceed only if video download was successful
    try:
        video_id = yt.video_id  # `yt` is created by the download cell
        logging.info(f"Fetching transcript for video ID: {video_id}...")

        transcript_filename_json = f"{video_base_title}_transcript.json"
        transcript_file_path_temp = os.path.join(video_download_directory, transcript_filename_json)

        # Check if transcript file already exists
        if os.path.exists(transcript_file_path_temp):
            logging.info(f"Transcript file already exists: {transcript_file_path_temp}. Loading from file.")
            with open(transcript_file_path_temp, "r", encoding="utf-8") as f:
                raw_transcript_data_temp = json.load(f)
        else:
            if hasattr(YouTubeTranscriptApi, "get_transcript"):
                # Classic static API of youtube-transcript-api.
                raw_transcript_data_temp = YouTubeTranscriptApi.get_transcript(video_id)
            else:
                # Newer releases dropped the static helper in favour of an
                # instance API; to_raw_data() yields the same list-of-dicts.
                raw_transcript_data_temp = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
            with open(transcript_file_path_temp, "w", encoding="utf-8") as f:
                json.dump(raw_transcript_data_temp, f, ensure_ascii=False, indent=4)
            logging.info(f"Successfully fetched and saved transcript to: {transcript_file_path_temp}")

        raw_transcript_data = raw_transcript_data_temp  # Assign to global
        transcript_file_path = transcript_file_path_temp  # Assign to global

        # Basic shape check: a non-empty list whose first element is a dict.
        if isinstance(raw_transcript_data, list) and len(raw_transcript_data) > 0 and isinstance(raw_transcript_data[0], dict):
            logging.info(f"Transcript loaded/fetched successfully. Number of segments: {len(raw_transcript_data)}")
        else:
            logging.warning("Transcript data seems to be empty or not in the expected format.")
            raw_transcript_data = None  # Reset if invalid

    except TranscriptsDisabled:
        logging.error(f"Transcripts are disabled for video: {youtube_url}")
        raw_transcript_data = None
    except NoTranscriptFound:
        logging.error(f"No transcript found for video: {youtube_url}. It might be missing or not in a supported language.")
        raw_transcript_data = None
    except Exception as e:
        logging.error(f"An error occurred while fetching or saving the transcript: {e}", exc_info=True)
        raw_transcript_data = None
else:
    logging.warning("Skipping transcript fetching because video download failed or video_base_title was not set.")

# The topic-identification prompt works on plain text rather than on the timed
# structure, so build one space-joined string from all segment texts.
full_transcript_text = ""
if raw_transcript_data:
    full_transcript_text = " ".join(segment['text'] for segment in raw_transcript_data)
    logging.info(f"Full transcript text concatenated. Length: {len(full_transcript_text)} characters.")

In [None]:
# --- Pydantic Model for Topic Analysis ---
# Schema for the structured answer of the topic-identification LLM call:
# a short summary, the list of main topics, and an optional target audience.
# The Field descriptions are part of the schema handed to the model, so they
# double as instructions. Documentation is kept in `#` comments rather than a
# class docstring to leave that schema untouched.
#
# `max_topics_to_identify` is read when this class statement executes (both in
# the f-string description and in `max_length`); re-run this cell after
# changing that setting for the new value to take effect.
class VideoAnalysis(BaseModel):
    summary: str = Field(..., description="A brief summary of the video content, in 2-3 sentences.")
    main_topics: List[str] = Field(
        ...,
        description=f"A list of {max_topics_to_identify} distinct main topics or themes discussed in the video. Each topic should be a short phrase (2-5 words) suitable for guiding clip extraction.",
        min_length=1, # at least one topic is required (Pydantic v2 spelling of the older `min_items`)
        max_length=max_topics_to_identify # upper bound on the number of topics (Pydantic v2 spelling of `max_items`)
    )
    target_audience: Optional[str] = Field(None, description="The likely target audience for this video (e.g., 'Tech Enthusiasts', 'Business Leaders', 'Students').")

logging.info("Pydantic model 'VideoAnalysis' defined for topic identification.")

In [None]:
# --- LLM for Topic Identification ---
# First LLM pass: summarise the video and name the topics that the segment
# extraction step will later search for. Result lands in `identified_topics`.
identified_topics = []  # Reset global variable

if not (OPENAI_API_KEY and full_transcript_text):
    # Report every missing prerequisite, not only the first one.
    if not OPENAI_API_KEY:
        logging.warning("OpenAI API key not available. Skipping topic identification.")
    if not full_transcript_text:
        logging.warning("Transcript text not available. Skipping topic identification.")
else:
    try:
        logging.info("Initializing LLM for topic identification...")
        # A small, inexpensive model is enough for this coarse-grained task.
        llm_topic_identifier = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o-mini", temperature=0.3)

        # Only the first 15k characters are sent, to keep token usage and cost
        # of this pass bounded.
        transcript_excerpt = full_transcript_text[:15000]
        topic_prompt_text = (
            "Please analyze the following video transcript. Provide a brief summary of the video's content, "
            f"identify exactly {max_topics_to_identify} distinct main topics or themes that would be suitable for creating engaging short video clips, "
            "and suggest a likely target audience for this video.\n\n"
            "Each topic should be a concise phrase (2-5 words).\n\n"
            "Respond ONLY in the requested JSON structure.\n\n"
            f"Transcript Text:\n\"\"\"\n{transcript_excerpt}\n\"\"\""
        )

        system_message = {
            "role": "system",
            "content": "You are an expert video content analyst. Your task is to analyze a video transcript and identify its core themes. Respond ONLY with the requested JSON structure.",
        }
        user_message = {"role": "user", "content": topic_prompt_text}
        topic_messages = [system_message, user_message]

        logging.info("Invoking LLM for topic identification...")
        structured_llm_topic = llm_topic_identifier.with_structured_output(VideoAnalysis)
        analysis_result = structured_llm_topic.invoke(topic_messages)

        if not analysis_result:
            logging.warning("LLM for topic identification did not return a valid result.")
        else:
            logging.info(f"Video Summary: {analysis_result.summary}")
            logging.info(f"Identified Topics: {analysis_result.main_topics}")
            if analysis_result.target_audience:
                logging.info(f"Target Audience: {analysis_result.target_audience}")
            identified_topics = analysis_result.main_topics  # Assign to global

    except Exception as e:
        logging.error(f"An error occurred during topic identification with LLM: {e}", exc_info=True)

In [None]:
# --- Pydantic Models for Segment Extraction ---
# Schema for the structured answer of the segment-extraction LLM call.
# `Segment` is one candidate clip; `SegmentResponse` wraps the list so the
# model returns a single JSON object with a 'segments' key.
# The Field descriptions are part of the schema handed to the model. Notes are
# kept in `#` comments rather than class docstrings to leave that schema as is.
class Segment(BaseModel):
    # Both timestamps are whole seconds measured from the start of the video.
    start_time: int = Field(..., description="The start time of the segment in SECONDS from the beginning of the video.")
    end_time: int = Field(..., description="The end time of the segment in SECONDS from the beginning of the video.")
    title: str = Field(..., description="A concise, engaging title for this video segment (max 10 words).")
    description: str = Field(..., description="A brief 1-2 sentence description of what this segment is about.")
    # No duration field on purpose: it is derived as end_time - start_time by
    # the code that consumes the response, so the model only has to get the two
    # timestamps right. Nothing in this class checks that end_time > start_time.

class SegmentResponse(BaseModel): # top-level response object for one extraction call
    segments: List[Segment] = Field(..., description="A list of identified video segments matching the criteria.")

logging.info("Pydantic models 'Segment' and 'SegmentResponse' defined for segment extraction.")

In [None]:
# --- LLM for Segment Extraction (Looping Per Topic) ---
# Second LLM pass: for every identified topic, ask the model for clip-sized
# segments of the transcript and collect them in `all_extracted_segments`.
# Each stored item is the Segment as a dict plus two extra keys:
#   'calculated_duration' - end_time - start_time, in seconds
#   'topic'               - the topic the segment was requested for
all_extracted_segments = []  # Reset global variable

if OPENAI_API_KEY and raw_transcript_data and identified_topics:  # Proceed if all prereqs are met
    try:
        logging.info("Initializing LLM for segment extraction...")
        # Same model as the topic pass; swap in a stronger one here if the
        # returned timestamps turn out to be too imprecise.
        llm_segment_extractor = ChatOpenAI(
            api_key=OPENAI_API_KEY,
            model="gpt-4o-mini",
            temperature=0.5
        )
        structured_llm_segment = llm_segment_extractor.with_structured_output(SegmentResponse)

        for topic_index, current_topic in enumerate(identified_topics):
            logging.info(f"Processing topic {topic_index + 1}/{len(identified_topics)}: '{current_topic}'")

            # The whole timed transcript is embedded (as its Python repr) so
            # the model can read the real 'start'/'duration' values.
            segment_prompt_text = (
                f"From the provided full video transcript (which is a list of dictionaries, each with 'text', 'start' in seconds, and 'duration' in seconds), "
                f"identify all distinct segments that are specifically and clearly about the topic: '{current_topic}'.\n\n"
                f"For each segment you identify:\n"
                f"1. It MUST be between {segment_duration_min_seconds} and {segment_duration_max_seconds} seconds in duration.\n"
                f"2. Provide extremely accurate 'start_time' and 'end_time' in whole SECONDS from the beginning of the video.\n"
                f"3. Create a concise, engaging 'title' (max 10 words, relevant to the topic and segment content).\n"
                f"4. Write a brief 'description' (1-2 sentences) of what this specific segment covers.\n\n"
                f"Carefully review the timestamps in the transcript to ensure accuracy. Prioritize segments that are coherent and self-contained.\n\n"
                f"Respond ONLY with a JSON object containing a single key 'segments', where 'segments' is a list of these segment objects. "
                f"If no segments match the criteria for this topic, return an empty list for 'segments'.\n\n"
                f"Topic to focus on: {current_topic}\n\n"
                f"Full Video Transcript (list of dictionaries):\n\"\"\"\n{raw_transcript_data}\n\"\"\""
            )

            segment_messages = [
                {"role": "system", "content": "You are a YouTube shorts content producer. Your goal is to find precise video segments for a given topic based on a transcript. Respond ONLY with the requested JSON structure."},
                {"role": "user", "content": segment_prompt_text}
            ]

            logging.info(f"Invoking LLM for segment extraction on topic: '{current_topic}'...")
            # Per-topic try/except: one failed call must not abort the
            # remaining topics.
            try:
                segment_extraction_result = structured_llm_segment.invoke(segment_messages)
                if segment_extraction_result and segment_extraction_result.segments:
                    logging.info(f"Found {len(segment_extraction_result.segments)} segments for topic '{current_topic}'.")
                    for seg_obj in segment_extraction_result.segments:
                        calculated_duration = seg_obj.end_time - seg_obj.start_time

                        # The prompt asks for valid timestamps but nothing
                        # guarantees them. A segment that starts before 0 or
                        # does not end after it starts cannot be cut, so drop
                        # it here instead of letting ffmpeg fail on it later.
                        if seg_obj.start_time < 0 or calculated_duration <= 0:
                            logging.warning(
                                f"Discarding segment '{seg_obj.title}' for topic '{current_topic}': "
                                f"invalid timestamps (start={seg_obj.start_time}s, end={seg_obj.end_time}s)."
                            )
                            continue

                        # Out-of-range durations are still usable clips, so
                        # they are kept - but flagged for the reader.
                        if not (segment_duration_min_seconds <= calculated_duration <= segment_duration_max_seconds):
                            logging.warning(
                                f"Segment '{seg_obj.title}' lasts {calculated_duration}s, outside the requested "
                                f"{segment_duration_min_seconds}-{segment_duration_max_seconds}s range. Keeping it."
                            )

                        segment_dict = seg_obj.model_dump()  # Pydantic V2
                        segment_dict['calculated_duration'] = calculated_duration
                        segment_dict['topic'] = current_topic  # Keep track of which topic it came from
                        all_extracted_segments.append(segment_dict)
                else:
                    logging.info(f"No segments found by LLM for topic '{current_topic}'.")
            except Exception as e_invoke:
                logging.error(f"LLM invocation failed for topic '{current_topic}': {e_invoke}", exc_info=True)

    except Exception as e:
        logging.error(f"An error occurred during the segment extraction process: {e}", exc_info=True)
else:
    if not OPENAI_API_KEY:
        logging.warning("OpenAI API key not available. Skipping segment extraction.")
    if not raw_transcript_data:
        logging.warning("Transcript data not available. Skipping segment extraction.")
    if not identified_topics:
        logging.warning("No topics identified. Skipping segment extraction.")

logging.info(f"Total segments extracted across all topics: {len(all_extracted_segments)}")

In [None]:
# --- FFMPEG Snippet Generation ---
# Cut one re-encoded MP4 per extracted segment out of the downloaded video.
# Output files go to `clips_output_directory` and are named
#   <video_base_title>_<sanitized segment title>_<duration>s_<index>.mp4
#
# The counters are initialised before the precondition checks because the
# closing summary below reads `successful_clips` on every path, including the
# ones where no clip generation is attempted at all.
successful_clips = 0
failed_clips = 0

if not all_extracted_segments:
    logging.warning("No segments were extracted by the LLM. Skipping FFMPEG processing.")
elif not downloaded_video_path or not os.path.exists(downloaded_video_path):
    logging.error(f"Downloaded video path is not set or file does not exist: {downloaded_video_path}. Cannot generate clips.")
elif not video_base_title:
    logging.error("Video base title is not set. Cannot generate clips.")
else:
    logging.info(f"Starting FFMPEG clip generation for {len(all_extracted_segments)} segments...")

    for i, segment_data in enumerate(all_extracted_segments):
        try:
            start_time = segment_data['start_time']
            end_time = segment_data['end_time']
            segment_title_raw = segment_data['title']
            # segment_data['description'] is available too, but not used here.
            duration = segment_data.get('calculated_duration', end_time - start_time)  # Use calculated or re-calculate

            # Sanitize segment title for filename (same rule as the video
            # title: keep alphanumerics, '-', '_'; spaces become underscores).
            segment_title_sanitized = "".join(c if c.isalnum() or c in (' ', '-', '_') else '_' for c in segment_title_raw).rstrip().replace(" ", "_")
            if not segment_title_sanitized:  # Handle empty titles
                segment_title_sanitized = f"segment_{i}"

            # The trailing index keeps names unique when titles repeat.
            output_clip_filename = f"{video_base_title}_{segment_title_sanitized}_{duration}s_{i}.mp4"
            output_clip_path = os.path.join(clips_output_directory, output_clip_filename)

            logging.info(f"\nProcessing segment {i + 1}/{len(all_extracted_segments)} for topic '{segment_data.get('topic','N/A')}':")
            logging.info(f"  Source Video: {downloaded_video_path}")
            logging.info(f"  Start: {start_time}s, End: {end_time}s, Duration: {duration}s")
            logging.info(f"  Segment Title (Raw): {segment_title_raw}")
            logging.info(f"  Output File: {output_clip_path}")

            # The command is passed as an argument list (no shell involved),
            # so paths containing spaces need no quoting.
            command = [
                "ffmpeg",
                "-y",  # Overwrite output files without asking
                "-hide_banner",
                "-i", downloaded_video_path,
                "-ss", str(start_time),
                "-to", str(end_time),
                "-c:v", "libx264",
                "-crf", "18",      # Constant Rate Factor (lower is better quality, 18 is visually lossless for many)
                "-preset", "medium", # Encoding speed vs. compression (medium is a good balance)
                "-c:a", "aac",
                "-b:a", "192k",    # Audio bitrate
                output_clip_path
            ]

            # The joined string is for the log only; it is not what is executed.
            logging.info(f"  Executing FFMPEG command: {' '.join(command)}")
            # check=False: a non-zero exit is handled below rather than raised.
            result = subprocess.run(command, capture_output=True, text=True, check=False)

            if result.returncode == 0:
                logging.info(f"  Successfully generated clip: {output_clip_path}")
                successful_clips += 1
            else:
                logging.error(f"  Error generating clip: {output_clip_path}")
                logging.error(f"  FFMPEG STDOUT: {result.stdout.strip()}")
                logging.error(f"  FFMPEG STDERR: {result.stderr.strip()}")
                failed_clips += 1
        except KeyError as ke:
            logging.error(f"  Missing expected key in segment_data for segment {i}: {ke}. Segment data: {segment_data}", exc_info=True)
            failed_clips += 1
        except FileNotFoundError:
            logging.error("  Error: ffmpeg command not found. Please ensure ffmpeg is installed and in your system's PATH.")
            break  # Stop processing further clips if ffmpeg is not found
        except Exception as e:
            logging.error(f"  An unexpected error occurred generating clip for segment {i}: {e}", exc_info=True)
            failed_clips += 1
    logging.info(f"\nFFMPEG processing finished. Successful clips: {successful_clips}, Failed clips: {failed_clips}")

logging.info("--- YouTube Clipper Process Complete ---")
if successful_clips > 0:
    logging.info(f"Review your generated clips in the '{clips_output_directory}' directory.")
else:
    logging.info("No clips were successfully generated. Please review the logs for errors.")

In [None]:
import stable_whisper
# Or: import whisper
import ffmpeg
import json
import os

def _debug_dump_whisper_result(result):
    """Print a diagnostic description of a raw transcription result."""
    print("\n--- DEBUG: Full Whisper Transcription Result ---")
    if result is None:
        print("DEBUG: result is None")
        return

    print(f"DEBUG: type(result) = {type(result)}")
    if not isinstance(result, dict):
        # Non-dict results (e.g. a result object) are only echoed verbatim.
        print(f"DEBUG: result is not a dictionary. Raw result object: {result}")
        return

    print(f"DEBUG: result.keys() = {result.keys()}")
    if 'text' in result:
        print(f"DEBUG: result['text'] (first 500 chars) = {result['text'][:500]}")
    if 'segments' not in result:
        print("DEBUG: 'segments' key NOT FOUND in result.")
        return

    listed = result['segments']
    print(f"DEBUG: type(result['segments']) = {type(listed)}")
    if listed:
        print(f"DEBUG: len(result['segments']) = {len(listed)}")
        print(f"DEBUG: First segment in result['segments'] = {listed[0] if len(listed) > 0 else 'N/A'}")
    else:
        print("DEBUG: result['segments'] is empty.")


def _raw_segments_from_result(result):
    """Return the segment collection of a transcription result, or None.

    A dict result contributes its 'segments' entry unchanged. Any other
    object exposing a ``segments`` attribute has each element converted to a
    dict with 'text', 'start' and 'end' keys.
    """
    if isinstance(result, dict) and 'segments' in result:
        return result['segments']
    if not hasattr(result, 'segments'):
        return None

    print("DEBUG: Accessing segments from result object attribute.")
    try:
        converted = [
            {
                'text': getattr(seg_obj, 'text', '').strip(),
                'start': round(getattr(seg_obj, 'start', 0.0), 2),
                'end': round(getattr(seg_obj, 'end', 0.0), 2),
            }
            for seg_obj in result.segments
        ]
    except Exception as e_seg_obj:
        print(f"DEBUG: Error processing segments from stable_whisper object: {e_seg_obj}")
        return None

    if converted:
        print(f"DEBUG: Successfully extracted {len(converted)} segments from stable_whisper result object.")
    else:
        print("DEBUG: Failed to extract segments from stable_whisper result object or it was empty.")
    return converted


def extract_transcript_from_mp4(mp4_filepath, output_json_filepath=None, model_name="large"):
    """Transcribe an MP4 with Whisper (via stable-ts) and save timed segments.

    Args:
        mp4_filepath: Path of the video file to transcribe.
        output_json_filepath: Where to write the JSON transcript. When None,
            "<mp4 path without extension>_transcript_whisper.json" is used.
        model_name: Name handed to ``stable_whisper.load_model``.

    Returns:
        A list of dicts with keys "text" (stripped), "start", "end" and
        "duration" (all rounded to 2 decimals), or None when the file is
        missing, the model cannot be loaded, transcription fails, or no
        well-formed segment is produced. A failure to write the JSON file is
        only reported; the segments are still returned.
    """
    if not os.path.exists(mp4_filepath):
        print(f"DEBUG: Error - MP4 file not found at {mp4_filepath}")
        return None

    print(f"DEBUG: Loading Whisper model '{model_name}'...")
    try:
        model = stable_whisper.load_model(model_name)
    except Exception as e:
        print(f"DEBUG: Error loading Whisper model: {e}")
        return None

    print(f"DEBUG: Transcribing '{mp4_filepath}'...")
    try:
        result = model.transcribe(mp4_filepath, fp16=False)
        _debug_dump_whisper_result(result)

        segments_data = _raw_segments_from_result(result)
        if not segments_data:
            print("DEBUG: 'segments_data' is None or empty. Transcription did not produce expected segments.")
            return None

        transcript_segments = []
        for segment in segments_data:
            well_formed = isinstance(segment, dict) and all(
                key in segment for key in ('text', 'start', 'end')
            )
            if not well_formed:
                print(f"DEBUG: Skipping malformed segment: {segment}")
                continue
            transcript_segments.append({
                "text": segment['text'].strip(),
                "start": round(segment['start'], 2),
                "end": round(segment['end'], 2),
                "duration": round(segment['end'] - segment['start'], 2)
            })

        if transcript_segments:
            print(f"DEBUG: Transcription processing successful. Found {len(transcript_segments)} formatted segments.")
        else:
            # Segments were present, but none of them was well-formed.
            print("DEBUG: No valid segments found after processing the transcription result.")

    except Exception as e:
        print(f"DEBUG: Error during transcription processing: {e}")
        return None

    if not transcript_segments:
        print("DEBUG: No transcript segments to save.")
        return None

    # Default output location sits next to the source video.
    if output_json_filepath is None:
        output_json_filepath = f"{os.path.splitext(mp4_filepath)[0]}_transcript_whisper.json"

    try:
        with open(output_json_filepath, "w", encoding="utf-8") as f:
            json.dump(transcript_segments, f, ensure_ascii=False, indent=4)
        print(f"DEBUG: Transcript saved to: {output_json_filepath}")
    except Exception as e:
        # Best effort: the caller still gets the in-memory transcript.
        print(f"DEBUG: Error saving transcript to JSON: {e}")

    return transcript_segments

# --- Example Usage ---
# Manual smoke test: transcribe one previously downloaded video and show the
# first few timed segments.
if __name__ == "__main__":
    video_download_directory_for_test = "downloaded_video"
    test_video_filename = "The_High-Paying_AI_Job_Nobody_Knows_About__Yet__ft__Rachel_Woods.mp4"
    mp4_file_to_test = os.path.join(video_download_directory_for_test, test_video_filename)

    print(f"Attempting to test transcription with: {mp4_file_to_test}")

    if os.path.exists(mp4_file_to_test):
        transcript_data = extract_transcript_from_mp4(mp4_file_to_test, model_name="large")

        if not transcript_data:
            print("\nExample usage: Transcription failed or produced no data.")
        else:
            print("\n--- First 5 Transcript Segments (from example usage) ---")
            for segment_number, segment_info in enumerate(transcript_data[:5], start=1):
                print(f"Segment {segment_number}:")
                print(f"  Start: {segment_info['start']:.2f}s, End: {segment_info['end']:.2f}s, Duration: {segment_info['duration']:.2f}s")
                print(f"  Text: {segment_info['text']}")
    else:
        print(f"Error: The test MP4 file '{mp4_file_to_test}' does not exist. Please check the path.")

In [None]:
import torch
# Report the PyTorch build and, when a GPU is usable, which CUDA toolkit the
# build targets and which device is currently selected.
cuda_ready = torch.cuda.is_available()

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if not cuda_ready:
    print("CUDA is NOT available to PyTorch. Check installation and compatibility.")
else:
    print(f"CUDA version PyTorch was compiled with: {torch.version.cuda}")  # compare against the installed driver/toolkit
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    active_device = torch.cuda.current_device()
    print(f"Current GPU name: {torch.cuda.get_device_name(active_device)}")

In [None]:
import torch
import gc # Garbage Collector interface

# Free GPU memory held by a previously loaded model.
#
# Step 1: drop the notebook-level reference, if there is one. At cell level
# locals() and globals() are the same namespace, so one lookup is enough.
if 'model' in globals():
    del model  # Remove your reference to the model object
    print("Deleted model variable.")

# Step 2: run the garbage collector BEFORE touching the CUDA cache. Objects
# that are only kept alive by reference cycles still own their GPU tensors
# until they are collected; emptying the cache first would leave that memory
# occupied. Collecting is also useful without a GPU, so it is unconditional.
gc.collect()
print("Ran Python garbage collection.")

# Step 3: hand the now-unused cached blocks back to the driver so that other
# processes (and nvidia-smi) see them as free.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("Cleared PyTorch CUDA cache.")

# After this, check nvidia-smi. The memory used by that model should be freed.
# Note: Some base CUDA context memory (~few hundred MBs) might remain as long as PyTorch is active.