In [1]:
import os
import re
from getpass import getpass
from urllib.parse import urlparse, parse_qs

import google.generativeai as genai
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound

In [2]:
def extract_video_id(url):
    """Extract the video ID from a YouTube URL.

    Recognised shapes:
      * any URL carrying a non-empty ``v`` query parameter
        (e.g. ``https://www.youtube.com/watch?v=<id>``)
      * short links: ``https://youtu.be/<id>``
      * path-based links on a YouTube host:
        ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    A missing scheme (``youtu.be/<id>``) is tolerated.

    Args:
        url: The URL string to inspect.

    Returns:
        The video ID as a string, or ``None`` when no ID can be found
        (including when ``url`` is empty or not a string).
    """
    if not url or not isinstance(url, str):
        return None

    parsed = urlparse(url.strip())
    if not parsed.scheme and not parsed.netloc:
        # Without a scheme urlparse puts the host into the path; re-parse with one.
        parsed = urlparse("https://" + url.strip())

    # Case 1: standard link with a ?v=<id> query parameter.
    # parse_qs drops blank values, so a present "v" is always non-empty.
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    # hostname (unlike netloc) is lower-cased and has no port / credentials.
    host = parsed.hostname or ""
    if host.startswith("www."):
        host = host[len("www."):]
    segments = [part for part in parsed.path.split("/") if part]

    # Case 2: short link, the ID is the first path segment.
    if host == "youtu.be":
        return segments[0] if segments else None

    # Case 3: path-based links such as /embed/<id> or /shorts/<id>.
    youtube_hosts = {"youtube.com", "m.youtube.com", "music.youtube.com", "youtube-nocookie.com"}
    id_prefixes = {"embed", "shorts", "live", "v"}
    if host in youtube_hosts and len(segments) >= 2 and segments[0] in id_prefixes:
        return segments[1]

    return None

In [3]:
def get_video_metadata(video_id):
    """Look up a video's title and channel name via YouTube's oEmbed endpoint.

    Args:
        video_id: The YouTube video ID.

    Returns:
        A ``(title, channel)`` tuple of strings. This function never raises;
        failures are reported through placeholder values instead:
          * non-200 response -> ``("Title not found", "Channel not found")``
          * any exception    -> ``("Error: <message>", "Channel unavailable")``
        Fields missing from a successful response fall back to
        ``"Unknown Title"`` / ``"Unknown Channel"``.
    """
    try:
        # Passing the watch URL through `params` lets requests percent-encode it,
        # and the timeout keeps a stalled connection from hanging the kernel.
        response = requests.get(
            "https://www.youtube.com/oembed",
            params={
                "url": f"https://www.youtube.com/watch?v={video_id}",
                "format": "json",
            },
            timeout=10,
        )
        if response.status_code != 200:
            return "Title not found", "Channel not found"

        data = response.json()
        title = data.get("title", "Unknown Title")
        author = data.get("author_name", "Unknown Channel")
        return title, author
    except Exception as e:
        # Deliberately best-effort: metadata is cosmetic, so report and carry on.
        return f"Error: {e}", "Channel unavailable"

In [4]:
def get_available_transcript_languages(video_id):
    """List the language of every transcript offered for a video.

    Args:
        video_id: The YouTube video ID.

    Returns:
        A list with the ``language`` attribute of each listed transcript;
        an empty list when ``NoTranscriptFound`` is raised. On any other
        exception an error message *string* is returned instead of a list,
        so callers should check the type before iterating.
    """
    try:
        return [
            entry.language
            for entry in YouTubeTranscriptApi.list_transcripts(video_id)
        ]
    except NoTranscriptFound:
        return []
    except Exception as err:
        return f"Error fetching transcript list: {err}"

In [5]:
def get_best_transcript_language(video_id):
    """Report the language of the preferred transcript for a video.

    The first transcript whose ``is_generated`` flag is false wins; if there
    is none, the first listed transcript is used.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The chosen transcript's ``language`` value,
        ``"No transcript available"`` when nothing is listed or
        ``NoTranscriptFound`` is raised, or an
        ``"Error fetching transcript list: ..."`` message on any other error.
    """
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

        # First preference: a transcript that was not auto-generated.
        manual = next((t for t in transcript_list if not t.is_generated), None)
        if manual is not None:
            return manual.language

        # Otherwise settle for whatever is listed first, if anything.
        fallback = next(iter(transcript_list), None)
        if fallback is not None:
            return fallback.language

        return "No transcript available"
    except NoTranscriptFound:
        return "No transcript available"
    except Exception as err:
        return f"Error fetching transcript list: {err}"

In [6]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound

def get_youtube_transcript_text(video_id):
    """Fetch the text of the preferred transcript for a video.

    A transcript whose ``is_generated`` flag is false is preferred; otherwise
    the first listed transcript is used. The chosen transcript's language is
    printed as a progress message.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The ``text`` of every fetched entry joined with single spaces.
        Failures are reported as sentinel strings rather than exceptions:
          * ``"No transcript text available"`` - the listing was empty
          * ``"No transcript found for this video."`` - ``NoTranscriptFound``
          * ``"Error fetching transcript: <message>"`` - any other exception
    """
    try:
        # Materialise the listing once: the listing object is only relied on
        # to be iterable, so truthiness/indexing must be done on a real list.
        transcripts = list(YouTubeTranscriptApi.list_transcripts(video_id))
        if not transcripts:
            print("No suitable transcript found in the list.")
            return "No transcript text available"

        selected_transcript = next(
            (t for t in transcripts if not t.is_generated), None
        )
        if selected_transcript is None:
            selected_transcript = transcripts[0]
            print(f"Selected auto-generated transcript: {selected_transcript.language}")
        else:
            print(f"Selected manual transcript: {selected_transcript.language}")

        transcript_entries = selected_transcript.fetch()
        return " ".join(entry.text for entry in transcript_entries)

    except NoTranscriptFound:
        print(f"NoTranscriptFound for video ID: {video_id}")
        return "No transcript found for this video."
    except Exception as e:
        print(f"Error fetching transcript for video ID {video_id}: {e}")
        return f"Error fetching transcript: {e}"

In [7]:
def detect_video_type(transcript_preview):
    """Guess a video's category from a short transcript excerpt.

    The excerpt is lower-cased and scanned for keyword substrings. Categories
    are tried in the order listed below and the first one with any hit wins;
    matching is by substring, so e.g. "test" also matches inside "latest".

    Args:
        transcript_preview: A string excerpt of the transcript.

    Returns:
        One of ``"educational"``, ``"motivational"``,
        ``"product review/tutorial"``, ``"news/analysis"`` or ``"general"``.
    """
    # Order matters: earlier categories take precedence over later ones.
    categories = (
        ("educational", ("learn", "explain", "tutorial", "guide", "concept", "theory")),
        ("motivational", ("inspire", "motivate", "success", "achieve", "dream", "believe")),
        ("product review/tutorial", ("review", "features", "demo", "unboxing", "test", "comparison")),
        ("news/analysis", ("breaking", "report", "update", "news", "politics", "world")),
    )

    haystack = transcript_preview.lower()

    for label, keywords in categories:
        for keyword in keywords:
            if keyword in haystack:
                return label
    return "general"

In [8]:
def generate_prompt(transcript, question=None, video_type="general", chat_history=None,
                    max_transcript_chars=12000):
    """Build the full text prompt for one turn of a transcript-grounded chat.

    The prompt consists of a role sentence chosen by ``video_type``, the
    (truncated) transcript, every earlier ``User:``/``Bot:`` exchange, and
    finally the new question followed by an open ``Bot: `` cue.

    Args:
        transcript: Transcript text; ``None`` is treated as an empty string.
        question: The user's new question. It is interpolated as-is, so
            callers should always pass a string (``None`` would appear
            literally as "None").
        video_type: Category used to pick the role sentence, matched
            case-insensitively. Unknown values and ``None`` fall back to
            the "general" role.
        chat_history: Optional iterable of ``(user_message, bot_message)``
            pairs from earlier turns.
        max_transcript_chars: Maximum number of transcript characters to
            include; ``None`` disables truncation. Defaults to 12000.

    Returns:
        The assembled prompt string.
    """
    base_prompts = {
        "educational": "You are a helpful tutor. Engage in a multi-turn interactive Q&A session based on the transcript.",
        "motivational": "You are a motivational content explainer. Engage in a multi-turn conversation based on the transcript.",
        "product review/tutorial": "You are a product assistant helping users understand tech reviews or tutorials. Engage in a multi-turn conversation based on the transcript.",
        "news/analysis": "You are a factual news analyst. Engage in a multi-turn conversation based on the transcript.",
        "general": "You are a smart assistant that explains things clearly. Engage in a multi-turn conversation based on the transcript."
    }

    # Guard against video_type=None, which used to crash on .lower().
    role = base_prompts.get((video_type or "general").lower(), base_prompts["general"])

    # Slicing with a None bound keeps the whole string.
    transcript_excerpt = (transcript or "")[:max_transcript_chars]

    prompt = f"""{role}
Based on the transcript below, answer the user's questions and engage in a conversation.

Transcript:
{transcript_excerpt}

"""

    if chat_history:
        for user_msg, bot_msg in chat_history:
            prompt += f"User: {user_msg}\nBot: {bot_msg}\n"

    prompt += f"User: {question}\nBot: "

    return prompt

In [9]:
def chat_with_transcript(model, transcript, video_type="general"):
    """Run an interactive question/answer loop about a transcript.

    Each question is read with ``input``, turned into a prompt together with
    the transcript and the conversation so far, and sent to
    ``model.generate_content``; the reply's ``text`` is printed and recorded.

    The loop ends when the user types ``exit``, ``quit`` or ``bye`` (case and
    surrounding whitespace are ignored) or when input is closed/interrupted.
    Blank input is ignored. If generating a reply fails, the error is shown
    and the chat continues without recording that turn.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.
        transcript: Transcript text the conversation is grounded in.
        video_type: Category passed through to the prompt builder.

    Returns:
        None.
    """
    exit_commands = ("exit", "quit", "bye")
    chat_history = []

    while True:
        try:
            question = input("User: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel should end the chat cleanly.
            print("\nBot: Goodbye!")
            break

        if not question:
            # Nothing to ask; do not spend a model call on an empty message.
            continue

        if question.lower() in exit_commands:
            print("Bot: Goodbye!")
            break

        prompt = generate_prompt(transcript, question, video_type, chat_history=chat_history)
        try:
            response = model.generate_content(prompt)
            # Reading .text is kept inside the try so a reply without text is handled too.
            answer = response.text
        except Exception as e:
            # One failed turn should not throw away the whole conversation.
            print(f"Bot: Sorry, I could not generate a response ({e}).")
            continue

        print(f"Bot: {answer}")
        chat_history.append((question, answer))

In [10]:
# 1. Get API Key
# Secrets must never be stored in the notebook: read the key from the
# GOOGLE_API_KEY environment variable, or ask for it with a hidden prompt.
# Any key that was previously committed in this cell should be revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Enter your Google API Key: ")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('models/gemini-2.0-flash')

In [11]:
# 2. Get YouTube Video URL
# yt_url = input("Enter the YouTube video URL: ")
yt_url = 'https://www.youtube.com/watch?v=J4Hd5wudIrk'
video_id = extract_video_id(yt_url)

if not video_id:
    # Raise rather than call exit(): in a notebook exit() shuts the kernel down,
    # whereas an exception stops this cell (and "Run All") and keeps state intact.
    raise ValueError(f"Invalid YouTube URL: {yt_url!r}")

In [12]:
 # 3. Display Video Title and Channel Name
# Both values come back as plain strings, including on lookup failure.
title, channel = get_video_metadata(video_id)
print(f"\nVideo Title: {title}\nChannel Name: {channel}")


Video Title: Career Advice For A World After AI
Channel Name: Varun Mayya


In [13]:
# 4. Show Transcript Language
# Reports the language of the preferred (manual-first) transcript.
transcript_language = get_best_transcript_language(video_id)
print("Transcript Language:", transcript_language)

Transcript Language: English (auto-generated)


In [14]:
# 5. Detect Video Type
# Only the first 500 characters of the transcript feed the keyword check.
transcript_preview = get_youtube_transcript_text(video_id)[:500].lower()
video_type = detect_video_type(transcript_preview)
print("Detected Video Type:", video_type)

Selected auto-generated transcript: English (auto-generated)
Detected Video Type: educational


In [15]:
# 6. Start Chat with LLM
transcript_text = get_youtube_transcript_text(video_id)

# get_youtube_transcript_text reports failures as sentinel strings, so every
# one of them (including the "Error fetching transcript: ..." form) must be
# rejected here; otherwise the error message would be sent to the model as if
# it were the transcript.
if (
    transcript_text
    and transcript_text not in ("No transcript found for this video.", "No transcript text available")
    and not transcript_text.startswith("Error fetching transcript: ")
):
    print("\nStarting chat with the LLM. Type 'exit', 'quit', or 'bye' to end.")
    chat_with_transcript(model, transcript_text, video_type)
else:
    print("Could not start chat because the transcript is not available.")

Selected auto-generated transcript: English (auto-generated)

Starting chat with the LLM. Type 'exit', 'quit', or 'bye' to end.
Bot: Okay, I can help with that! Based on the transcript, here's a summary of the advice given:

*   **Don't blindly follow tutorial makers:** They often don't have the best insights into the future, especially regarding rapidly evolving fields like AI.

*   **Pay attention to experimental practitioners:** They might be working on things that seem strange or irrelevant now, but could be the future. Don't dismiss them just because you don't understand their work immediately.

*   **Build the future yourself:** The best way to predict the future is to experiment and create things. This gives you firsthand knowledge of the capabilities and limitations of new technologies.

*   **Focus on reasoning and problem-solving:** AI is increasingly handling the coding aspect of software development. Therefore, skills like problem diagnosis, communication, and understanding