In [12]:
from langchain_google_genai import ChatGoogleGenerativeAI
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from pydantic import BaseModel, Field
import re
def load_transcript(url: str) -> str | None:
    """
    Fetch transcript for a YouTube video.
    """
    pattern = r'(?:v=|\/)([0-9A-Za-z_-]{11})'
    match = re.search(pattern, url)
    if match:
        video_id = match.group(1)
        try:
            captions = YouTubeTranscriptApi().fetch(video_id,languages=['en','hi']).snippets
            data = [f"{item.text} ({item.start})" for item in captions]
            return " ".join(data)
        except Exception as e:
            print(f"❌ Error fetching transcript: {e}")
            return None

In [32]:
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from pydantic import BaseModel, Field
from typing import List

class Quiz(BaseModel):
    # One multiple-choice quiz item. Documented with comments rather than a
    # docstring so the only schema text is the Field descriptions below.

    # The question text.
    question: str = Field(
        description="A well-formed multiple-choice quiz question",
    )
    # Candidate answers; the count of 4 is stated in the description only and
    # is not validated here.
    options: List[str] = Field(
        description="List of 4 possible answer options",
    )
    # Expected to be one of `options`; membership is not validated here.
    correct_answer: str = Field(
        description="The correct answer from the options list",
    )
    # Position in the transcript the question was drawn from (presumably the
    # per-snippet start value embedded in the transcript text -- confirm).
    timestamp: float = Field(
        description="timestamp from where the quesiton was picked from the transcripts",
    )
class QuizList(BaseModel):
    # Top-level container for a generated quiz. Documented with comments
    # rather than a docstring so the only schema text is the Field description.

    # The individual quiz items; the count of 10 is stated in the description
    # only and is not validated here.
    quizzes: List[Quiz] = Field(
        description="List of 10 quiz questions with options and answers",
    )

# ---- Prompt Template ----
# System message: sets the QuizBot persona and the rules every generated
# question has to follow.
system_template = """
You are QuizBot, an AI assistant that creates professional quizzes.
Your task is to generate exactly 10 multiple-choice questions from the provided YouTube transcript.
Each question must:
- Be clear, concise, and relevant to the transcript content
- Include 4 answer options (A, B, C, D)
- Clearly specify the correct answer
Format the response strictly as structured data according to the schema provided.
Do not include explanations, context, or additional text outside the schema.
"""

# Human message: carries the transcript through the {transcript} placeholder.
user_template = """
Here is the YouTube video transcript:
{transcript}
"""

# Two-message chat prompt (system rules first, then the transcript), given as
# (role, template) pairs.
quiz_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_template),
        ("human", user_template),
    ]
)


In [None]:
from langchain.output_parsers import PydanticOutputParser
# Fetch the transcript and stop here if it is missing: load_transcript()
# returns None on failure, and formatting None into the prompt would invoke
# the model on the text "None" instead of a real transcript.
captions = load_transcript(url="https://www.youtube.com/watch?v=s3KnSb9b4Pk")
if not captions:
    raise RuntimeError("Transcript could not be loaded; cannot generate a quiz without it.")

# temperature=0 is passed to the model constructor; the model is then wrapped
# so that its reply is returned as a QuizList.
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash', temperature=0)
structured_llm = model.with_structured_output(QuizList)

# Fill the {transcript} placeholder and send the resulting chat messages.
response = structured_llm.invoke(
    quiz_prompt.format_prompt(transcript=captions).to_messages()
)

__main__.QuizList

In [37]:
# Show each generated quiz: question, options, correct answer and timestamp,
# one per line, followed by a 40-dash separator.
for quiz in response.quizzes:
    print(
        quiz.question,
        quiz.options,
        quiz.correct_answer,
        quiz.timestamp,
        "--" * 20,
        sep="\n",
    )

What is the primary focus of the video's roadmap for learning AI in 2025?
['A) Data analysis', 'B) Web development', 'C) AI, specifically Generative AI and Agentic AI', 'D) Cloud computing']
C) AI, specifically Generative AI and Agentic AI
25.519
----------------------------------------
According to the speaker, what percentage of companies are currently working on different kinds of Generative AI applications?
['A) Less than 50%', 'B) More than 80%', 'C) Exactly 60%', 'D) Around 20%']
B) More than 80%
55.92
----------------------------------------
How many different learning routes are outlined in the AI roadmap presented in the video?
['A) One', 'B) Two', 'C) Three', 'D) Four']
C) Three
135.84
----------------------------------------
Which route is recommended for freshers or those starting from scratch in AI, even in the current scenario?
['A) Modern Route', 'B) Advanced Route', 'C) Traditional Route', 'D) Non-technical Route']
C) Traditional Route
372.88
---------------------------