In [36]:
import os
import tempfile
import subprocess
import whisper

from youtube_transcript_api import (
    YouTubeTranscriptApi,
    RequestBlocked,
    NoTranscriptFound,
    TranscriptsDisabled,
    VideoUnavailable,
)

from typing import TypedDict, Optional

class State(TypedDict):
    url: str
    user_query: str
    transcript: Optional[str]
    llama3_output: Optional[str]
    mixtral_output: Optional[str]
    final_output: Optional[str]


# load Whisper model once (use "tiny" for speed)
whisper_model = whisper.load_model("tiny")  # or "base" if your PC is strong


def _extract_video_id(url: str) -> str:
    """Handle normal YouTube URLs and youtu.be short links with params."""
    if "v=" in url:
        return url.split("v=")[-1].split("&")[0]
    last_part = url.split("/")[-1]
    return last_part.split("?")[0].split("&")[0]


def _transcribe_audio_locally(url: str) -> str:
    """
    Downloads audio using yt-dlp and transcribes with local Whisper (offline).
    Returns plain text.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        audio_path = os.path.join(tmpdir, "audio.webm")

        cmd = [
            "yt-dlp",
            "-x",                   # extract audio
            "--audio-format", "webm",
            "-o", audio_path,
            url,
        ]

        proc = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        if proc.returncode != 0:
            print("[yt-dlp stdout]:", proc.stdout)
            print("[yt-dlp stderr]:", proc.stderr)
            raise RuntimeError(f"yt-dlp failed with code {proc.returncode}")

        # Transcribe with Whisper (local)
        result = whisper_model.transcribe(audio_path)
        return result["text"]


def transcript_generator(state: State) -> State:
    """
    1. Try YouTubeTranscriptApi (captions).
    2. If blocked/unavailable → download audio + transcribe with Whisper.
    3. Store final transcript in state["transcript"] (or None if everything fails).
    """
    url = state["url"]
    video_id = _extract_video_id(url)

    ytt_api = YouTubeTranscriptApi()

    # ---- First try: captions API ----
    try:
        segments = ytt_api.fetch(video_id).to_raw_data()
        transcript = " ".join(seg["text"] for seg in segments)
        state["transcript"] = transcript
        return state
    except (VideoUnavailable, RequestBlocked, NoTranscriptFound, TranscriptsDisabled) as e:
        print(f"[Caption API failed → Whisper fallback] {e}")
    except Exception as e:
        print(f"[Caption API unexpected error → Whisper fallback] {e}")

    # ---- Second try: audio + Whisper ----
    try:
        transcript = _transcribe_audio_locally(url)
        state["transcript"] = transcript
    except Exception as e:
        print(f"[Whisper fallback failed] {e}")
        state["transcript"] = None

    return state


100%|█████████████████████████████████████| 72.1M/72.1M [00:13<00:00, 5.49MiB/s]


In [33]:
state = {
    "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
    
    "user_query": "Generate notes",
    "transcript": None,
    "llama3_output": None,
    "mixtral_output": None,
    "final_output": None,
}

result = transcript_generator(state)
print(result["transcript"][:300])


[♪♪♪] ♪ We're no strangers to love ♪ ♪ You know the rules
and so do I ♪ ♪ A full commitment's
what I'm thinking of ♪ ♪ You wouldn't get this
from any other guy ♪ ♪ I just wanna tell you
how I'm feeling ♪ ♪ Gotta make you understand ♪ ♪ Never gonna give you up ♪ ♪ Never gonna let you down ♪ ♪ Never g


In [25]:
print(state['transcript'])

[♪♪♪] ♪ We're no strangers to love ♪ ♪ You know the rules
and so do I ♪ ♪ A full commitment's
what I'm thinking of ♪ ♪ You wouldn't get this
from any other guy ♪ ♪ I just wanna tell you
how I'm feeling ♪ ♪ Gotta make you understand ♪ ♪ Never gonna give you up ♪ ♪ Never gonna let you down ♪ ♪ Never gonna run around
and desert you ♪ ♪ Never gonna make you cry ♪ ♪ Never gonna say goodbye ♪ ♪ Never gonna tell a lie
and hurt you ♪ ♪ We've known each other
for so long ♪ ♪ Your heart's been aching
but you're too shy to say it ♪ ♪ Inside we both know
what's been going ♪ ♪ We know the game
and we're gonna play it ♪ ♪ And if you ask me
how I'm feeling ♪ ♪ Don't tell me
you're too blind to see ♪ ♪ Never gonna give you up ♪ ♪ Never gonna let you down ♪ ♪ Never gonna run around
and desert you ♪ ♪ Never gonna make you cry ♪ ♪ Never gonna say goodbye ♪ ♪ Never gonna tell a lie
and hurt you ♪ ♪ Never gonna give you up ♪ ♪ Never gonna let you down ♪ ♪ Never gonna run around
and desert you ♪ ♪ Never gon

In [None]:
#Creating state schema for nodes
from typing import TypedDict, Optional
from dotenv import load_dotenv
import os

load_dotenv()  # reads the .env file

from groq import Groq
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])

class State(TypedDict):
    # input from user
    url: str                  # YouTube URL
    user_query: str           # what the user wants (notes, summary, etc.)

    # intermediate data
    transcript: Optional[str] # filled by transcript_generator
    llama3_output: Optional[str]   # will be filled by LLaMA3 node
    mixtral_output: Optional[str]  # will be filled by Mixtral node

    # final result
    final_output: Optional[str]    # final answer from Qwen


KeyError: 'GROQ_API_KEY'

In [38]:
state: State = {
    "url": "https://www.youtube.com/watch?v=dIb-DujRNEo&list=PLZoTAELRMXVPFd7JdvB-rnTb_5V26NYNO&index=5",
    "user_query": "Generate notes",
    "transcript": None,
    "llama3_output": None,
    "mixtral_output": None,
    "final_output": None,
}

state = transcript_generator(state)

if state["transcript"]:
    print(state["transcript"][:500])
else:
    print("Transcript failed (both captions and Whisper).")


Hello guys. So welcome to this amazing crash course on building agentic AI application with the help of Langraph. This entire crash course has been divided into three important parts and each and every part will be somewhere around 2 to three hours of videos. Right. And here you can basically see what in which way we are going to cover all the topics and uh where we are going to aim once we reach to the part three. Okay. So in the part one you'll be able to see that we will be covering various f


In [39]:
print(state['transcript'])

Hello guys. So welcome to this amazing crash course on building agentic AI application with the help of Langraph. This entire crash course has been divided into three important parts and each and every part will be somewhere around 2 to three hours of videos. Right. And here you can basically see what in which way we are going to cover all the topics and uh where we are going to aim once we reach to the part three. Okay. So in the part one you'll be able to see that we will be covering various fundamental techniques which are really really important in order to build agentic AI application. some of the important topics like how to build a chatbot, how to integrate tools, how to integrate multiple tools in a chatbot, you know, how to add memory, how to add human in the loop like human feedbacks when you're executing the entire graph state, how to use different streaming technique, how to probably go ahead and use MCP, how to build MCP completely from scratch, right? So this part also we

In [40]:
#Creatin a tool that takes transcripts as input and response the title and content of the video

def transcript_to_title_and_content(state: State) -> State:
    """
    Uses Groq LLaMA3 and Mixtral to turn the transcript into:
    - a good title
    - well-structured content/summary

    Saves:
      - raw llama3_output
      - raw mixtral_output
      - final title + content into state.
    """
    transcript = state.get("transcript")

    if not transcript:
        state["final_output"] = "No transcript available to generate title and content."
        state["title"] = None
        state["content"] = None
        return state

    # 1) First pass: LLaMA3 creates initial title + content
    system_prompt_llama = (
        "You are an expert at summarizing educational YouTube lectures. "
        "Given a full transcript, you must generate:\n"
        "1) A concise, catchy title (max ~12 words).\n"
        "2) A detailed, well-structured content/notes section.\n\n"
        "Return STRICTLY in this format:\n\n"
        "TITLE: <your title here>\n\n"
        "CONTENT:\n<your multi-paragraph content here>\n"
    )

    llama_resp = groq_client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {"role": "system", "content": system_prompt_llama},
            {"role": "user", "content": transcript},
        ],
        temperature=0.5,
    )
    llama_out = llama_resp.choices[0].message.content
    state["llama3_output"] = llama_out

    # 2) Second pass: Mixtral refines and improves LLaMA3 output
    system_prompt_mixtral = (
        "You are a careful editor. You will receive a draft TITLE and CONTENT. "
        "Improve clarity, structure, and flow, but keep the meaning.\n\n"
        "Return in EXACTLY this format:\n\n"
        "TITLE: <improved title>\n\n"
        "CONTENT:\n<improved multi-paragraph content>\n"
    )

    mixtral_resp = groq_client.chat.completions.create(
        model="mixtral-8x7b-32768",
        messages=[
            {"role": "system", "content": system_prompt_mixtral},
            {"role": "user", "content": llama_out},
        ],
        temperature=0.4,
    )
    mixtral_out = mixtral_resp.choices[0].message.content
    state["mixtral_output"] = mixtral_out

    # 3) Parse Mixtral output into title + content
    title = None
    content = None

    if "CONTENT:" in mixtral_out:
        head, body = mixtral_out.split("CONTENT:", 1)
        # remove "TITLE:" prefix if present
        title = head.replace("TITLE:", "").strip().strip(":")
        content = body.strip()
    else:
        # fallback: use full text as content, first line as title
        lines = mixtral_out.strip().splitlines()
        title = lines[0].strip() if lines else "Generated Title"
        content = mixtral_out

    # 4) Save final values in state
    state["title"] = title
    state["content"] = content
    state["final_output"] = content  # if you want this as main response

    return state
