# Basic

In [15]:
import sys
import os
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, BaseMessage
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from pydantic import BaseModel, Field
from typing import TypedDict, Annotated
import re
import sqlite3
from dotenv import load_dotenv
load_dotenv()
from typing import List
from langchain_core.messages import BaseMessage

In [26]:
# ------------------ Build LLM ------------------
# Chat model shared by everything below. temperature=0 asks for the least
# random sampling so repeated runs stay as consistent as the API allows.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
)

# ------------------ Structured Summary Schema ------------------
# Output schema handed to ``model.with_structured_output`` below; the
# ``description`` strings are the per-field instructions attached to it.
# NOTE: documented with comments on purpose — a class docstring would
# presumably be picked up by pydantic as the schema description and so
# change what is sent to the model (confirm before adding one).
class SummaryModel(BaseModel):
    # The summary text; this is the only field the summarization node reads.
    summary: str = Field(description="Concise, clear summary of the text provided.")
    # NOTE(review): plural name but a single float, and nothing in the visible
    # code reads it — confirm whether a list of timestamps was intended.
    timestamps: float = Field(description="The time (in seconds) from where the answer was taken")

# Wrap the chat model so its replies are parsed into SummaryModel.
structured_model = model.with_structured_output(SummaryModel)

# System prompt for the summarizer.
# The transcript itself is delivered in the human message built by the
# summarization node, so the prompt must not contain an unformatted
# "{transcript}" placeholder: this string is never passed through
# str.format, and the literal braces would reach the model verbatim.
# NOTE(review): the prompt asks for three free-form sections while the
# structured output only has `summary` and `timestamps` — confirm which
# shape is actually wanted.
system_message = SystemMessage(content="""
You are a video summarizer. Analyze the timestamped transcript supplied in the user's message and provide:

    1. **OVERVIEW**: Brief summary of the video content
    2. **KEY POINTS**: Main points with their timestamps (format: "Point description [timestamp]")
    3. **MAIN TOPICS**: List of topics covered

    Format your response clearly with the three sections above.
    Always include timestamps in square brackets for each key point.
""")

# ------------------ Chat State ------------------
class ChatState(TypedDict):
    """State schema passed to ``StateGraph`` for the summarization graph."""

    # Conversation messages carried through the graph.
    # NOTE(review): no reducer is attached (``Annotated`` / ``add_messages``
    # are imported but unused here), so a node's returned "messages"
    # presumably replaces this list instead of appending — confirm intent.
    messages: list[BaseMessage]

# ------------------ Summarization Node ------------------
def summarize_node(state: ChatState, text_to_summarize: str):
    """Summarize ``text_to_summarize`` and return the new message list.

    Args:
        state: Current graph state; only ``state["messages"]`` is read.
        text_to_summarize: Text placed in the human message sent to the
            structured model.

    Returns:
        A dict whose ``"messages"`` list holds the last incoming message
        (when there is one) followed by an ``AIMessage`` containing the
        generated summary.

    Raises:
        ValueError: If ``text_to_summarize`` is ``None`` or blank. Without
            this check a failed transcript fetch would be interpolated as
            the string "None" and summarized as if it were real content.
    """
    if not text_to_summarize or not text_to_summarize.strip():
        raise ValueError("No text to summarize: the transcript is missing or empty.")

    prompt = [
        system_message,
        HumanMessage(content=f"Text to summarize:\n{text_to_summarize}"),
    ]

    response: SummaryModel = structured_model.invoke(prompt)

    # Slice rather than index so an empty history yields just the AI reply
    # instead of raising IndexError.
    last_message = state["messages"][-1:]
    return {
        "messages": [*last_message, AIMessage(content=response.summary)]
    }

# ------------------ Checkpointer ------------------
# SQLite-backed checkpoint store used when the graph is compiled.
# check_same_thread=False lets the connection be used from threads other
# than the one that opened it.
conn = sqlite3.connect(
    "summaryDB.db",
    check_same_thread=False,
)
checkpointer = SqliteSaver(conn)

# ------------------ Build LangGraph ------------------
def build_summarization_graph(text_to_summarize: str):
    """Compile a single-node graph that summarizes ``text_to_summarize``.

    The text is captured in a closure, so the compiled graph always
    summarizes the text it was built with. The graph runs
    START -> "summarize_node" -> END and is compiled with the module-level
    ``checkpointer``.

    Args:
        text_to_summarize: Text forwarded to ``summarize_node`` on each run.

    Returns:
        The compiled graph.
    """
    builder = StateGraph(ChatState)

    def _run_summary(state: ChatState):
        # Bind the captured text; graph nodes are called with the state only.
        return summarize_node(state, text_to_summarize)

    builder.add_node("summarize_node", _run_summary)
    builder.add_edge(START, "summarize_node")
    builder.add_edge("summarize_node", END)

    return builder.compile(checkpointer=checkpointer)

# ------------------ Load YouTube Transcript ------------------
def load_transcript(url: str) -> str | None:
    """
    Fetch transcript for a YouTube video.
    """
    pattern = r'(?:v=|\/)([0-9A-Za-z_-]{11})'
    match = re.search(pattern, url)
    if match:
        video_id = match.group(1)
        try:
            captions = YouTubeTranscriptApi().fetch(video_id).snippets
            data = [f"{item.text} ({item.start})" for item in captions]
            return " ".join(data)
        except Exception as e:
            print(f"❌ Error fetching transcript: {e}")
            return None

# ------------------ Example Usage ------------------
import uuid  # only needed for the thread ID below

youtube_url = "https://www.youtube.com/watch?v=5AfJ0N3MvpA"
youtube_captions = load_transcript(youtube_url)

# load_transcript returns None when the transcript could not be obtained.
# Stop here instead of building a graph that would ask the model to
# summarize the string "None".
if youtube_captions is None:
    raise RuntimeError(f"No transcript available for {youtube_url}")

summarizer = build_summarization_graph(youtube_captions)

# Create unique thread ID so this run gets its own checkpoint history
thread_id = str(uuid.uuid4())

# Initial state
state = {
    "messages": [HumanMessage(content="Summarize this text.")]
}

# Configuration for checkpointer
config = {
    "configurable": {
        "thread_id": thread_id
    }
}

# Invoke summarizer with proper config
output = summarizer.invoke(state, config=config)
print("Summary:\n", output["messages"][-1].content)

Summary:
 The video discusses the future of coding with AI agents, highlighting how AI tools like GitHub Copilot can significantly improve productivity for experienced coders by automating repetitive tasks and accelerating prototype development.


In [27]:
output

{'messages': [HumanMessage(content='Summarize this text.', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The video discusses the future of coding with AI agents, highlighting how AI tools like GitHub Copilot can significantly improve productivity for experienced coders by automating repetitive tasks and accelerating prototype development.', additional_kwargs={}, response_metadata={})]}