# Rag Structured Output

In [2]:

from langchain_core.messages import HumanMessage
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from youtube_transcript_api import YouTubeTranscriptApi
import re      
            
            
# _-----------------------------------------------------FUNCTIONS FOR RAG----------------------------------------------

# ------------------ Transcript Loader ------------------
def load_transcript(url: str) -> str | None:
    pattern = r'(?:v=|\/)([0-9A-Za-z_-]{11})'
    match = re.search(pattern, url)
    if match:
        video_id = match.group(1)
        try:
            captions = YouTubeTranscriptApi().fetch(video_id).snippets
            # join text + start_time
            data = [f"{item.text} ({item.start})" for item in captions]
            return " ".join(data)
        except Exception as e:
            print(f"Error fetching transcript: {e}")
            return None

# ------------------ Text Splitter ------------------
def text_splitter(transcript):
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.create_documents([transcript])

# ------------------ Vector Store & Retriever  ------------------
def generate_embeddings(chunks):
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return FAISS.from_documents(chunks, embeddings)

def retriever_docs(vector_store):
    return vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})

def format_docs(retrieved_docs):
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


In [3]:
# ------------------ Imports ------------------
from dotenv import load_dotenv
load_dotenv()

from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.graph.message import add_messages
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from pydantic import BaseModel, Field
from typing import TypedDict, Annotated
import re
import os
from langchain.prompts import PromptTemplate
#os.environ["LANGCHAIN_PROJECT"] = "TubeTalkAI Testing"

# ------------------ Build LLM (Gemini) ------------------
model = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

#--------------------Prompt Template----------------------
template = """
You are the YouTuber from the video, directly answering the viewer’s question.

Rules:
1. ONLY use the transcript provided below.
2. Give the answer in simple, clear sentences — without timestamps inside the text.
3. ALWAYS return the exact timestamp (in seconds) from the transcript line you used.
   - Do NOT round or estimate timestamps.
   - If multiple transcript parts are relevant, return the most direct one.
4. Do NOT add greetings, filler, or extra commentary.
5. If the transcript does not answer, say: "Sorry, I didn’t talk about that in this video."

Transcript:
{transcript}

Question:
{question}

Output format (for schema):
- "answer": A list of 1–3 short strings that directly answer the question (no timestamps here).
- "timestamps": The exact timestamp (in seconds) from the transcript where the answer was found.
"""




prompt = PromptTemplate(
    input_variables=["transcript", "question"],
    template=template,
)
# ------------------ Structured Output Schema ------------------
class AnsandTime(BaseModel):
    answer:str = Field(
        description="Answers to user's question (do NOT include timestamps here)"
    )
    timestamps: float = Field(
        description="The time (in seconds) from where the answer is taken"
    )

structured_model = model.with_structured_output(AnsandTime)

# ------------------ ChatState ------------------
class ChatState(TypedDict):
    messages: Annotated[list[BaseMessage], "add_messages"]

# ------------------ Chat Node ------------------
def chat_node(state: ChatState):
    # Extract user question from state
    user_message = state["messages"][-1].content  # last message is the user's input

    # Fill the prompt
    final_prompt = prompt.format(
        transcript=context,   # <-- your transcript goes here
        question=user_message
    )

    # Get structured output
    response = structured_model.invoke(final_prompt)
    ai_text = f"{response.answer}\nTimestamp: {response.timestamps}"

    return {
        "messages": [
            state["messages"][-1],  # include the HumanMessage again
            AIMessage(content=ai_text)  # add the AI reply
        ]
    }

# ------------------ Build Graph ------------------
checkpointer = InMemorySaver()

graph = StateGraph(ChatState)
graph.add_node("chat_node", chat_node)
graph.add_edge(START, "chat_node")
graph.add_edge("chat_node", END)

CONFIG = {'configurable': {'thread_id': "newthread"}}
workflow =graph.compile(checkpointer = checkpointer)

In [4]:
# ------------------ Load YouTube Transcript ------------------
youtube_input = "https://www.youtube.com/watch?v=s3KnSb9b4Pk"
youtube_captions = load_transcript(youtube_input)
print("Transcript Loaded:", youtube_captions[:200], "...")

# Split & Embed transcript
chunks = text_splitter(youtube_captions)
vector_store = generate_embeddings(chunks)
retriever = retriever_docs(vector_store)


Transcript Loaded: Hello all, my name is Krishna Nayak and (0.48) welcome to my YouTube channel. So guys, (2.56) today in this particular video, we are (4.96) going to go ahead and see the entire (7.12) road map to lear ...


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
output_dict = {"human": [], "ai": []}
CONFIG = {'configurable': {'thread_id': "newthread"}}



In [6]:
while True : 
    user_input = input("User : ")
    if user_input == 'exit':
        break
    print("user :", user_input)
    retrieved_chunks = retriever.get_relevant_documents(user_input)
    context = format_docs(retrieved_chunks)
    result = workflow.invoke(
            {'messages': [HumanMessage(content=user_input)]},
            config=CONFIG,
        )
    for msg in result['messages']:
        if isinstance(msg, HumanMessage):
            if msg.content not in output_dict['human']:
                output_dict['human'].append(msg.content)
        elif isinstance(msg, AIMessage):
            if msg.content not in output_dict['ai']:
                output_dict['ai'].append(msg.content)

    print("AI:", output_dict['ai'][-1])

user : what is this video about?


  retrieved_chunks = retriever.get_relevant_documents(user_input)


AI: This video is about the entire road map to learn AI in 2025, including free resources, videos, and materials.
Timestamp: 9.679
user : when to learn Modern AI
AI: Nowadays, I usually prefer the modern route for learning AI.
Timestamp: 413.44
user : how my days does it take to learn Data Science
AI: Sorry, I didn’t talk about that in this video.
Timestamp: 0.0
user : how to many months does it take to learn Data Science
AI: It takes four months to learn data science, NLP, and computer vision.
Timestamp: 823.36
user : how many days does it takes to learn Data Science
AI: It takes four months to learn data science, NLP, and computer vision.
Timestamp: 823.36
user : what are the different Routes present in this Model
AI: There are three different routes: the traditional route, the modern route, and the advanced route.
Timestamp: 135.84
user : what is Advancd Route
AI: The advanced route is for really efficient people who are already in the technical domain and can quickly learn things. 

In [12]:
context

"definitely (123.36) be able to do amazing things over here (125.36) so quickly I will go ahead and share my (127.6) screen so here you can see that I have (129.599) written this amazing road map to learn (131.52) AI in 2025 (133.76) and here I have you know drawn three (135.84) different routes one is the traditional (139.36) route (142.56) The second one is the modern route and (143.84) the third one is something called as an (146.64) advanced route. Now why do we actually (148.64) require this routes? I will discuss (152.56) about this in a much more detailed (154.8) manner. So let's say uh as I said this (156.48) road map actually incorporates for (160.08) everyone whether you are a fresher (162.8) whether you are an experienced (166.0) professional (167.68) whether you are a leader you are in a (169.76) leadership position and whether you are (172.56) a person who is also coming from a (175.519) complete non-technical background. Okay, (177.92) nontechnical background basically\n\

In [None]:
print(output_dict['ai'][-1])

This video is about AI and geometry, and how a non-AI model was already better at geometry than most humans.
Timestamp: 80.44


In [None]:
result['messages']

[HumanMessage(content='what is this Video About', additional_kwargs={}, response_metadata={}),
 AIMessage(content='This video is about AI and geometry, and how a non-AI model was already better at geometry than most humans.\nTimestamp: 80.44', additional_kwargs={}, response_metadata={})]

# Rag using System Message and MemorySaver

In [1]:

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.messages import SystemMessage , HumanMessage , AIMessage
from youtube_transcript_api import YouTubeTranscriptApi
import re      
from langchain_google_genai import ChatGoogleGenerativeAI            
from dotenv import load_dotenv
load_dotenv()

from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.graph.message import add_messages
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from pydantic import BaseModel, Field
from typing import TypedDict, Annotated
import re
import os
from langchain.prompts import PromptTemplate          
# _-----------------------------------------------------FUNCTIONS FOR RAG----------------------------------------------

# ------------------ Transcript Loader ------------------
def load_transcript(url: str) -> str | None:
    pattern = r'(?:v=|\/)([0-9A-Za-z_-]{11})'
    match = re.search(pattern, url)
    if match:
        video_id = match.group(1)
        try:
            captions = YouTubeTranscriptApi().fetch(video_id).snippets
            # join text + start_time
            data = [f"{item.text} ({item.start})" for item in captions]
            return " ".join(data)
        except Exception as e:
            print(f"Error fetching transcript: {e}")
            return None

# ------------------ Text Splitter ------------------
def text_splitter(transcript):
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.create_documents([transcript])

# ------------------ Vector Store & Retriever  ------------------
def generate_embeddings(chunks):
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return FAISS.from_documents(chunks, embeddings)

def retriever_docs(vector_store):
    return vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})

def format_docs(retrieved_docs):
    return "\n\n".join(doc.page_content for doc in retrieved_docs)

import os

def save_embeddings_faiss(thread_id: str, transcript: str):
    # 1. Split transcript
    chunks = text_splitter(transcript)

    # 2. Build embeddings + FAISS store
    vector_store = generate_embeddings(chunks)

    # 3. Save FAISS index
    save_dir = f"faiss_indexes/{thread_id}"
    os.makedirs("faiss_indexes", exist_ok=True)
    vector_store.save_local(save_dir)

    print(f"✅ Embeddings for {thread_id} saved at {save_dir}")
    
def load_embeddings_faiss(thread_id: str):
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    save_dir = f"faiss_indexes/{thread_id}"

    if not os.path.exists(save_dir):
        raise ValueError(f"No FAISS index found for thread_id: {thread_id}")

    vector_store = FAISS.load_local(save_dir, embeddings, allow_dangerous_deserialization=True)
    retriever = retriever_docs(vector_store)
    return retriever


In [2]:
from pydantic import BaseModel , Field

# ------------------ Build LLM (Gemini) ------------------
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0)

# ------------------ System Message ------------------
system_message = SystemMessage(content="""
You are the YouTuber from the video, directly answering the viewer’s question.

Rules:
1. ONLY use the transcript provided below.
2. Give the answer in clear, simple bullet points (not paragraphs).
3. Each bullet must include the exact timestamp (in seconds) from the transcript line used.
   - Do NOT round or estimate timestamps.
   - If multiple transcript parts are relevant, use separate bullets.
4. Do NOT add greetings, filler, or extra commentary.
5. If the transcript does not answer, say:
   - "Sorry, I didn’t talk about that in this video."
6. Greet only if the viewer greets first.
7. Always remember the viewer’s question when structuring the answer.
""")

# ------------------ Structured Schema ------------------
class AnsandTime(BaseModel):
    answer: list[str] = Field(description="Answers to user's question (no timestamps here)")
    timestamps: float = Field(description="The time (in seconds) from where the answer was taken")

structured_model = model.with_structured_output(AnsandTime)

# ------------------ Chat State ------------------
class ChatState(TypedDict):
    messages: Annotated[list[BaseMessage], add_messages]

# ------------------ Chat Node ------------------
def chat_node(state: ChatState):
    user_question = state['messages'][-1].content
    
    # get context here
    retrieved_chunks = retriever.get_relevant_documents(user_question)
    context = format_docs(retrieved_chunks)

    # build messages
    messages = [
        system_message,  # rules
        SystemMessage(content=f"Transcript:\n{context}"),  # context for model only
        HumanMessage(content=user_question)  # clean user input
    ]

    response = structured_model.invoke(messages)
    ai_text = f"{' '.join(response.answer)}\nTimestamp: {response.timestamps}"

    return {
        "messages": [
            state['messages'][-1],       # store user only
            AIMessage(content=ai_text)   # store ai only
        ]
    }


In [3]:
from langgraph.checkpoint.sqlite import SqliteSaver
import sqlite3
# ------------------  Checkpointer ------------------
conn = sqlite3.connect(database="ragDatabase.db", check_same_thread=False)
checkpointer = SqliteSaver(conn=conn)

# ------------------ Build Graph ------------------
graph = StateGraph(ChatState)
graph.add_node("chat_node", chat_node)
graph.add_edge(START, "chat_node")
graph.add_edge("chat_node", END)
chatbot = graph.compile(checkpointer=checkpointer)
# ------------------ Example Run ------------------
thread_id = "thread5519"

CONFIG = {"configurable": {"thread_id": thread_id}}

In [4]:
# ------------------ Load YouTube Transcript ------------------
youtube_input = "https://www.youtube.com/watch?v=s3KnSb9b4Pk&t"
youtube_captions = load_transcript(youtube_input)

save_embeddings_faiss(thread_id=thread_id , transcript= youtube_captions)

  from .autonotebook import tqdm as notebook_tqdm


✅ Embeddings for thread5519 saved at faiss_indexes/thread5519


In [5]:
retriever = load_embeddings_faiss(thread_id=thread_id)
retrieved_chunks = retriever.get_relevant_documents("how many days months does it takes to learn DataScience")
context = format_docs(retrieved_chunks)
context

  retrieved_chunks = retriever.get_relevant_documents("how many days months does it takes to learn DataScience")


"science I would basically say (211.36) okay go ahead and first learn data (213.92) science first. Okay so here I will go (216.4) ahead and write here you have to learn (218.239) data science first. Now what you really (221.68) need to learn in data science. Okay here (224.959) you need to master some amazing things (227.76) like what do you need to master? you (229.84) need to master (232.08) DS that is data science, ML, (233.92) CV, (238.56) NLP. Okay. So here you need to master (240.159) data science, machine learning, computer (243.36) vision, NLP, right? And in all the (244.959) specific topics you should be able to (247.76) understand that stats and linear algebra (250.319) is included a part of it. So in short, (252.959) you're mastering all these technologies (254.959) like data science, machine learning, CV (257.759) and LP. And whenever I'm talking about (259.6) mastering, I'm talking with respect to (261.519) development of a end to-end project. (263.759) Okay. So first of\n

In [6]:
retriever = load_embeddings_faiss(thread_id=thread_id)
while True : 
    user_input = input("User : ")
    if user_input == 'exit':
        break
    print("user :", user_input)
    result = chatbot.invoke(
            {'messages': [HumanMessage(content=user_input)]},
            config=CONFIG,
        )
    print("AI :" ,result['messages'][-1].content)


user : what is this video abot


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 16
}
].


AI : This video is about the roadmap to learn AI in 2025. It includes free resources, videos, and materials. The video covers generative AI and agentic AI applications. It provides a roadmap for learning AI within 8 months, even for those with 10+ years of experience and no coding background. The roadmap includes three stages with specific paths and free resources.
Timestamp: 2.56


In [7]:
conn = sqlite3.connect(database="ragDatabase.db", check_same_thread=False)
checkpointer = SqliteSaver(conn=conn)
cursor = conn.cursor()
cursor.execute("SELECT DISTINCT thread_id FROM checkpoints")
threads = [row[0] for row in cursor.fetchall()]
print(threads)

['thread1']


In [8]:
def load_conversation(chatbot , thread_id  ):
    return chatbot.get_state(config={'configurable': {'thread_id': thread_id}}).values['messages']
load_conversation(chatbot=chatbot , thread_id = "thread1")

[HumanMessage(content='what is this video about?', additional_kwargs={}, response_metadata={}, id='d12e1858-e6eb-4f3b-b3fb-661d8c2e5bdf'),
 AIMessage(content="This video is about the roadmap to learn AI in 2025. It provides free resources, videos, and materials. The video covers generative AI and agentic AI applications. It's designed for various backgrounds, including developers, program managers, and leadership positions. A three-stage roadmap is presented, with specific details for each stage. The roadmap includes machine learning, deep learning, NLP, and MLOps. It also covers tools like CircleCI, AI graphana, airflow, bento ML, AWS sagemaker, DVC, and dockers.\nTimestamp: 2.56", additional_kwargs={}, response_metadata={}, id='890cad82-a47b-4422-a7d9-3bdb74069839'),
 HumanMessage(content='when to learn DataScience', additional_kwargs={}, response_metadata={}, id='67d9860d-bf88-44bc-a73a-38ab37fc57a7'),
 AIMessage(content='Learn data science first To master data science, you need to 