In [None]:
import os 
from dotenv import load_dotenv
load_dotenv()

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate



In [None]:
video_id = "XBuv4HHTRjI"

# Fetch the English captions for the video. Only the request itself sits in
# the ``try`` so that unrelated errors are not attributed to missing captions.
# NOTE(review): newer youtube-transcript-api releases appear to replace the
# static ``get_transcript`` with an instance-level ``fetch`` — confirm against
# the installed version.
try:
    transcriptlist = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
except TranscriptsDisabled as exc:
    print("No Captions")
    # Every later cell needs ``transcript``; stop here with a clear message
    # instead of letting the next cell fail with an unrelated NameError.
    raise RuntimeError(
        f"Captions are disabled for video {video_id!r}; no transcript to index."
    ) from exc

# Flatten the caption segments into one string for the text splitter.
transcript = " ".join(chunk["text"] for chunk in transcriptlist)

# Show a short preview only; the full transcript would flood the cell output.
print(f"{len(transcript):,} characters")
print(transcript[:500])

In [None]:
# Inspect the first few raw caption segments instead of dumping the whole list.
transcriptlist[:5]

In [None]:
# Cut the transcript into overlapping pieces (chunk_size=1000, chunk_overlap=200)
# so each piece can be embedded and retrieved on its own.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.create_documents([transcript])

In [None]:
# Number of documents the splitter produced from the transcript.
len(chunks)

In [None]:
# Embed every chunk with the Google embedding model and index the resulting
# vectors in a FAISS store.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)


In [None]:
# Inspect the store's index-to-docstore-id mapping (as the attribute name
# suggests: index position -> id of the stored document — confirm).
vector_store.index_to_docstore_id

In [None]:
# Wrap the vector store as a retriever that returns the 4 most similar chunks.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [None]:
# Display the retriever object to check how it was configured.
retriever

In [None]:
# Smoke-test retrieval with a sample query and display the returned documents.
retriever.invoke("Tell me about DeepMind")

In [None]:
# Chat model used to answer questions over the retrieved context.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.2,
)

In [None]:
# Prompt for grounded question answering: the model is told to answer only
# from the supplied transcript excerpts and to admit when they are not enough.
# The template text is kept flush-left so that notebook indentation does not
# leak into the prompt sent to the model.
prompt = PromptTemplate(
    template="""You are a helpful assistant.
Answer only from the provided transcript context.
If the context is insufficient, just say you don't know.

{context}
Question: {question}
""",
    input_variables=["context", "question"],
)


In [None]:
# Example question, followed by the chunks the retriever considers relevant.
question = (
    "is the topic of nuclear fusion discussed in this video? "
    "if yes then what was discussed"
)
retrieved_docs = retriever.invoke(question)

In [None]:
# Display the documents retrieved for the example question.
retrieved_docs

In [None]:
# Merge the retrieved chunks into one context string, separated by blank lines.
context_text = "\n\n".join([d.page_content for d in retrieved_docs])
context_text

In [None]:
# Fill the template with the retrieved context and the user's question.
final_prompt = prompt.invoke(
    {
        "context": context_text,
        "question": question,
    }
)

In [None]:
# Display the fully rendered prompt that will be sent to the model.
final_prompt

In [None]:
# Send the rendered prompt to the chat model and print the text of its reply.
answer = llm.invoke(final_prompt)
print(answer.content)

In [None]:
from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
from langchain_core.output_parsers import StrOutputParser


In [None]:
def format_docs(retrieved_docs):
    """Combine retrieved documents into a single context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content``
            attribute.

    Returns:
        The ``page_content`` values joined together, with one blank line
        between consecutive documents.
    """
    page_texts = [doc.page_content for doc in retrieved_docs]
    return "\n\n".join(page_texts)

In [None]:
# Fan the incoming question out to two branches:
#   context  -> retrieve matching chunks and join them into one string
#   question -> pass the original input through unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [None]:
# Check the parallel step on its own: the result should carry both the
# 'context' and 'question' keys that the prompt template expects.
parallel_chain.invoke('Who is Demis')

In [None]:
# Output parser placed at the end of the chain, after the chat model.
parser = StrOutputParser()

In [None]:
# End-to-end pipeline: build inputs -> render prompt -> call model -> parse output.
main_chain = (
    parallel_chain
    | prompt
    | llm
    | parser
)

In [None]:
# Run the full chain on a sample request.
# NOTE(review): the retriever is configured with k=4, so the model only sees
# four retrieved chunks here rather than the whole transcript — a "summary"
# from this chain reflects those excerpts only.
main_chain.invoke('summarize this video')