In [71]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI , GoogleGenerativeAIEmbeddings
from langchain_core.runnables import RunnableParallel , RunnableLambda , RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv

In [72]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Google API key consumed by the
# Gemini chat/embedding clients created below — confirm the expected variable name.
load_dotenv()

True

In [73]:
# Chat model that generates the final answer; it is the third stage of `main_chain`.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

In [74]:
# ID of the YouTube video whose English transcript is indexed below.
video_id = "lRjprPQHuXw"

# Fetch the caption segments and flatten them into one string.
# NOTE(review): newer youtube-transcript-api releases replace the static
# `get_transcript` with an instance-level `fetch` API — confirm against the
# installed version.
try:
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
except TranscriptsDisabled:
    print('No caption for this video')
    # Re-raise so execution stops here. Otherwise `transcript` would be left
    # undefined (or stale from a previous run) and later cells would fail with
    # a NameError or silently index the wrong video.
    raise
else:
    # Each segment is a dict; only its 'text' field is needed for indexing.
    transcript = " ".join(chunk['text'] for chunk in transcript_list)
    

In [75]:
# Split the transcript into overlapping windows (1000 characters, 200 shared
# with the neighbouring window) so each piece can be embedded on its own.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
# `create_documents` takes a list of texts; the whole transcript is the only one.
chunk = splitter.create_documents([transcript])


In [76]:
# Embed every transcript chunk and build a FAISS index over the vectors.
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
vector_store = FAISS.from_documents(chunk, embeddings)




In [77]:
# Similarity-search retriever returning the 4 closest chunks.
# The keyword must be `search_kwargs` (plural); the previous `search_kwarg`
# spelling is not the parameter the retriever reads, so `k` was never applied.
retriver = vector_store.as_retriever(search_type="similarity", search_kwargs={'k': 4})

In [78]:
# Prompt text: restricts the model to the retrieved transcript context and
# tells it to admit when that context is not enough to answer.
QA_TEMPLATE = """
        You are a helpful assistant.
        Answer ONLY from the provided transcript context.
        If the context is insufficient, just say you don't know.

        {context}
        Question: {question}
    """

# The two placeholders are filled with the joined chunks and the user question.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=QA_TEMPLATE,
)

In [79]:
# Manual walk-through of one retrieval + prompt-filling step.
# `question` and `retrived_doc` are defined here so the cell runs on a fresh
# kernel; previously they were only available as leftover kernel state.
question = 'can u summarize the video'
retrived_doc = retriver.invoke(question)

# Join the retrieved chunks into one context string, separated by blank lines.
context_text = '\n\n'.join(doc.page_content for doc in retrived_doc)

# Fill the template's {context} and {question} placeholders.
final_prompt = prompt.invoke({'context': context_text, 'question': question})

In [80]:
def formate_doc(retrived_doc):
    """Concatenate the text of the given documents into one context string.

    Args:
        retrived_doc: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined with a blank line between them;
        an empty string when the iterable is empty.
    """
    pieces = [entry.page_content for entry in retrived_doc]
    return '\n\n'.join(pieces)

In [81]:
# Fan the incoming question out to two branches that run side by side:
#   context  -> retrieve matching chunks, then join them into one string
#   question -> forwarded unchanged
parallel_chain = RunnableParallel(
    context=retriver | RunnableLambda(formate_doc),
    question=RunnablePassthrough(),
)

In [82]:
# Last stage of `main_chain`; the chain's result shown below is a plain string.
parser = StrOutputParser()

In [83]:
# Full pipeline: build {context, question} -> fill the prompt -> call the LLM -> parse the output.
main_chain = parallel_chain | prompt | llm | parser

In [84]:
# Ask a question: the string is fed to the retriever (to build the context)
# and also passed through unchanged as the prompt's `question`.
main_chain.invoke('can u summarize the video')

'The transcript covers several topics:\n\n1.  **Advice for building something new:** It suggests a 25-year-old wanting to build something should conduct thorough research, possess patience and passion, choose a narrow niche to focus on, and view failures as learnings.\n2.  **Product Engineering (Camera Example):** It discusses the complexity of product engineering, highlighting various fields like radio antennas, industrial design, cameras, and software. It emphasizes the need for significant investment, time, and energy, and the importance of understanding underlying principles rather than just outsourcing. The camera is used as an example, explaining how a phone recognizes scenes (e.g., nighttime, people) and how large teams are needed to define and tune each scene for optimal photo quality.\n3.  **Impact of Technology/AI on Learning and Jobs:** The discussion touches on how emotional intelligence makes humans unique, suggesting that while 80% of jobs might be automated, 20% will sti