In [None]:
from dotenv import load_dotenv

load_dotenv()

In [None]:
!pip install -q youtube-transcript-api langchain-community langchain-openai faiss-cpu tiktoken python-dotenv

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

In [None]:
# Video whose English transcript feeds the rest of the notebook.
# Other videos tried earlier: "LPZh9BOjkQs", "zfp5DEisdcs".
video_id = "Gfr50f6ZBvo"
try:
    transcript_lists = YouTubeTranscriptApi().fetch(video_id=video_id, languages=["en"])
except TranscriptsDisabled:
    print("Transcripts are disabled for this video.")
    # Stop here: every later cell needs `transcript`, and continuing would only
    # surface as a confusing NameError further down.
    raise

# Flatten the fetched snippets into a single string for chunking.
transcript = " ".join(chunk.text for chunk in transcript_lists)

# Show a short preview instead of dumping the whole transcript into the output.
print(f"Fetched {len(transcript):,} characters of transcript")
print(transcript[:500])

In [None]:
# Break the transcript into overlapping pieces so each one can be embedded
# separately; the overlap keeps some shared text between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
chunks = splitter.create_documents(
    [transcript],
)


In [None]:
# Number of chunks the splitter produced from the transcript.
len(chunks)

In [None]:
# Peek at the first chunk as a sanity check on the split.
chunks[0]

In [None]:
# Embed every chunk with the OpenAI embedding model and index the vectors in FAISS.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
)
vectorstore = FAISS.from_documents(chunks, embeddings)


In [None]:
# Inspect the store's mapping from index position to document ID
# (these IDs are what `get_by_ids` expects).
vectorstore.index_to_docstore_id

In [None]:
# Look up a stored chunk by its document ID. The ID is read from the current
# store rather than hard-coded: IDs are generated when the index is built, so a
# literal copied from an earlier run will not exist after a kernel restart.
first_doc_id = vectorstore.index_to_docstore_id[0]
vectorstore.get_by_ids([first_doc_id])

In [None]:
# Wrap the vector store as a retriever that returns the 4 most similar chunks.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=4),
)

In [None]:
# Display the retriever's repr to confirm how it is configured.
retriever

In [None]:
# Smoke-test retrieval with a sample query; the cell output is whatever the retriever returns.
retriever.invoke("What is deepmind")

In [None]:
# Chat model used to answer questions; a low temperature keeps answers close to the context.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)

# Prompt that restricts the model to the retrieved transcript context.
# `context` receives the joined chunk text and `question` the user's query.
prompt = PromptTemplate(
    template="""
    You are a helpful assistant.
    Answer ONLY from the provided transcript context.
    If the context is insufficient, just say you don't know.

    {context}
    Question : {question}
    """,
    input_variables=['context','question']
)

In [None]:
# Question to answer from the transcript.
question = "is the topic aliens discussed in this video? if yes then what was discussed ?"
# Fetch the chunks the retriever considers most similar to the question.
retrieved_docs = retriever.invoke(question)


In [None]:
# Join the retrieved chunk texts, separated by blank lines, into one context string.
context_text = "\n\n".join(
    [doc.page_content for doc in retrieved_docs]
)
# Fill the prompt template with that context and the question.
final_prompt = prompt.invoke(
    dict(context=context_text, question=question)
)


In [None]:
# Send the filled-in prompt to the chat model and print only the text of its reply.
answer=llm.invoke(final_prompt)
print(answer.content)

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [None]:
def format_docs(retrived_docs):
    """Join the text of the given documents into one context string.

    Args:
        retrived_docs: Iterable of objects exposing a ``page_content`` string
            (the parameter name keeps its original spelling so keyword callers
            keep working).

    Returns:
        The ``page_content`` values separated by blank lines; an empty string
        when no documents are given.
    """
    # Read from the argument, not the notebook-level `retrieved_docs` global:
    # otherwise every chain call would reuse the documents of an earlier query.
    return "\n\n".join(doc.page_content for doc in retrived_docs)

In [None]:
# Build both prompt inputs from a single question string:
#   context  -> retrieve the matching chunks and join their text
#   question -> pass the original question through unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

# Try the step on its own to see the dict it produces.
parallel_chain.invoke("Who is demis")

In [None]:
# Output parser placed at the end of the chain so it yields a plain string.
parser=StrOutputParser()

In [None]:
# End-to-end pipeline: build {context, question} -> fill the prompt -> call the
# model -> reduce the reply to a string.
main_chain = parallel_chain | prompt | llm | parser

# The query text is embedded for retrieval and shown to the model, so it is spelled correctly.
main_chain.invoke("Can you summarize the video")