In [21]:
import os
from dotenv import load_dotenv

from langchain_community.document_loaders import YoutubeLoader

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

from langchain.vectorstores import FAISS

from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq

from langchain.prompts import ChatPromptTemplate

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [2]:
# Read key=value pairs from a local .env file into the process environment
# so the API keys used below do not have to be hardcoded in the notebook.
load_dotenv()

True

In [3]:
# Fail fast, with a readable message, when a required API key is absent.
# Writing os.getenv(...) straight back into os.environ is a no-op when the
# variable exists and raises an opaque "TypeError: str expected, not NoneType"
# when it does not, so check explicitly and name what is missing instead.
# An empty value is treated as missing because it cannot authenticate anyway.
REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "GROQ_API_KEY")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Add them to your .env file or export them before running this notebook."
    )

In [7]:
# First attempt: ask the YouTube loader for this video's transcript as a list
# of LangChain documents.
# NOTE: for this URL the loader came back empty when the notebook was run
# (see the `[]` printed by the next cell).
url = "https://www.youtube.com/watch?v=AcwdqisUODw"
loader = YoutubeLoader.from_youtube_url(url)
transcript = loader.load()

In [8]:
# Show the raw loader result; an empty list means no transcript documents came back.
print(transcript)

[]


In [9]:
# An empty list is falsy, so this reports whether any transcript documents were loaded.
print("Hello" if transcript else "No transcript found.")

No transcript found.


In [10]:
# Second attempt, using a different video as the transcript source.
url = "https://www.youtube.com/watch?v=iEGgHbmk6pU"
loader = YoutubeLoader.from_youtube_url(url)
transcript = loader.load()

# The loader can return an empty list (it did for the previous URL). Stop here
# with a clear message rather than letting the indexing and splitting cells
# further down fail with an unrelated-looking error.
if not transcript:
    raise ValueError(f"No transcript found for {url}")

In [14]:
# Number of documents the loader returned for this video.
len(transcript)

1

In [16]:
# Length, in characters, of the text held by the first transcript document.
print(len(transcript[0].page_content))

1179


In [17]:
# Break the transcript into smaller, slightly overlapping chunks so each one
# can be embedded and retrieved on its own.
# chunk_size=300 / chunk_overlap=30 are presumably measured in characters
# (the splitter's default length function) — confirm if a custom one is added.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=30)
docs = text_splitter.split_documents(transcript)

In [18]:
# How many chunks the splitter produced.
len(docs)

5

In [20]:
# Peek at the text of the final chunk.
print(docs[-1].page_content)

it's going to work out very well. But no, it's at 145%. There will not be anywhere near that number.


In [22]:
# Embedding model used to turn each chunk into a vector.
# Presumably authenticates with OPENAI_API_KEY from the environment — confirm.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [23]:
# Embed every chunk and build a FAISS vector index from the results.
vector_store = FAISS.from_documents(documents=docs, embedding=embeddings)
# Retriever view over the index; k=2 asks for the two best-matching chunks per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})

In [24]:
# Chat model that writes the final answer, served through Groq.
# NOTE(review): the last cell strips everything up to "</think>", so this model
# is expected to emit its reasoning inside <think>...</think> before the answer.
llm = ChatGroq(model="deepseek-r1-distill-llama-70b")

In [25]:
# Two-message chat prompt:
#   - system: instructs the model to answer only from the supplied context and
#     to say so when the context is insufficient; `{context}` is a template variable.
#   - human:  the user's question, supplied as `{input}`.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant answer the  questions based on the given context only. if the context does not have enough information then say answer is not in the provided content. Context is: {context}."),
    ("human", "{input}"),
])
# Bare expression so the notebook displays the constructed template.
prompt

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='You are a helpful assistant answer the  questions based on the given context only. if the context does not have enough information then say answer is not in the provided content. Context is: {context}.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

In [26]:
# Chain that formats a list of documents into the prompt's `{context}` slot
# and sends the filled prompt to the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)
# Full RAG chain: combines the retriever with the document chain above.
# It is invoked below with an "input" key and its result is read via "answer".
retriever_chain = create_retrieval_chain(retriever, document_chain)

In [None]:
# Ask a question against the indexed transcript; the chain's output is kept in
# `result` and its "answer" entry is read in the next cell.
result = retriever_chain.invoke({"input": "which countries are the center of discussion?"})

In [None]:
answer = result["answer"]
# The model may prefix its reply with reasoning wrapped in <think>...</think>.
# Keep only the text after the first closing tag. When the tag is absent,
# print the whole answer: the previous `find(...) + 8` slice evaluated to
# answer[7:] in that case (find returns -1) and silently cut off the first
# seven characters.
_, think_close, after_think = answer.partition("</think>")
print((after_think if think_close else answer).strip())

The center of discussion involves two countries: the United States and China.
