In [1]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import os

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# os.environ["OPENAI_API_KEY"] = "Put your OpenAI API key here"  # or export OPENAI_API_KEY in your shell instead

In [3]:
# Article that serves as the knowledge source for the question-answering chain.
URL = "https://www.theverge.com/2024/4/18/24133808/meta-ai-assistant-llama-3-chatgpt-openai-rival"

# Loader pointed at the article URL; the pages are read from it in the next cell.
loader = WebBaseLoader(web_path=URL)

In [4]:
# Load the article as whole Document objects. Chunking is done explicitly by
# the RecursiveCharacterTextSplitter in the following cell, so the loader must
# not pre-split here: load_and_split() would first cut the text with its own
# default splitter and the content would end up being split twice.
pages = loader.load()

In [5]:
# Cut the loaded pages into small, overlapping chunks so that each one can be
# embedded and retrieved independently.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
chunks = text_splitter.split_documents(pages)

In [6]:
# Embedding model handed to the vector store when the chunks are indexed.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

In [7]:
# Index every chunk in a Chroma vector store using the embedding model above.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [8]:
# Retriever view of the vector store; the chain pipes the incoming question
# through it to obtain the documents that become the prompt's {context}.
retriever = vectorstore.as_retriever()

In [9]:
def format_docs(docs):
    """Concatenate the text of the given documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated from each other
        by a blank line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    texts = [document.page_content for document in docs]
    return separator.join(texts)

In [10]:
# Chat model that writes the final answer. The prompt asks for strictly
# factual, context-only answers, so sampling randomness is turned off
# (temperature=0) to keep the notebook's recorded output as stable as possible
# across re-runs.
llm = ChatOpenAI(temperature=0)

In [11]:
# Prompt text with two placeholders, {question} and {context}, which the chain
# fills with the user's question and the formatted retrieved documents.
# NOTE: the indentation of the continuation lines and the trailing spaces are
# part of the string literal and are sent to the model as-is.
template = """SYSTEM: You are a question answer bot. 
                 Be factual in your response.
                 Respond to the following question: {question} only from 
                 the below context :{context}. 
                 If you don't know the answer, just say that you don't know.
               """
# Build the prompt object from the template string above.
prompt = PromptTemplate.from_template(template)

In [12]:
# Retrieval-augmented QA pipeline:
#   1. the incoming question is fanned out into the prompt's two inputs,
#   2. the prompt is rendered,
#   3. the chat model answers,
#   4. the model's message is reduced to a plain string.
chain = (
    {
        # Retrieved documents for the question, joined into one text block.
        "context": retriever | format_docs,
        # The question itself, forwarded unchanged.
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [13]:
# Ask a question whose answer should be found in the loaded article; the
# returned string is displayed as the cell output.
chain.invoke(
    "What's the size of the largest Llama 3 model?"
)

'The largest Llama 3 model will have over 400 billion parameters.'