In [1]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
import os

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
#os.environ["OPENAI_API_KEY"]="Put your OpenAI API Keys here"

In [3]:
URL1="https://techcrunch.com/2024/03/04/anthropic-claims-its-new-models-beat-gpt-4/"
URL2="https://techcrunch.com/2024/03/28/ai21-labs-new-text-generating-ai-model-is-more-efficient-than-most/"

In [4]:
loader = WebBaseLoader([URL1, URL2])
data = loader.load()

In [5]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
chunks = text_splitter.split_documents(data)

In [6]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [7]:
vector = FAISS.from_documents(chunks, embeddings)
retriever = vector.as_retriever()

In [8]:
prompt_template = """
    Answer the question {input} based solely on the context below:
    \n\n'<context>\n{context}\n</context>'
    If you can't find an answer, say I don't know.
    """
prompt = PromptTemplate.from_template(prompt_template)

In [9]:
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)

In [10]:
combine_docs_chain = create_stuff_documents_chain(llm, prompt)

In [11]:
chain = create_retrieval_chain(retriever, combine_docs_chain)

In [12]:
result = chain.invoke({"input": "List the models and their token size of models only from Anthropic and Meta"})

In [13]:
print(result['answer'])

- Anthropic: Claude 3 (200,000-token context window)
- Meta: Llama 2 (~4,000-token context window)
- Meta: Gemini 1.5 Pro (up to a million-token context window)

I don't know.
