In [1]:
from langchain.document_loaders import WebBaseLoader                # Load a web page as a document
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Split the document into smaller chunks
from langchain.embeddings import OpenAIEmbeddings                   # Create embeddings for the text
from langchain.vectorstores import FAISS                            # Create a vector store for the embeddings
from langchain.memory import ConversationBufferMemory               # Create memory for the conversation
from langchain.llms import OpenAI as LLM                            # Use OpenAI's LLM for generating responses
from langchain.chains import ConversationalRetrievalChain           # Create a chain for conversational retrieval
from langchain.chat_models import ChatOpenAI                        # Use OpenAI's chat model for generating responses

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Page to index: Wikipedia's Main Page (news headlines, featured articles, etc.).
url = "https://en.wikipedia.org/wiki/Main_Page"

In [3]:
# Construct the web-page loader for `url`; the actual `load()` call happens in the next cell.
loader = WebBaseLoader(web_path=url)

In [4]:
# Run the loader to pull the web page in as document(s); the result is the
# input to the text splitter in the following cell.
raw_documents = loader.load()

In [5]:
# Splitter with library defaults (no chunk size / overlap overrides are passed).
text_splitter = RecursiveCharacterTextSplitter()

# Break the loaded page into smaller chunks for embedding.
documents = text_splitter.split_documents(documents=raw_documents)

In [16]:
# Embedding model used to vectorize the document chunks.
# NOTE(review): no API key is passed here, so credentials presumably come from
# the environment (e.g. OPENAI_API_KEY) — confirm before running on a fresh kernel.
embedding = OpenAIEmbeddings()

In [7]:
# Embed every chunk and index the vectors in a FAISS store.
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embedding,
)

In [15]:
# Conversation memory handed to the retrieval chain below; history is kept
# under the "chat_history" key.
# NOTE(review): return_messages=True presumably yields message objects rather
# than one concatenated string — confirm against the LangChain docs.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

In [14]:
# Assemble the conversational retrieval chain from its three parts:
#   llm       - OpenAI chat model (gpt-3.5-turbo, temperature 0)
#   retriever - the FAISS vector store exposed as a retriever
#   memory    - the conversation buffer created above
qa = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    retriever=vectorstore.as_retriever(),
    memory=memory,
)

In [10]:
# Question that will be sent to the chain.
query = "What is the main news headlines today?"

In [13]:
# Run the conversational retrieval chain on the question.
# `invoke` is the supported entry point; calling the chain object directly
# (`qa({...})`) goes through the deprecated `Chain.__call__` and emits a
# deprecation warning on current LangChain releases. Input and output are the
# same: a dict in, a dict out, with the reply stored under "answer".
result = qa.invoke({"question": query})

In [12]:
# Show the chain's answer text; as the cell's last expression it is rendered
# directly as the cell output.
result["answer"]

"The main news headlines today include Friedrich Merz being elected Chancellor of Germany, India conducting missile strikes on Pakistani targets, Zhao Xintong winning the World Snooker Championship, and the People's Action Party retaining a supermajority of seats in the Singaporean general election."