In [None]:
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI

from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

import bs4
import getpass
import os

In [2]:
# Reuse a key that is already present in the environment; only prompt when it
# is missing, so re-running this cell (or Restart & Run All) does not ask again
# and never overwrites a key that was configured outside the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Chat model used as the generation step of the chain built later in the notebook.
llm = ChatOpenAI(model="gpt-4o-mini")

 ········


In [3]:
# Page(s) used as the knowledge source for retrieval.
source_urls = ["https://en.wikipedia.org/wiki/2024_Summer_Olympics"]

# Build the loader for those URLs, then fetch them; the result feeds the splitter.
loader = WebBaseLoader(source_urls)
documents = loader.load()

In [4]:
# Splitter settings kept together so they are easy to spot and tune.
splitter_options = {"chunk_size": 500, "chunk_overlap": 50}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded documents into the smaller pieces that get indexed.
all_splits = text_splitter.split_documents(documents)

In [5]:
# Embedding model used to vectorise the chunks for the FAISS index.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Index every chunk, then expose the index through the retriever interface.
vectorstore = FAISS.from_documents(all_splits, embedding_model)
retriever = vectorstore.as_retriever()

def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents joined with a blank line between consecutive documents;
        an empty string when ``docs`` yields nothing.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

In [6]:
# Prompt text with two placeholders, {context} and {question}; the chain built
# later in the notebook supplies values under exactly those two keys.
# NOTE: the leading spaces inside the triple-quoted string are part of the
# string itself, so they are included in the text of the prompt.
template = """Answer the question based only on the following context:
                {context}

                Question: {question}
                """
# Build the chat prompt object from the template string above.
prompt = ChatPromptTemplate.from_template(template)

In [7]:
# Fan the incoming question out into the two inputs the prompt needs:
#   context  -> retrieved documents, flattened to text by format_docs
#   question -> the original input, passed through unchanged
prompt_inputs = RunnableParallel(
    context=retriever | format_docs,
    question=RunnablePassthrough(),
)

# Pipeline: build prompt inputs -> fill the prompt -> call the model -> plain string.
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()

In [8]:
# Ask the chain where the games take place and show the answer.
question = "Where are the 2024 summer olympics being held?"
response = rag_chain.invoke(question)
print(response)

The 2024 Summer Olympics are being held in Paris, France, with events also taking place in 16 additional cities across Metropolitan France and one subsite in Tahiti, French Polynesia.


In [9]:
# Ask the chain about newly added sports and show the answer.
question = "What are the new sports that are being added for the 2024 summer olympics?"
result = rag_chain.invoke(question)
print(result)

The new sport being added for the 2024 Summer Olympics is breaking, which is making its Olympic debut. Additionally, skateboarding, sport climbing, and surfing are returning to the programme after debuting at the 2020 Summer Olympics.


In [10]:
# Ask the chain about the expected number of volunteers and show the answer.
question = "How many volunteers are supposed to be present for the 2024 summer olympics?"
result = rag_chain.invoke(question)
print(result)

There are expected to be 45,000 volunteers recruited for the 2024 Summer Olympics.
