In [1]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with an actionable message when the key is absent: os.getenv()
# returns None in that case, and assigning None into os.environ raises an
# opaque "str expected, not NoneType" TypeError.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# Re-export so libraries that read the key from the environment can find it.
os.environ["OPENAI_API_KEY"] = openai_api_key

In [2]:
from langchain_community.document_loaders import WebBaseLoader

# Source article that serves as the knowledge base for the questions below.
source_url = "https://en.wikipedia.org/wiki/OpenAI"

# Fetch the page and wrap its text in LangChain Document objects.
loader = WebBaseLoader(source_url)
document = loader.load()

# Last expression of the cell: display what was loaded.
document

[Document(page_content='\n\n\n\nOpenAI - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact usDonate\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload file\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\n Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n1History\n\n\n\nToggle History subsection\n\n\n\n\n\n1.12015–2018: Non-profit beginnings\n\n\n\n\n\n\n\n1.22019: Transition from non-profit\n\n\n\n\n\n\n\n1.32020–2023:

In [3]:
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_objectbox import ObjectBox
from langchain_text_splitters import RecursiveCharacterTextSplitter

# OpenAIEmbeddings defaults to text-embedding-ada-002; the model is named
# explicitly so the vector size configured below is visibly tied to it.
EMBEDDING_MODEL = "text-embedding-ada-002"
# text-embedding-ada-002 produces 1536-dimensional vectors. The previous value
# of 768 did not match the model output.
# NOTE(review): per the ObjectBox docs, a smaller configured dimension appears
# to index only the leading components of each vector - confirm.
EMBEDDING_DIMENSIONS = 1536

embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

# Split the page into overlapping chunks so each piece fits comfortably in the
# prompt while keeping some shared context across chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
final_docs = text_splitter.split_documents(document)

# Embed every chunk and store it in an ObjectBox vector store.
# NOTE(review): if a store created with the old 768 setting already exists on
# disk, it may need to be removed before re-running - confirm.
vectors = ObjectBox.from_documents(
    documents=final_docs,
    embedding=embeddings,
    embedding_dimensions=EMBEDDING_DIMENSIONS,
)

retriever = vectors.as_retriever()


In [4]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers.string import StrOutputParser
from langchain import hub

# Chat model used to generate the final answers.
chat_model_name = "gpt-4o"
llm = ChatOpenAI(model=chat_model_name)

# RAG prompt pulled from the LangChain Hub; the displayed template below
# takes `context` and `question` as its input variables.
rag_prompt_id = "rlm/rag-prompt"
prompt = hub.pull(rag_prompt_id)

# Last expression of the cell: display the pulled template.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [15]:
from langchain.chains.retrieval_qa.base import RetrievalQA

# Wire the chat model, a retriever over the vector store, and the hub prompt
# into a single question-answering chain. "stuff" is the default chain type
# and is spelled out here only for readability.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectors.as_retriever(),
    chain_type_kwargs=dict(prompt=prompt),
)


In [18]:
# Ask the chain a question; the generated answer is under the "result" key.
question = "Who is the founder of OpenAI?"
response = qa_chain.invoke({"query": question})
response["result"]

'OpenAI was founded by Ilya Sutskever, Greg Brockman, Trevor Blackwell, Vicki Cheung, Andrej Karpathy, Durk Kingma, Jessica Livingston, John Schulman, Pamela Vagata, and Wojciech Zaremba. Sam Altman and Elon Musk were initial Board of Directors members.'

In [19]:
# Ask about the company's valuation; the answer is read from the "result" key.
question = "What is the current worth of OpenAI?"
response = qa_chain.invoke({"query": question})
response["result"]

'The current worth of OpenAI is not explicitly stated in the provided context. However, Microsoft invested a total of $11 billion in OpenAI in 2019 and 2023.'

In [20]:
# Ask about OpenAI's products; the answer is read from the "result" key.
question = "what are the products are designed or manufactured by OpenAI?"
response = qa_chain.invoke({"query": question})
response["result"]

'OpenAI has designed and manufactured products like GPT-4 and a text-to-video model named Sora.'