In [19]:
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain_openai import OpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

import os
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file (if present) into os.environ.
load_dotenv()

# Fail fast with an actionable message when the key is absent or empty.
# Writing os.getenv(...) back into os.environ adds nothing once load_dotenv()
# has run, and with a missing key it raises an opaque
# "TypeError: str expected, not NoneType".
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [4]:
# Read the source PDF (path is relative to the notebook's working directory).
loader = PyPDFLoader("data/relativity.pdf")
# `documents` is the input to the text splitter in the following cell.
documents = loader.load()

In [5]:
# Break the loaded documents into overlapping chunks (size 1000, overlap 100)
# so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
docs = text_splitter.split_documents(documents)

In [6]:
# Embedding client created with the library defaults (no arguments passed).
# NOTE(review): presumably authenticates via OPENAI_API_KEY from the environment — confirm.
embeddings = OpenAIEmbeddings()

In [7]:
# Build the FAISS vector store from the chunked documents using `embeddings`.
# NOTE(review): presumably embeds every chunk on each run, so this cell may be
# slow or incur API cost — confirm, and consider caching the index.
db = FAISS.from_documents(docs, embeddings)

In [9]:
# Language model used to generate answers; created with library defaults
# (no model name or sampling parameters are pinned here).
llm = OpenAI()

In [23]:
# Prompt template with two placeholders: {context} for the supporting text
# and {input} for the user's question. The template text (including its
# leading newline) is spelled out line by line.
prompt = ChatPromptTemplate.from_template(
    "\n"
    "Answer the question based on the context provided.\n"
    "Think step by step before providing a detailed answer.\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
    "Question: {input}"
)

In [24]:
# Combine the LLM with the prompt into a document-answering chain.
# NOTE(review): presumably fills the prompt's {context} placeholder with the
# supplied documents — confirm against the LangChain docs.
document_chain = create_stuff_documents_chain(llm, prompt)

In [25]:
# Expose the FAISS store as a retriever using its default search settings
# (no arguments passed).
retriever = db.as_retriever()

In [26]:
# End-to-end chain built from the retriever and the document chain; it is
# invoked with an {"input": ...} dict further down.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [30]:
# Question to ask about the PDF; edit this string to try other queries.
question = "What is space time?"

In [31]:
# Run the chain; the "input" key matches the {input} placeholder in the prompt.
response = retrieval_chain.invoke({"input": question})

In [32]:
# Display the raw result dict. The recorded output shows it echoes 'input'
# and carries the retrieved Document objects under 'context'.
# NOTE(review): the generated text is presumably under an 'answer' key —
# confirm; showing only that key would keep the output short.
response

{'input': 'What is space time?',
 'context': [Document(page_content='Part I: The Special Theory of Relativity\nAlbert Einstein 51Minkowski\'s Four-Dimensional\nSpace\n \nThe non-mathematician is seized by a mysterious shuddering when he hears of "four-\ndimensional" things, by a feeling not unlike that awakened by thoughts of the occult. And\nyet there is no more common-place statement than that the world in which we live is a four-\ndimensional space-time continuum.\nSpace is a three-dimensional continuum. By this we mean that it is possible to describe\nthe position of a point (at rest) by means of three numbers (co-ordinales) x, y, z, and thatthere is an indefinite number of points in the neighbourhood of this one, the position of\nwhich can be described by co-ordinates such as x\n1, y 1, z1, which may be as near as we\nchoose to the respective values of the co-ordinates x, y, z, of the first point. In virtue of thelatter property we speak of a " continuum," and owing to the fact th