In [None]:
# Standard-library access to environment variables (not referenced in the cells visible here).
import os
# python-dotenv helper that reads key/value pairs from a .env file into the process environment.
from dotenv import load_dotenv
# NOTE(review): presumably this supplies OPENAI_API_KEY for the OpenAIEmbeddings() call
# made in a later cell — confirm the .env file actually defines it.
load_dotenv()

In [None]:
# Read the source PDF into LangChain documents.
from langchain_community.document_loaders import PyPDFLoader

# Relative path: resolved against the kernel's current working directory.
pdf_path = 'attention.pdf'
pdfLoader = PyPDFLoader(pdf_path)
pdf_documents = pdfLoader.load()

# Preview the first three loaded documents.
pdf_documents[:3]

In [None]:
# Break the loaded PDF documents into smaller, overlapping chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings, kept together so they are easy to tune in one place.
splitter_options = {
    "chunk_size": 500,
    "chunk_overlap": 50,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
transformed_doc = text_splitter.split_documents(pdf_documents)

# Preview the first three chunks.
transformed_doc[:3]

In [None]:
# Embedding model : OpenAI embeddings
# Vector store    : FAISS
#
# Embed the chunked document and index the vectors in an in-memory FAISS store.
# NOTE(review): this community import path for OpenAIEmbeddings is reported as deprecated
# in favor of the separate langchain-openai package — confirm against the installed version.
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OpenAIEmbeddings

# Only the first 30 chunks are embedded and indexed, so retrieval cannot see anything
# beyond them. NOTE(review): presumably a cap on embedding API usage — confirm it is intended.
chunks_to_index = transformed_doc[:30]
embedding_model = OpenAIEmbeddings()
db = FAISS.from_documents(chunks_to_index, embedding_model)

# Echo the store object to confirm it was created.
db

In [None]:
# The vector store can be queried directly with a similarity search, without any chain.
query = "What is attention"
result = db.similarity_search(query)

# Show the text of the first returned match.
top_match = result[0]
top_match.page_content

#### Our vector store is now ready with the vector embeddings. Let's design a retrieval chain.

In [None]:
# LLM : Llama 2 served through Ollama
# NOTE(review): assumes an Ollama server is reachable and the "llama2" model is available — confirm.
from langchain_ollama import OllamaLLM

ollama_model_name = "llama2"
llm = OllamaLLM(model=ollama_model_name)

In [None]:
# Chat prompt with two placeholders: {context} and {input}.
from langchain_core.prompts import ChatPromptTemplate

# The template text is whitespace-sensitive (it is sent to the model verbatim), so keep it as is.
prompt_template_text = """ 
            Answer the questions based on the context. 
            Think twice before giving the answer. <context>{context}</context>
            Question : {input}"""
prompt = ChatPromptTemplate.from_template(prompt_template_text)

In [None]:
# Assemble the retrieval pipeline: retriever -> "stuff" documents chain -> LLM.
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Document chain: passes documents to the LLM through the prompt defined earlier.
document_chain = create_stuff_documents_chain(llm, prompt)

# Retriever view over the FAISS store, created with its default search settings.
# (A bare `retriever` expression in the middle of a cell is not echoed — only a cell's
# last expression is displayed by default — so it is omitted here; inspect `retriever`
# in a cell of its own if needed.)
retriever = db.as_retriever()

# Retrieval chain: combines the retriever with the document chain.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [None]:
# Run a question through the retrieval chain and show the full result.
question = "An attention function is defined as a mapping query"
response = retrieval_chain.invoke({"input": question})

# Echo the complete result of the call.
response