In [None]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA

In [None]:
# Notebook configuration: fill in both placeholders before running the cells below.
# Folder holding the source PDFs; handed to create_embeddings().
DOCS_PATH = "<path to where folder where pdfs are stored>"
# Directory of the on-disk FAISS index; read back via FAISS.load_local().
DB_FAISS_PATH = "<path to where the faiss db needs to be created>"

In [None]:
def create_embeddings(location, db_path=None, chunk_size=500, chunk_overlap=50):
    """Build a FAISS index from the PDFs in ``location`` and save it to disk.

    Every ``*.pdf`` directly matched by the glob in ``location`` is loaded with
    ``PyPDFLoader``, split into overlapping chunks, embedded with the
    ``all-minilm`` Ollama embedding model and written out with
    ``FAISS.save_local``.

    Args:
        location: Directory containing the PDF files to index.
        db_path: Directory the index is written to. Defaults to the
            notebook-level ``DB_FAISS_PATH`` so that the index is saved to the
            same place the notebook later loads it from.
        chunk_size: Maximum chunk size passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            text splitter.

    Raises:
        ValueError: If no PDF documents were loaded from ``location``.
    """
    # Resolve the default at call time so the notebook constant is the single
    # source of truth for where the index lives (previously a separate
    # hard-coded path was used here, which could diverge from the load path).
    if db_path is None:
        db_path = DB_FAISS_PATH

    loader = DirectoryLoader(location, glob='*.pdf', loader_cls=PyPDFLoader)
    documents = loader.load()
    if not documents:
        # Fail early with a clear message instead of trying to build an
        # index from nothing.
        raise ValueError(f"No PDF documents found in {location!r}")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(documents)

    # The same embedding model must be used when the index is loaded again.
    embeddings = OllamaEmbeddings(model="all-minilm")
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(db_path)

In [None]:
# Index every PDF under DOCS_PATH and persist the resulting FAISS store.
# This embeds all chunks, so it can be slow on large document sets.
create_embeddings(location=DOCS_PATH)

In [None]:
# Language model that answers the questions, served through Ollama.
llm = Ollama(
    model="mistral",
)

In [None]:
# Re-open the saved index. The embedding model here has to match the one
# used in create_embeddings(), otherwise query vectors will not be comparable
# with the stored ones.
embeddings = OllamaEmbeddings(model="all-minilm")
# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling
# the stored index data; only point DB_FAISS_PATH at an index you built yourself.
db = FAISS.load_local(
    DB_FAISS_PATH,
    embeddings,
    allow_dangerous_deserialization=True,
)

In [None]:
# Retrieval QA chain: the FAISS store supplies context to the LLM.
# search_kwargs k=2 limits retrieval to two chunks per query, and
# return_source_documents=True makes the output carry those chunks too
# (read further down as response['source_documents']).
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
)


In [None]:
# Ask a single question against the indexed PDFs and print the model's answer.
question = "Question here"
# Use invoke() rather than calling the chain object directly: Chain.__call__
# is deprecated in current LangChain releases. return_only_outputs=True keeps
# only the chain outputs in `response`, leaving out the echoed input.
response = qa.invoke({"query": question}, return_only_outputs=True)
print(response['result'])

In [None]:
# Show the retrieved chunks the answer was based on (present because the
# chain was built with return_source_documents=True).
print(response["source_documents"])