In [3]:
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader

def load_pdf(data, pattern="*.pdf"):
  """Load the PDF files found in a directory as LangChain documents.

  Args:
    data: Path of the directory handed to ``DirectoryLoader``.
    pattern: Glob expression used to pick files inside ``data``. The default,
      ``"*.pdf"``, is the pattern this notebook has always used.

  Returns:
    The value returned by ``DirectoryLoader.load()`` for the matched files,
    each of which is read with ``PyPDFLoader``.
  """
  loader = DirectoryLoader(data, glob=pattern, loader_cls=PyPDFLoader)
  return loader.load()

In [5]:
# Load the PDFs found in the local data/ directory. The resulting documents
# are the input to text_split in a later cell.
extracted_data = load_pdf("data/")

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
  """Split loaded documents into smaller chunks.

  Args:
    extracted_data: Documents to split, as returned by ``load_pdf``.
    chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
      Defaults to 500, the value this notebook has always used.
    chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to 20,
      the value this notebook has always used.

  Returns:
    The chunks produced by ``split_documents`` for ``extracted_data``.
  """
  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
  )
  return text_splitter.split_documents(extracted_data)

In [11]:
# Split the loaded PDF documents into chunks; these chunks are what gets
# embedded and indexed in Chroma further down.
text_chunks = text_split(extracted_data)

In [12]:
from langchain_community.embeddings import HuggingFaceEmbeddings

def embedding_model(model_name="sentence-transformers/all-MiniLM-l6-v2"):
  """Build the HuggingFace embedding model used to index the chunks.

  Args:
    model_name: Model identifier passed to ``HuggingFaceEmbeddings``. The
      default is the model this notebook has always used.

  Returns:
    A ``HuggingFaceEmbeddings`` instance configured with ``model_name``.
  """
  return HuggingFaceEmbeddings(model_name=model_name)

In [14]:
from inspect import isfunction

# This assignment rebinds the name `embedding_model` from the factory function
# to the embeddings object it returns. Only call it while it is still the
# function, so re-running this cell on a live kernel keeps the existing object
# instead of trying to call it.
if isfunction(embedding_model):
    embedding_model = embedding_model()

In [19]:
from langchain_chroma import Chroma

import uuid

# Give every chunk a deterministic ID (derived from its position and text).
# Without explicit IDs each run of this cell adds a new copy of every chunk to
# the persisted ./chroma_db collection, so retrieval starts returning
# duplicates. With stable IDs, re-running on the same data overwrites the
# existing records instead.
# NOTE: chunks left over from a different or edited corpus are not removed;
# delete ./chroma_db to rebuild the index from scratch.
chunk_ids = [
    str(uuid.uuid5(uuid.NAMESPACE_URL, f"{position}:{chunk.page_content}"))
    for position, chunk in enumerate(text_chunks)
]
db = Chroma.from_documents(
    text_chunks,
    embedding_model,
    ids=chunk_ids,
    persist_directory="./chroma_db",
)

In [20]:
# Prompt text for the QA chain. It has two placeholders, {context} and
# {question}, and instructs the model to answer from the supplied information
# and to say it does not know rather than invent an answer.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [24]:
from langchain.prompts import PromptTemplate

# Wrap the raw template text in a PromptTemplate that declares its two
# placeholders, then package it as the keyword arguments for the QA chain.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)


In [26]:
from langchain_community.llms import CTransformers

# Local Llama 2 chat model loaded through CTransformers from the model/
# directory; the config dict carries the generation settings.
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config=dict(max_new_tokens=512, temperature=0.8),
)

In [27]:
from langchain.chains import RetrievalQA

# Assemble the question-answering chain: the retriever is built from the
# Chroma index with k=2, the "stuff" chain type is used with the custom
# prompt, and source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(search_kwargs=dict(k=2)),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)