In [31]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain. vectorstores import FAISS

In [32]:
# Folder that is scanned for the PDF files to index.
DATA_PATH = 'data/'
# Folder the FAISS index is saved to and later loaded back from.
DB_FAISS_PATH = 'vectorstores/db_faiss'

# Load documents and create embeddings

In [33]:
# Collect every file in DATA_PATH matching '*.pdf'; PyPDFLoader is the loader
# class applied to each match.
loader = DirectoryLoader(
    DATA_PATH,
    glob='*.pdf',
    loader_cls=PyPDFLoader,
)
documents = loader.load()

# Split the loaded documents into overlapping chunks
# (chunk_size=500, chunk_overlap=100).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
texts = text_splitter.split_documents(documents)

# Embedding model for the chunks, configured to run on the CPU.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

2024-01-12 20:13:37 - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


## Create the vector database

In [None]:
# Build the FAISS vector store from the text chunks and the embedding model.
db = FAISS.from_documents(documents=texts, embedding=embeddings)

In [35]:
# Write the vector store to DB_FAISS_PATH so it can be loaded again later.
db.save_local(folder_path=DB_FAISS_PATH)

# Load the model and test the QA chain

In [36]:
from langchain import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain. llms import CTransformers
from langchain.chains import RetrievalQA

In [37]:
# Prompt text for the QA chain. {context} and {question} are the placeholders
# named as input variables where the PromptTemplate is constructed.
custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, please just say that you don't know the answer, don't try to make up
an answer. 
Context: {context}
Question: {question}. 
Only returns the helpful answer below and nothing else.
Helpful answer:
"""

In [38]:
def set_custom_prompt():
    """Build the prompt template used for question answering.

    Returns:
        The object produced by ``PromptTemplate`` for
        ``custom_prompt_template`` with the input variables ``context`` and
        ``question``.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )

def load_llm(model="llama-2-7b-chat.ggmlv3.q8_0.bin", model_type="llama",
             max_new_tokens=512, temperature=0.1):
    """Load a local model through LangChain's ``CTransformers`` wrapper.

    Args:
        model: Model file (or repo) handed to ``CTransformers``.
        model_type: Architecture name handed to ``CTransformers``.
        max_new_tokens: Upper bound on generated tokens, sent via ``config``.
        temperature: Sampling temperature, sent via ``config``.

    Returns:
        The configured ``CTransformers`` LLM instance.
    """
    # Generation settings must travel inside `config`: the LangChain wrapper
    # forwards only that dict to the underlying ctransformers model, so
    # passing max_new_tokens/temperature as top-level keyword arguments leaves
    # the library defaults in effect.
    generation_config = {
        'max_new_tokens': max_new_tokens,
        'temperature': temperature,
    }
    return CTransformers(
        model=model,
        model_type=model_type,
        config=generation_config,
    )


def retrieval_qa_chain(llm, prompt, db, k=2):
    """Assemble a RetrievalQA chain on top of a vector store.

    Args:
        llm: Language model passed to the chain.
        prompt: Prompt handed to the "stuff" chain via ``chain_type_kwargs``.
        db: Vector store; its ``as_retriever`` method supplies the retriever.
        k: Value sent to the retriever as ``search_kwargs['k']``. Defaults
            to 2.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type``, configured with
        ``return_source_documents=True``.
    """
    retriever = db.as_retriever(search_kwargs={'k': k})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )



In [39]:
# Embedding model handed to the index loader, configured for the CPU.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)
# Reload the FAISS index stored under DB_FAISS_PATH.
db = FAISS.load_local(DB_FAISS_PATH, embeddings)
# Language model used by the QA chain.
llm = load_llm()

2024-01-12 20:19:04 - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


In [40]:
def qa_bot():
    """Create the question-answering chain.

    Combines the module-level ``llm`` and ``db`` with the prompt from
    ``set_custom_prompt`` and returns whatever ``retrieval_qa_chain`` builds.
    """
    return retrieval_qa_chain(llm, set_custom_prompt(), db)

def final_result(query):
    """Answer ``query`` with a chain obtained from ``qa_bot``.

    Args:
        query: Question text, sent to the chain under the ``'query'`` key.

    Returns:
        Whatever the chain returns when called with ``{'query': query}``.
    """
    chain = qa_bot()
    return chain({'query': query})

In [None]:
# Read a question interactively via input() and keep the chain's response in `res`.
res = final_result(input())

In [30]:
# Show the stored response as the cell output.
res

{'query': 'what is arm?',
 'result': 'Arm is a computer architecture and instruction set that is widely used in microprocessors, microcontrollers, and other digital circuits. It was developed by Arm Limited (formerly Acorn Computers Limited), a British company that was acquired by SoftBank Group in 2016. The Arm architecture is designed to be efficient in terms of power consumption and cost, making it well-suited for use d for use in gged for aited for aited for embedded systems such applications such ased in gged for embedded systems thated inited for use inated for mobile devices such applications such applications such applications such applications such applications such ased in tol tted for use d for use d for use dited for embedding into aited for use d for embedded systems such ased ined ined inied for aited for use d for use d for aited for aited for use d for use for embedded systems that for embedded systems where small and for mobile devices thatched forted for battery forge