In [1]:
!pip install langchain --q
!pip install huggingface --q

In [2]:
!pip install sentence_transformers --q

In [15]:
!pip install ctransformers --q

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [3]:
!pip install faiss-cpu --q

In [4]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
# Directory that is scanned for the source PDF files.
DATA_PATH = "/kaggle/input/llmdata/"
# Location the FAISS index is saved to and later reloaded from.
DB_FAISS_PATH = "vectorstore/db_faiss"


# create vector database

def create_vector_db():
    """Index the PDFs under DATA_PATH into FAISS and save the index to DB_FAISS_PATH.

    Every file matching ``*.pdf`` in DATA_PATH is loaded with PyPDFLoader,
    split with ``chunk_size=500`` / ``chunk_overlap=50``, embedded on the CPU
    with ``sentence-transformers/all-MiniLM-L6-v2`` and written to disk.
    """
    pdf_loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pdf_loader.load())

    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )

    # Build the index in memory, then persist it for the QA step.
    FAISS.from_documents(chunks, embedder).save_local(DB_FAISS_PATH)

In [7]:
# Build the FAISS index from the PDFs in DATA_PATH and save it under DB_FAISS_PATH.
create_vector_db()

Batches:   0%|          | 0/224 [00:00<?, ?it/s]

In [8]:
! wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q8_0.bin

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
--2023-08-23 08:01:13--  https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q8_0.bin
Resolving huggingface.co (huggingface.co)... 65.9.86.71, 65.9.86.79, 65.9.86.57, ...
Connecting to huggingface.co (huggingface.co)|65.9.86.71|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.huggingface.co/repos/30/e3/30e3aca7233f7337633262ff6d59dd98559ecd8982e7419b39752c8d0daae1ca/3bfdde943555c78294626a6ccd40184162d066d39774bd2c98dae24943d32cc3?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27llama-2-7b-chat.ggmlv3.q8_0.bin%3B+filename%3D%22llama-2-7b-chat.ggmlv3.q8_0.bin%22%3B&response-content-type=application%2Foctet-strea

In [11]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA

In [19]:
# Prompt text for the QA chain. {context} and {question} are the two input
# variables declared by set_custom_prompt().
custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question : {question}

Only return the helpful answer below and nothing else.
Helpful answer: 
"""

def set_custom_prompt():
    """Build the PromptTemplate used for question answering.

    The template text is ``custom_prompt_template``; its input variables are
    ``context`` and ``question``.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )

# Retrieval QA chain
def retrieval_qa_chain(llm, prompt, db, k=2):
    """Assemble a "stuff" RetrievalQA chain over a vector store.

    Args:
        llm: Language model passed to the chain.
        prompt: Prompt handed to the chain through ``chain_type_kwargs``.
        db: Vector store; ``db.as_retriever`` supplies the chain's retriever.
        k: Value passed to the retriever as ``search_kwargs['k']`` (the number
            of chunks to fetch per query). Defaults to 2, the value that was
            previously hard-coded.

    Returns:
        The RetrievalQA chain, configured with ``return_source_documents=True``.
    """
    retriever = db.as_retriever(search_kwargs={'k': k})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )


#loading the model
def load_llm():
    """Load the local Llama-2 7B chat GGML model through CTransformers.

    Generation settings are supplied through ``config``, which is the argument
    the LangChain CTransformers wrapper uses for model options. The earlier
    version passed ``max_new_token`` (a misspelling of ``max_new_tokens``) and
    ``temperature`` as top-level keyword arguments instead of through
    ``config``, so they were not applied as generation settings.

    Returns:
        The configured CTransformers LLM.
    """
    generation_config = {
        'max_new_tokens': 512,
        'temperature': 0.5,
    }
    return CTransformers(
        model='/kaggle/working/llama-2-7b-chat.ggmlv3.q8_0.bin',
        model_type='llama',
        config=generation_config,
    )

#QA model function

def qa_bot():
    """Assemble the question-answering chain from the saved index and the LLM.

    Reloads the FAISS index stored at DB_FAISS_PATH with the CPU
    ``all-MiniLM-L6-v2`` embedding model, then combines it with the LLM from
    load_llm() and the prompt from set_custom_prompt().

    Returns:
        The chain produced by retrieval_qa_chain().
    """
    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )
    vector_store = FAISS.load_local(DB_FAISS_PATH, embedder)
    return retrieval_qa_chain(load_llm(), set_custom_prompt(), vector_store)

# Chain built on first use and kept so later queries reuse the loaded model.
# Re-running this cell resets it, which forces a rebuild on the next query.
_qa_chain = None


def final_result(query):
    """Answer ``query`` with the retrieval QA chain.

    The chain from qa_bot() is created on the first call and reused afterwards.
    Previously every query called qa_bot() again, which reloaded the embedding
    model, the FAISS index and the Llama-2 weights each time. If the index on
    disk is rebuilt, re-run the cell defining this function so the next query
    picks up the new index.

    Args:
        query: The user's question.

    Returns:
        The chain's response for ``{'query': query}``.
    """
    global _qa_chain
    if _qa_chain is None:
        _qa_chain = qa_bot()
    return _qa_chain({'query': query})
    
    

In [20]:
# Show the full response: the answer together with the retrieved source documents.
final_result('what is allergic diseases')

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'query': 'what is allergic diseases',
 'result': "Allergic diseases refer to a group of medical conditions that occur when the body's immune system overreacts to a particular substance, such as pollen, dust mites, or certain foods. These conditions can cause a range of symptoms, including sneezing, congestion, runny nose, itchy eyes, and skin rashes. Some common allergic diseases include asthma, eczema, and allergic rhinitis (also known as hay fever).",
 'source_documents': [Document(page_content='affects many patients who are not“allergic” in this way.', metadata={'source': '/kaggle/input/llmdata/71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf', 'page': 393}),
  Document(page_content='Allergic reactions involve a special set of cells in', metadata={'source': '/kaggle/input/llmdata/71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf', 'page': 135})]}

In [21]:
# Keep only the generated answer text from the response.
final_result('what are the diffenent of cough diseases.')['result']

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

'Cough diseases include both acute and chronic conditions, such as bronchitis (inflammation of the bronchi), pneumonia (infection of the lungs), tuberculosis (an infectious disease that primarily affects the lungs), and whooping cough (a highly contagious respiratory infection).'

In [22]:
# Full response again, so the retrieved source chunks can be compared with the answer.
final_result("What is difference between normal fever and hay fever")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'query': 'What is difference between normal fever and hay fever',
 'result': "I don't know the difference between normal fever and hay fever. According to the provided text, Hay fever refers to an allergic reaction that causes symptoms such as sneezing, runny nose, and itchy eyes. It is not related to a typical fever caused by an infection.",
 'source_documents': [Document(page_content='fevers (a source of its nickname, “undulant fever”) can beexhausting. Symptoms usually appear between five daysand a month after exposure and begin with a single boutof high fever accompanied by shivering, aching, anddrenching sweats that last for a few days. Other symp-toms may include headache , poor appetite, backache,\nweakness, and depression. Mental depression can be sosevere that the patient may become suicidal.\nIn rare, untreated cases, the disease can become so', metadata={'source': '/kaggle/input/llmdata/71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf', 'page': 619}),
  Document(page