In [None]:
!pip install faiss-cpu
!pip install annoy


In [1]:
# Import necessary libraries for embeddings, document processing, and vector store
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers


In [2]:
# Function to load PDF documents from a directory
def load_pdf(data_path):
    loader = DirectoryLoader(data_path, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

# Load the PDFs from the "data" folder
extracted_data = load_pdf("data/")

# Function to split the documents into smaller text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

# Split the extracted data into text chunks
text_chunks = text_split(extracted_data)
print(f"Number of chunks: {len(text_chunks)}")


Number of chunks: 7020


In [3]:
# Function to download Hugging Face embeddings
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

# Download embeddings
embeddings = download_hugging_face_embeddings()

# Example query embedding check
query_result = embeddings.embed_query("Hello world")
print(f"Query Embedding Length: {len(query_result)}")


  from .autonotebook import tqdm as notebook_tqdm


Query Embedding Length: 384


In [4]:
# Initialize FAISS vector store from text chunks and embeddings
docsearch = FAISS.from_texts([t.page_content for t in text_chunks], embeddings)

# Example query to search in the FAISS index
query = "What are allergies?"
docs = docsearch.similarity_search(query, k=3)
print("Top 3 Documents for the Query:", docs)


Top 3 Documents for the Query: [Document(page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE", metadata={}), Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-', metadata={}), Document(page_content='allergens are the following:\n• plant pollens\n• animal fur and dander\n• body parts from house mites (microscopic creatures\nfound in all houses)\n• house dust• mold spores• cigarette smoke• solvents• cleaners\nCommon food allergens in

In [5]:
# Define the prompt template to be used for the chatbot
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

# Create a prompt object
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Specify chain type arguments
chain_type_kwargs = {"prompt": PROMPT}


In [6]:
# Initialize the LLM with the pre-trained model
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config={'max_new_tokens': 512, 'temperature': 0.8}
)


In [7]:
# Setup the RetrievalQA chain with the LLM and document retriever
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)


In [8]:
# Chat loop for user interaction
while True:
    try:
        # Get user input directly in the terminal
        user_input = input(f"\nInput your question (type 'exit' to stop): ")

        # Break loop if user types 'exit'
        if user_input.lower() == 'exit':
            print("\nExiting chat... Goodbye!")
            break
        
        # Get response from the QA chain
        result = qa({"query": user_input})
        
        # Extract the response
        response = result["result"]
        
        # Display both the question and the response in the terminal
        print(f"\nUser Question: {user_input}")
        print(f"Bot Response: {response}\n")
    
    except KeyboardInterrupt:
        # Allow the user to exit by pressing Ctrl+C
        print("\nExiting chat...")
        break



User Question: what is alzheimers
Bot Response: Alzheimer's disease (AD) is a neurologic disorder characterized by progressive cognitive decline, including memory loss, confusion, and difficulty with communication and daily activities. It is the most common form of dementia, affecting an estimated 5.8 million Americans, according to the Alzheimer's Association. The exact cause of AD is not yet fully understood, but it is believed to be caused by a combination of genetic, lifestyle, and environmental factors. There is currently no cure for AD, but there are several medications and therapies that can help manage its symptoms and slow its progression.


User Question: what is migraine and how do i cure it?
Bot Response: Migraines are a type of headache that can cause intense pain, often on one side of the head, along with other symptoms like nausea, vomiting, and dizziness. The exact cause of migraines is not fully understood, but they may be related to changes in blood flow, hormonal ch