In [1]:
!pwd
# !ls

/home/dephinate/ASU/DL/MedicalChatBot


In [2]:
import os
from pathlib import Path

Installations

In [3]:
# # GPU llama-cpp-python
# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip -I install llama-cpp-python numpy --force-reinstall --upgrade --no-cache-dir --verbose
# !pip install huggingface_hub
# !pip install -I llama-cpp-python
# !pip install numpy
# !pip install accelerate

In [4]:
# !pip install ctransformers
# !pip install sentence-transformers
# !pip install pinecone-client
# !pip install langchain
# !pip install langchain_pinecone
# !pip install flask
# !pip install pypdf
# !pip install ctransformers[cuda]

Import libraries    
we need the following:  
* something for document data
* something for splitting
* something for embeddings
* something for vectior indexing
* something for prompts
* something for llm

In [5]:
# Loader
import langchain
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
import pypdf

# Splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Embeddings
from langchain.embeddings import HuggingFaceEmbeddings

# Vector Store
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore

# Prompts
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate

# Models
from langchain.llms import CTransformers
from langchain_community.llms import LlamaCpp

# Chains
from langchain.chains import RetrievalQA
from langchain.chains import LLMChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain.schema.output_parser import StrOutputParser



  from tqdm.autonotebook import tqdm


1) Load Data

In [6]:
# Extract data from the pdf
def load_pdf(path:Path):
    loader = DirectoryLoader(   # To load all pdfs from a directory
        path=path,
        glob="*.pdf",
        loader_cls=PyPDFLoader,
        show_progress=True
    )
    documents = loader.load()
    return documents


In [7]:
extracted_data = load_pdf("./data")


  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:13<00:00, 13.66s/it]


2) Split Data

In [8]:
# parameters: chunk size and chunk overlap
def text_split(chunk_size:int, chunk_overlap:int, extracted_data:None):
    splitter = RecursiveCharacterTextSplitter(chunk_size = chunk_size, chunk_overlap = chunk_overlap, separators=['\n\n', '\n', '.', ','])
    chunks = splitter.split_documents(extracted_data)
    return chunks

In [9]:
chunks = text_split(chunk_size=500,chunk_overlap=50,extracted_data=extracted_data)

Create Embeddings

In [10]:
# fucntion to download hugging face embeddings
def download_embeddings_from_huggingface(model_name:str):
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return embeddings

In [11]:
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = download_embeddings_from_huggingface(model_name=embedding_model)

Pinecone

In [12]:
from research import PINECONE_API_ENV, PINECONE_API_KEY

In [13]:
# Initialize index
pc = Pinecone(
    api_key = PINECONE_API_KEY,
)
pc.list_indexes().names() # just to doublecheck if I am able to connect to my Index
index_name = "medical-chatbot"
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.07166,
 'namespaces': {'medicalChatBot': {'vector_count': 7166}},
 'total_vector_count': 7166}

In [14]:
# Clean DB
# index.delete(delete_all=True,namespace="medicalChatBot")

In [15]:
# Initialize Vectorstore
vectorstore = PineconeVectorStore(
    index=index,
    embedding=embeddings,
    namespace="medicalChatBot",
    index_name='medical-chatbot'
)

In [16]:
# Add records
# vectorstore.add_texts(texts=[t.page_content for t in chunks])

In [17]:
# Check DB Status
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.07166,
 'namespaces': {'medicalChatBot': {'vector_count': 7166}},
 'total_vector_count': 7166}

In [18]:
def format_docs(docs):
    numbered_docs = []
    for i, doc in enumerate(docs, start=1):  # Enumerate starting from 1
        numbered_docs.append(f"{i}. {doc.page_content}")  # Add number and content
    return "\n".join(numbered_docs)  # Join with newlines


In [45]:
# Similarity search example
query = "What?"

chunks_retrieved = vectorstore.similarity_search(
    query,  # our search query
    k=3  # return 3 most relevant docs
)
chunks_retrieved_formatted = format_docs(chunks_retrieved)
print("Type: ",type(chunks_retrieved_formatted))
print("Len: ",len(chunks_retrieved_formatted))
print("Chunks retrieved: \n",chunks_retrieved_formatted)

Type:  <class 'str'>
Len:  14
Chunks retrieved: 
 1. .
2. .
3. .


In [20]:
chunks_retrieved

[Document(page_content='Nancy J. Nordenson\nAcid reflux seeHeartburn\nAcidosis seeRespiratory acidosis; Renal\ntubular acidosis; Metabolic acidosis\nAcne\nDefinition\nAcne is a common skin disease characterized by\npimples on the face, chest, and back. It occurs when thepores of the skin become clogged with oil, dead skincells, and bacteria.\nDescription\nAcne vulgaris, the medical term for common acne, is'),
 Document(page_content='The goal of treating moderate acne is to decrease\ninflammation and prevent new comedone formation. Oneeffective treatment is topical tretinoin along with a topical\nGALE ENCYCLOPEDIA OF MEDICINE 2 25Acne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceousglands become inflamed. (Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)GEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(page_content='ent purposes. For example, lotions, soaps, gels, andcr

Model Initialization

In [None]:
# Using CTransformers
llm=CTransformers(model="llama-2-7b-chat.Q5_K_M.gguf",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

In [21]:
# Using LlamaCPP
lcpp_llm = None
lcpp_llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q5_K_M.gguf",
    n_gpu_layers=32,
    n_batch=512,
    n_ctx=1024,
    f16_kv=True, 
    temperature = 0.8
    )

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b-chat.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 

Prompting

In [None]:
# Template String
template1="""
[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>
Use the Context to answer the Question.
Context: {context}
Question: {question}[/INST]

"""

In [None]:
template2="""
Answer the question based on the context below.
If you don't know the answer, only return I don't know, Do not make an answer.

Context: {context}
Question: {question}

Answer:
"""

In [None]:
template3 = """
<<USER>> {question}

<<SYS>>
Answer the question based on the context below..
{context}
system_prompt: "You are a helpful assistant, below is ... llama-2-7b-chat.Q4_K_M.gguf"
"""

In [81]:
# Most effective Template String
template4="""
[INST] <<SYS>>
Answer the query based on the information provided to you. If you don't know the answer, return 'I do not know', Do not make an answer.
<</SYS>>
query:{question}
information:{context}[/INST]

"""

In [80]:
# Most effective Template String
template5="""
[INST] <<SYS>>
Answer the query based on the information. If you don't know the answer, return 'I do not know', Do not make an answer.
<</SYS>>
query:{question}
information:{context}[/INST]

"""

In [82]:
prompt = PromptTemplate.from_template(template5)
prompt

PromptTemplate(input_variables=['context', 'question'], template="\n[INST] <<SYS>>\nAnswer the query based on the information. If you don't know the answer, return 'I do not know', Do not make an answer.\n<</SYS>>\nquery:{question}\ninformation:{context}[/INST]\n\n")

Langchain Chain      
1) Method 1: Using RetrievalQA  
2) Method 2: Usimg Pipe operators

In [83]:
# Method 1
qa=RetrievalQA.from_chain_type(
    llm=lcpp_llm, 
    chain_type="stuff", 
    retriever=vectorstore.as_retriever(search_kwargs={'k': 3}),
    return_source_documents=True, 
    chain_type_kwargs={"prompt": prompt})

In [84]:
response = qa.invoke({"query": "What is furniture?"})


Llama.generate: prefix-match hit

llama_print_timings:        load time =   23373.24 ms
llama_print_timings:      sample time =      37.75 ms /   202 runs   (    0.19 ms per token,  5350.99 tokens per second)
llama_print_timings: prompt eval time =   14461.66 ms /   280 tokens (   51.65 ms per token,    19.36 tokens per second)
llama_print_timings:        eval time =   60924.87 ms /   201 runs   (  303.11 ms per token,     3.30 tokens per second)
llama_print_timings:       total time =   75849.06 ms /   481 tokens


In [85]:
response['source_documents']


[Document(page_content='suitable surfaces. The space can be as simple as a kitchen\nor office table, or as fancy as a specialized artist’s studio.\nThe artist should have adequate time to become'),
 Document(page_content='stances that may predispose an individual to BED includeheredity and affective disorders, such as major depression.BED patients are also more likely to have a comorbid, orco-existing, diagnosis of impulsive behaviors (for exam-ple, compulsive buying), post-traumatic stress disorder\n(PTSD), panic disorder , or personality disorders .Individuals who develop BED often come from fam-'),
 Document(page_content='ry is that the world and body are made up of five mainelements: wood, fire, earth, metal, and water. These ele-ments are all interconnected, and each element eithergenerates or controls another element. For instance,water controls fire and earth generates metal. Each organis associated with one of the five elements. The Chinesesystem uses elements and organs to des

In [86]:
response['result']

"Furniture refers to movable objects that are designed to provide functional support or comfort in a particular space, such as a kitchen or office. According to the information provided, the artist should have adequate time to create furniture that suits their needs and preferences. This could include creating simple or elaborate pieces, such as a kitchen table or a specialized artist's studio.\nBED, or Binge Eating Disorder, is a condition characterized by recurring episodes of binge eating, followed by feelings of guilt or shame. According to the information provided, factors that may predispose an individual to BED include heredity and affective disorders, such as major depression. BED patients are also more likely to have comorbid diagnoses of impulsive behaviors (such as compulsive buying), post-traumatic stress disorder (PTSD), panic disorder, or personality disorders."

In [5]:
# Method 2
prompt = ChatPromptTemplate.from_template(template4)
prompt

NameError: name 'ChatPromptTemplate' is not defined

In [None]:
retrieval = RunnableParallel(
    {"context": vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2}) | format_docs, "question": RunnablePassthrough()}
)

chain = retrieval | prompt | lcpp_llm | StrOutputParser()

In [None]:
retrieval.invoke("Caffien?")

In [None]:
(retrieval|prompt).invoke("what is Acne and its diffent types and treatment?")

In [None]:

result = chain.invoke("what is Acne and its diffent types and treatment?")
result

In [None]:
print(result)

In [None]:
lcpp_llm.invoke("what is Acne and its diffent types and treatment?")

In [None]:
print(result)