In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
import os

load_dotenv()

True

In [3]:
PINECONE_KEY = os.getenv("PINECONE_API_KEY")

In [4]:
def load_pdf(path):
    loader = DirectoryLoader(path,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

In [5]:
extracted_data = load_pdf("data/")

extracted_data[10]

Document(metadata={'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'creator': 'PyPDF', 'creationdate': '2004-12-18T17:00:02-05:00', 'moddate': '2004-12-18T16:15:31-06:00', 'source': 'data\\Medical_book.pdf', 'total_pages': 637, 'page': 10, 'page_label': '11'}, page_content='Rhonda Cloos, R.N.\nMedical Writer\nAustin, TX\nGloria Cooksey, C.N.E\nMedical Writer\nSacramento, CA\nAmy Cooper, M.A., M.S.I.\nMedical Writer\nVermillion, SD\nDavid A. Cramer, M.D.\nMedical Writer\nChicago, IL\nEsther Csapo Rastega, R.N., B.S.N.\nMedical Writer\nHolbrook, MA\nArnold Cua, M.D.\nPhysician\nBrooklyn, NY\nTish Davidson, A.M.\nMedical Writer\nFremont, California\nDominic De Bellis, Ph.D.\nMedical Writer/Editor\nMahopac, NY\nLori De Milto\nMedical Writer\nSicklerville, NJ\nRobert S. Dinsmoor\nMedical Writer\nSouth Hamilton, MA\nStephanie Dionne, B.S.\nMedical Writer\nAnn Arbor, MI\nMartin W. Dodge, Ph.D.\nTechnical Writer/Editor\nCentinela Hospital and Medical\nCenter\nInglewood, CA\nDavid Doermann\nMedical Wri

In [6]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20)
    
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks
    

In [7]:
text_chunks = text_split(extracted_data)
print(f"", len(text_chunks))

 5859


In [8]:
def download_huggingface_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [9]:
embeddings = download_huggingface_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [10]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [11]:
query_result = embeddings.embed_query("Hello world")
print(f"Lenght: {len(query_result)}")


Lenght: 384


In [12]:
pc = Pinecone(api_key=PINECONE_KEY)
index_name = 'medical-bot-index'

In [13]:
index = pc.Index(name=index_name)

In [14]:
# docsearch = Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [15]:
#docsearch = vector_store.add_texts(texts=[t.page_content for t in text_chunks])

In [16]:
query = "What are Allergies"

docs = vector_store.similarity_search(query, k=3)
for res in docs:
    print(f"* {res.page_content} [{res.metadata}]")

* Purpose
Allergy is a reaction of the immune system. Nor-
mally, the immune system responds to foreign microor-
ganisms and particles, like pollen or dust, by producing
specific proteins called antibodies that are capable of
binding to identifying molecules, or antigens, on the
foreign organisms. This reaction between antibody and
antigen sets off a series of reactions designed to protect
the body from infection. Sometimes, this same series of [{}]
* reaction. Allergic rhinitis is characterized by an itchy,
runny nose, often with a scratchy or irritated throat due
to post-nasal drip. Inflammation of the thin membrane
covering the eye (allergic conjunctivitis) causes redness,
irritation, and increased tearing in the eyes. Asthma caus-
es wheezing, coughing, and shortness of breath. Symp-
toms of food allergies depend on the tissues most sensi-
tive to the allergen and whether the allergen spread sys- [{}]
* KEY TERMS
Allergen —A substance that provokes an allergic
response.
Anaphylaxis

In [17]:
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up the answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [18]:
prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
chain_type_kwargs = {'prompt': prompt}

In [26]:
# llm = CTransformers(model="model/llama-2-13b-chat.Q4_K_M.gguf",
#                     model_type='llama',
#                     config={'max_new_tokens': 512,
#                             'temperature': 0.8})
from langchain_community.llms import LlamaCpp
llm = LlamaCpp(
    model_path='model/llama-2-13b-chat.Q4_K_M.gguf',
    n_ctx=4096,
    n_gpu_layers=30,
    n_threads=0,
    temperature=0.8,
    max_tokens=512
)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from model/llama-2-13b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 5120


llama_model_loader: - kv   4:                          llama.block_count u32              = 40
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 13824
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 40
llama_model_loader: - kv   8:              llama.attention.head_count_kv u32              = 40
llama_model_loader: - kv   9:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010
llama_model_loader: - kv  10:                          general.file_type u32              = 15
llama_model_loader: - kv  11:                       tokenizer.ggml.model str              = llama
llama_model_loader: - kv  12:                      tokenizer.ggml.tokens arr[str,32000]   = ["<unk>", "<s>", "</s>", "<0x00>", "<...
llama_model_loader: - kv  13:                      tokenizer.ggml.scores arr[f32,32000]   = [0

In [27]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=vector_store.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [22]:
while True:
    user_input = input(f"Input Prompt:")
    result = qa.invoke({'query': user_input})
    print("Response: ", result['result'])

llama_perf_context_print:        load time =   38434.31 ms
llama_perf_context_print: prompt eval time =   38426.49 ms /   265 tokens (  145.01 ms per token,     6.90 tokens per second)
llama_perf_context_print:        eval time =    6964.69 ms /    25 runs   (  278.59 ms per token,     3.59 tokens per second)
llama_perf_context_print:       total time =   45489.31 ms /   290 tokens
llama_perf_context_print:    graphs reused =         48


Response:  Acne is the general name given to a skin disorder in which the sebaceous glands become inflamed.


Llama.generate: 50 prefix-match hit, remaining 106 prompt tokens to eval


KeyboardInterrupt: 

In [28]:
qa.invoke(query)

llama_perf_context_print:        load time =   46053.24 ms
llama_perf_context_print: prompt eval time =   46050.10 ms /   330 tokens (  139.55 ms per token,     7.17 tokens per second)
llama_perf_context_print:        eval time =   10516.13 ms /    36 runs   (  292.11 ms per token,     3.42 tokens per second)
llama_perf_context_print:       total time =   56740.80 ms /   366 tokens
llama_perf_context_print:    graphs reused =         64


{'query': 'What are Allergies',
 'result': 'Allergies are an overreaction of the immune system that can cause physical symptoms such as a runny nose, itchy eyes, and scratchy throat.',
 'source_documents': [Document(id='2ce2d896-2344-4ac4-bd77-4e31def5b3e1', metadata={}, page_content='Purpose\nAllergy is a reaction of the immune system. Nor-\nmally, the immune system responds to foreign microor-\nganisms and particles, like pollen or dust, by producing\nspecific proteins called antibodies that are capable of\nbinding to identifying molecules, or antigens, on the\nforeign organisms. This reaction between antibody and\nantigen sets off a series of reactions designed to protect\nthe body from infection. Sometimes, this same series of'),
  Document(id='55ec0f50-0057-4d75-87cd-3e79332970d0', metadata={}, page_content='reaction. Allergic rhinitis is characterized by an itchy,\nrunny nose, often with a scratchy or irritated throat due\nto post-nasal drip. Inflammation of the thin membrane\nco