In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone as PC2
# import pinecone 
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

In [3]:
def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` yields for the files matching
        ``*.pdf``, each parsed with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [4]:
# Read the PDFs from the relative "data/" directory; the path is resolved
# against the kernel's current working directory.
extracted_data = load_pdf("data/")

In [5]:
# extracted_data

In [6]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks for embedding.

    Args:
        extracted_data: Documents to split, as returned by ``load_pdf``.
        chunk_size: Value forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value previously hard-coded.
        chunk_overlap: Value forwarded as ``chunk_overlap``. Defaults to 20,
            the value previously hard-coded.

    Returns:
        The result of ``split_documents`` on ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [7]:
# Chunk the loaded documents with the splitter settings defined in text_split.
text_chunks = text_split(extracted_data)

In [8]:
# Sanity check: how many chunks the splitter produced.
len(text_chunks)

7020

In [7]:
def download_embedding():
    """Create the embedding model used throughout the notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [8]:
# NOTE(review): `sentence_transformers` is never referenced by name in this
# notebook; presumably imported to fail fast when the package is missing.
# Confirm, then move it to the import cell or drop it.
import sentence_transformers
# Instantiate the embedding model once; later cells reuse `embeddings`.
embeddings = download_embedding()

In [2]:
from pinecone import Pinecone

  from tqdm.autonotebook import tqdm


In [3]:
import os
from dotenv import load_dotenv

# Pull variables from a .env file (if any) into the process environment so
# the API key never has to be written into the notebook itself.
load_dotenv()
p_key = os.getenv('PINECONE_API_KEY')

# os.getenv returns None for a missing variable; stop here with a clear
# message instead of passing None on to the Pinecone client.
if not p_key:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; define it in the environment or in a .env file."
    )

In [5]:
# Pinecone client authenticated with the key read from the environment.
pc = Pinecone(
    api_key=p_key,
)

In [9]:
# Name of the Pinecone index backing the retriever.
index_name = 'medical-chatbot'

# Attach to the already-populated index; queries are embedded with the same
# `embeddings` object created earlier.
docsearch = PC2.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

In [14]:
# Same index name as assigned in the earlier cell; repeated so this cell can
# run on its own.
index_name = "medical-chatbot"
# Handle to the index obtained through the Pinecone client `pc`.
index = pc.Index(index_name)

In [15]:
# Populate the index only when it holds no vectors yet. Run unconditionally,
# this cell embeds and uploads every chunk again on each "Run All", which is
# slow and adds the same texts to the index a second time.
# NOTE(review): a partially filled index is left untouched by this guard.
if index.describe_index_stats().total_vector_count == 0:
    docsearch = PC2.from_texts(
        [t.page_content for t in text_chunks],
        embeddings,
        index_name=index_name,
    )

In [10]:
# Smoke-test retrieval with a sample question before wiring up the QA chain.
docs = docsearch.similarity_search("what are allergies?")

In [17]:
# Display the documents returned by the similarity search above.
docs

[Document(page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE"),
 Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'),
 Document(page_content='allergens are the following:\n• plant pollens\n• animal fur and dander\n• body parts from house mites (microscopic creatures\nfound in all houses)\n• house dust• mold spores• cigarette smoke• solvents• cleaners\nCommon food allergens include the following:\n• nuts, especially peanuts, walnu

In [11]:
# Prompt text for the QA chain. It exposes two placeholders, {context} and
# {question}, which are declared as the template's input variables when the
# PromptTemplate is built.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [12]:
# Bind the template text to its two placeholders.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Extra keyword arguments forwarded to the chain so it uses the custom prompt.
chain_type_kwargs = dict(prompt=PROMPT)

In [13]:
# Local Llama-2 chat model loaded through LangChain's CTransformers wrapper.
# The path is relative to the working directory and written with a forward
# slash so it resolves on Windows, Linux and macOS alike (a backslash is only
# a path separator on Windows).
llm = CTransformers(
    model='model/llama-2-7b-chat.Q5_K_M.gguf',
    model_type='llama',
    config={'max_new_tokens': 512, 'temperature': 0.8},
)


In [14]:
# Retrieval-augmented QA chain: the retriever fetches the 2 closest chunks
# (k=2) and the custom prompt is supplied through `chain_type_kwargs`.
# Source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [15]:
# Query the bare LLM directly (no retrieval, no custom prompt) as a sanity
# check that the model loads and generates text.
llm.invoke('What is apple?')

'\n literally an apple a day keeps the doctor away.\n\nApple is a type of fruit that grows on the Malus domestica tree. It is one of the most widely cultivated and consumed fruits in the world, with over 7,000 varieties grown across the globe. Apples are known for their juicy flesh, crisp texture, and sweet flavor, which make them a popular choice for snacking, cooking, and baking.\nThe phrase "an apple a day keeps the doctor away" is a common proverb that suggests eating an apple daily can help maintain good health and prevent illnesses. While there is no scientific evidence to support this claim, apples are indeed rich in several nutrients and antioxidants that can contribute to overall health and well-being. These nutrients include fiber, vitamins, minerals, and polyphenols, which have been shown to have anti-inflammatory and anticancer properties.\nIn addition to their potential health benefits, apples are also an important crop from an economic perspective. They are grown in many 