In [30]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from tqdm.autonotebook import tqdm
from pinecone import Pinecone

In [11]:
import os
!pwd

/config/workspace/Medical_chatbot


In [14]:
parent_dir = "/config/workspace/Medical_chatbot"
data_dir = "data"
path = os.path.join(parent_dir, data_dir)

In [None]:
os.mkdir(path)

In [6]:
!wget -P ./data https://www.ruseducation.in/books/Fundamentals-Neurology.pdf

--2024-06-04 18:29:18--  https://www.ruseducation.in/books/Fundamentals-Neurology.pdf
Resolving www.ruseducation.in (www.ruseducation.in)... 192.124.249.183
Connecting to www.ruseducation.in (www.ruseducation.in)|192.124.249.183|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 32376969 (31M) [application/pdf]
Saving to: ‘./data/Fundamentals-Neurology.pdf’


2024-06-04 18:29:27 (4.01 MB/s) - ‘./data/Fundamentals-Neurology.pdf’ saved [32376969/32376969]



In [12]:
def load_pdf(path):
    loader = DirectoryLoader(path = path, glob= "*.pdf", loader_cls= PyPDFLoader)
    doc = loader.load()

    return doc

In [15]:
path

'/config/workspace/Medical_chatbot/data'

In [16]:
book = load_pdf(path)

In [23]:
def text_split(book: list) -> list:
    split_text = RecursiveCharacterTextSplitter(chunk_size = 400, chunk_overlap = 30)
    text_chunks = split_text.split_documents(book)

    return text_chunks

In [24]:
text_chunks = text_split(book)
len(text_chunks)

3227

In [25]:
emb = HuggingFaceEmbeddings(model_name= "sentence-transformers/all-MiniLM-L6-v2")



In [26]:
query = emb.embed_query("What is the capital of France?")

In [27]:
type(query)

list

In [29]:
len(query)

384

In [51]:
from dotenv import load_dotenv
load_dotenv()

True

In [52]:
pc = Pinecone(api_key= os.environ.get("PINECONE_API_KEY"))

In [34]:
from pinecone import ServerlessSpec

In [35]:
cloud = "aws"
region = "us-east-1"

In [36]:
spec = ServerlessSpec(cloud= cloud, region=region)

In [46]:
#pc.delete_index("demo-index")

In [40]:
index_name = "medical-chatbot"

if index_name not in pc.list_indexes().names():
    pc.create_index(name = index_name, dimension= 384, metric="cosine", spec = spec)

In [43]:
from langchain_pinecone import PineconeVectorStore

In [53]:
namespace = "try-1"


docsearch = PineconeVectorStore.from_texts(texts= [i.page_content for i in text_chunks], index_name= index_name, embedding= emb, namespace=namespace)

In [58]:
index = pc.Index(index_name)

In [59]:
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'try-1': {'vector_count': 3803}},
 'total_vector_count': 3803}

In [64]:
query = "Why am I having headaches"
res = docsearch.similarity_search(query= query, k =4)
print(res)

[Document(page_content='and tension-type headache.\nSymptomatic Headache\nSymptomatic headache is due to a structural lesion, in-\nfection, or inflammation of intra- and/or extracranial\ntissue. Its direct cause is often a pathological alteration of\nintracranial pressure, which excites nociceptive nerve\nendings in the meninges. The ICP may be either too high'), Document(page_content='head and/or facial pain, which may be quite severe. Even\ndiseases of the cervical spine can, rarely, produce head-\nache (spondylogenic headache).\nTable 13.7 provides an overview of the major causes of\nsymptomatic headache. We will describe a few of the\ncausative neurological illnesses and spondylogenic\nheadache, in detail in the following paragraphs.\nOcclusions and Dissections of Cranial Vessels'), Document(page_content='(particularly because of space-occupying lesions such ashematomas, tumors, and hydrocephalus) or too low\n(e. g., in the intracranial hypotension syndrome after a\nlumbar puncture

In [87]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")

from langchain_huggingface.llms import HuggingFacePipeline
from transformers import pipeline, set_seed
pipe = pipeline("text-generation", model = model, tokenizer = tokenizer, max_new_tokens = 300, num_return_sequences = 1 )
set_seed(42)
llm = HuggingFacePipeline(pipeline=pipe)

In [88]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [89]:
prompt = PromptTemplate.from_template(prompt_template)
#chain_type_kwargs = {"prompt" : prompt}

In [111]:
qa = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff", 
    retriever = docsearch.as_retriever(search_kwargs = {"k" : 3})
    #return_source_documents = True
)

In [91]:
query1 = "Why am I having headaches for the past 3 days?"

query2 = "I have a tumor in my brain. Should I check it up?"

In [92]:
res1_with_knowledge = qa.invoke(query1)
res1_without_knowlege = llm.invoke(query1)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [119]:
print(res1_with_knowledge["result"])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

There are episodic and chronic forms of tension-type
headache. Patients with the episodic form suffer fromheadache on fewer than 15 days per month (180 days per
year). The individual episodes of headache may last from
30 minutes to several days. Patients with the chronic
form suffer from headache on more than 15 days permonth (180 days per year).

and tension-type headache.
Symptomatic Headache
Symptomatic headache is due to a structural lesion, in-
fection, or inflammation of intra- and/or extracranial
tissue. Its direct cause is often a pathological alteration of
intracranial pressure, which excites nociceptive nerve
endings in the meninges. The ICP may be either too high

(particularly because of space-occupying lesions such ashematomas, tumors, and hydrocephalus) or too low
(e. g., in the intracranial hypotension syndrom

In [94]:
print(res1_without_knowlege)

Why am I having headaches for the past 3 days?

Here are a few things I could give you which I‪m in mind when you‪re working on the project. I want to talk about something new or new that should make my life better.
If you are a business owner you would like to let me know in the comment section about the project on the blog and to send in your comments.
What are your plans for the product? What are your plans for the next 2.5 months?
The last three months have been a great period for me and I wish I could have done a little more, but I have to confess that sometimes I miss myself and I have to admit I just don‪t feel like putting in enough time to make things happen. I need to focus on how these things are going to get in it and I want to give them just the opposite of thinking about each other, but hopefully I will give a fair share of what I‪m doing for the future as a startup leader.
What kind of business are you planning on doing next?
I intend to spend a week and a half working o

In [95]:
from langchain.chains.conversation.memory import ConversationBufferMemory

In [112]:
conv_mem = ConversationBufferMemory(
    memory_key = "chat-history",
    k = 5, 
    return_messsages = True
)

In [113]:
from langchain.agents import Tool

In [114]:
qa.run(query3)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\ninner canthus of the eye. There may be a continuous,\nbackground pain in addition to the typical, lightning-\nlike, shooting pain. In this condition, as in the otherneuralgias of the face, the pain is provoked by chewing\nor by touch—here, by touching the eye. The attacks areoften accompanied by redness of the eye, swelling of the\nnasal mucosa, and lacrimation. They can often be\n\nNystagmus coarse nystagmus toward the side of the lesion, in-creasing with gaze toward the side of the lesion,decreasing on closure of the eyescf. Fig. 11.1\n\nthe nerves to the eye muscles. Optic nerve dysfunction\nonly rarely improves, but palsies of cranial nerves III, IV,\nand VI usually resolve in two to three months. Fractures\nof the petrous pyramid(s) may cause facial nerve palsy\nas well as deafness, due to injury either to the vestib

In [115]:
tools =  [
    Tool(
        name = "Knowledge Base",
        func = qa.run,
        description = (
            'use this tool when answering general knowledge queries to get '
            'more information about the topic'
        )
    )
]

In [116]:
from langchain.agents import initialize_agent

In [117]:
agent = initialize_agent(
    agent = "chat-conversational-react-description",
    tools = tools,
    llm = llm,
    verbose = True,
    max_iterations = 3,
    early_stopping_method = "generate",
    memory = conv_mem
)