In [1]:
%pwd

'/home/sneeraj/medbot/Med-bot---demo/research'

In [3]:
import os
from pathlib import Path

# The notebook is started from <project>/research (see the %pwd output), but
# later cells use paths relative to the project root such as "Data/".
# Step up one level only while we are still inside research/: this keeps the
# cell safe to re-run and avoids depending on one machine's absolute path.
if Path.cwd().name == "research":
    os.chdir(Path.cwd().parent)

In [2]:
import os 
#os.chdir("../")

In [4]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [5]:
# Extract data from the PDF files

def load_pdf_file(data):
    """Load the PDF files in a directory as documents.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the files matching
        the ``*.pdf`` glob, each parsed with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [6]:
# Load everything under the relative "Data/" folder; the path is resolved
# against the kernel's current working directory.
extracted_data = load_pdf_file("Data/")

In [19]:
#extracted_data

In [7]:
# Split the loaded documents into text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 500,
            the value this notebook was originally run with.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            20, the value this notebook was originally run with.

    Returns:
        The chunks returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [8]:
# Chunk the loaded documents and print how many chunks were produced
# (the recorded run printed 6973).
text_chunks = text_split(extracted_data)
print(len(text_chunks))

6973


In [26]:
#text_chunks

In [9]:
from langchain.embeddings import HuggingFaceEmbeddings

In [10]:
# Download the embedding model from Hugging Face

def download_hugging_face_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the HuggingFace embedding model used by this notebook.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to ``sentence-transformers/all-MiniLM-L6-v2``, the model
            the notebook was originally run with.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.

    Note:
        With the default model the recorded run produced vectors of length
        384, which is also the ``dimension`` the Pinecone index is created
        with. A different model may need a different index dimension.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [11]:
# Build the embedding model once; later cells reuse this `embeddings` object
# for the test query and for both Pinecone vector-store calls.
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Sanity check: embed a short string and print the vector length. The recorded
# run printed 384, the same number passed as `dimension` when the Pinecone
# index is created later in this notebook.
query_result = embeddings.embed_query("Hello world")
print(len(query_result))

384


In [41]:
#query_result


In [13]:
from dotenv import load_dotenv
# Load variables from a .env file (python-dotenv) so the API keys read in the
# next cell are available via os.environ; the recorded run returned True.
load_dotenv()

True

In [14]:
# Read the service credentials from the environment (expected to have been
# populated by the earlier load_dotenv() call).
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Fail here with a readable message instead of letting a None key reach the
# Pinecone / Groq clients or the later os.environ[...] assignments, where the
# resulting error does not say which variable is missing.
_missing_keys = [
    name
    for name, value in (
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("GROQ_API_KEY", GROQ_API_KEY),
    )
    if not value
]
if _missing_keys:
    raise RuntimeError(
        "Missing environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Add them to the .env file or export them before running this notebook."
    )

In [19]:
from pinecone import Pinecone, ServerlessSpec
import os

INDEX_NAME = "medbot-index"

pc = Pinecone(api_key=PINECONE_API_KEY)

# Creating an index that already exists is rejected by Pinecone, which made
# this cell fail on every run after the first. Only create it when absent.
if not pc.has_index(INDEX_NAME):
    pc.create_index(
        name=INDEX_NAME,
        # Must equal the embedding vector length (the embed_query check in
        # this notebook printed 384).
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Keep the index description as the cell output whether or not the index was
# created on this run.
pc.describe_index(INDEX_NAME)

{
    "name": "medbot-index",
    "metric": "cosine",
    "host": "medbot-index-4vatepi.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [20]:
import os 
# Write both keys back into the process environment under their standard names.
os.environ.update(
    {
        "PINECONE_API_KEY": PINECONE_API_KEY,
        "GROQ_API_KEY": GROQ_API_KEY,
    }
)

In [21]:
# Embed each chunk and upsert the embeddings into the Pinecone index

from langchain_pinecone import PineconeVectorStore
# NOTE(review): re-running this cell sends every chunk to the index again;
# confirm whether that duplicates vectors before executing it a second time.
docsearch = PineconeVectorStore.from_documents(
    text_chunks,
    embedding=embeddings,
    index_name="medbot-index",
)

In [22]:
# Load the existing index

from langchain_pinecone import PineconeVectorStore

# Rebinds `docsearch` to a store backed by the already-populated index.
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name="medbot-index",
)

In [20]:
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x7401dc6d0130>

In [23]:
# Similarity search returning the 3 closest chunks per query.
retriever = docsearch.as_retriever(
    search_kwargs=dict(k=3),
    search_type="similarity",
)

In [27]:
# Smoke-test the retriever on a sample question before wiring it into a chain.
retrieved_docs = retriever.invoke("What is Corneal abrasion?")

In [28]:
retrieved_docs

[Document(id='0ec62aa8-50c5-4e3b-8726-d6aa691ddb16', metadata={'author': '', 'creationdate': '2017-05-01T10:37:35-07:00', 'creator': '', 'keywords': '', 'moddate': '2017-05-01T10:37:35-07:00', 'page': 59.0, 'page_label': '60', 'producer': 'GPL Ghostscript 9.10', 'source': 'Data/The_GALE_ENCYCLOPEDIA_of_MEDICINE_SECOND.pdf', 'subject': '', 'title': '', 'total_pages': 759.0}, page_content='Degeneration of the macula (the central part of the\nretina where the rods and cones are most dense)\nthat leads to loss of central vision in people over 60.\nCataract —Progressive opacity or clouding of an\neye lens, which obstructs the passage of light to\nthe retina.\nCornea—Clear outer covering of the front of the eye.\nIntraocular lens—Lens made of silicone or plastic\nplaced within the eye; can be corrective.\nRetina—Innermost layer at the back of the eye,'),
 Document(id='9ac259c0-3c54-4e29-a22d-27232e53d7ff', metadata={'author': '', 'creationdate': '2017-05-01T10:37:35-07:00', 'creator': '', 'k

In [24]:
from langchain_openai import ChatOpenAI

# The OpenAI chat client is pointed at Groq's /openai/v1 endpoint and
# authenticated with the Groq key rather than an OpenAI key.
llm = ChatOpenAI(
    model="llama3-8b-8192",  # You can try mistralai/mixtral-8x7b or others too
    api_key=GROQ_API_KEY,
    base_url="https://api.groq.com/openai/v1",
)

In [25]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the assistant. The trailing "{context}" placeholder
# is a template variable of the prompt; "{input}" carries the user's question.
system_prompt = (
    "You are a medical assistant chatbot trained on medical literature. "
    "Answer user questions directly in a natural way. "
    "Do NOT mention or refer to 'context', 'retrieved text', 'documents', or 'sources'. "
    "Instead, summarize and rephrase in your own words. "
    "Keep the explanation clear, accurate, and professional. "
    "If unsure, say so and recommend consulting a healthcare professional.\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)


In [26]:
# First build the chain that feeds documents plus the prompt to the LLM, then
# put the retriever in front of it.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [27]:
# Run one question through the full retrieval + LLM chain and print only the
# generated text stored under the "answer" key of the result.
response = rag_chain.invoke({"input": "What causes Corneal abrasion"})
print(response["answer"])

A corneal abrasion is usually caused by direct injury to the eye. This can occur from various factors such as:

* Fingernail scratches
* Makeup brushes or other sharp objects that come into contact with the eye
* Contact lenses that are inserted or removed incorrectly
* Sunglasses or other eyewear that is not properly fitted or adjusted
* Foreign particles or debris that enter the eye
* Accidental rubbing or bumping of the eye
* Trauma or blow to the eye
* Irritants such as chemicals or cleaning agents that come into contact with the eye

These injuries can cause a small tear or scratch on the surface of the cornea, leading to a corneal abrasion.
