In [4]:
%pwd

'c:\\Users\\dell\\Desktop\\New folder\\Medical-Chatbot-Generative-AI\\research'

In [5]:
import os

# Step from the research/ folder up to the project root so relative paths such
# as "Data/" resolve. The guard makes the cell idempotent: an unconditional
# chdir("../") would climb one more directory every time the cell is re-run.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [6]:
%pwd

'c:\\Users\\dell\\Desktop\\New folder\\Medical-Chatbot-Generative-AI'

In [7]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [8]:
#extract the data from the pdf file
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan; files are selected with the
            "*.pdf" glob and read with PyPDFLoader.

    Returns:
        The documents produced by DirectoryLoader.load().
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [9]:
# Sanity check: show the current working directory before loading files
# through relative paths.
import os
print(os.getcwd())


c:\Users\dell\Desktop\New folder\Medical-Chatbot-Generative-AI


In [54]:
# Load the PDFs from the Data/ folder (path is relative to the current working directory).
extracted_data = load_pdf_file(data="Data/")

In [11]:
# extracted_data

In [12]:
#split the data into chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        extracted_data: Documents to split, e.g. the value returned by
            load_pdf_file.
        chunk_size: Maximum size of each chunk, forwarded to
            RecursiveCharacterTextSplitter. Defaults to 500, the value that
            was previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            RecursiveCharacterTextSplitter. Defaults to 20, the value that
            was previously hard-coded.

    Returns:
        The chunked documents returned by split_documents.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [13]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print(f"Number of text chunks: {len(text_chunks)}")

Number of text chunks: 5699


In [14]:
# Smoke test: confirm the sentence-transformers model can be loaded directly.
# NOTE(review): `model` is not referenced again in the cells shown here; the
# pipeline below goes through HuggingFaceEmbeddings instead — confirm this
# cell is still needed.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print("✅ Model loaded successfully")


  from .autonotebook import tqdm as notebook_tqdm


✅ Model loaded successfully


In [15]:
#download the embeddings from huggingface
from langchain.embeddings import HuggingFaceEmbeddings
def download_hugging_face_embeddings():
    """Create the embedding object used throughout the notebook.

    Returns:
        A HuggingFaceEmbeddings instance configured with the
        "sentence-transformers/all-MiniLM-L6-v2" model.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

In [16]:
# Build the embedding object once; it is reused below for the sample query and
# for the Pinecone vector store.
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [17]:
# Embed a sample question to check that the embedding object works end to end.
query_result = embeddings.embed_query("What is the purpose of the study?")
print(f"Query result: {query_result[:5]}")  # Print first 5 elements of the query result

Query result: [0.008776667527854443, 0.1564365178346634, -0.04509522765874863, 0.051768939942121506, 0.009445116855204105]


In [45]:
import os
from dotenv import load_dotenv

# Load environment variables (python-dotenv reads them from a .env file).
load_dotenv()

# Each constant is None when the corresponding variable is not defined.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

In [23]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "medicalbot"

# Creating an index that already exists raises an error, so only create it
# when it is missing; this keeps the cell safe to re-run.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # must equal the length of the vectors produced by the embedding model
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )
    print("✅ Index created successfully!")
else:
    print(f"Index '{index_name}' already exists; skipping creation.")


✅ Index created successfully!


In [46]:
import os

# Re-export the keys into the process environment. Later cells build the
# Pinecone vector store and the Groq chat model without passing a key, so they
# presumably read it from these variables.
# Assigning None to os.environ raises an opaque TypeError, so fail early with a
# message that names the missing variable.
for _key_name, _key_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("GROQ_API_KEY", GROQ_API_KEY),
):
    if not _key_value:
        raise RuntimeError(
            f"{_key_name} is not set; define it in the environment or in a .env file."
        )
    os.environ[_key_name] = _key_value

In [25]:
# Embedding the text chunks and storing them in Pinecone
from langchain_pinecone import PineconeVectorStore

# NOTE(review): re-running this cell presumably embeds and uploads every chunk
# again — confirm whether that leaves duplicate vectors in the index.
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    index_name=index_name,  
)

In [26]:
#load existing index
from langchain_pinecone import PineconeVectorStore
# Rebinds docsearch to a store backed by the existing index, presumably so a
# later session can skip the upload cell.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,   
)

In [38]:
# Similarity-search retriever over the vector store; k=5 requests five documents per query.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [39]:
# Smoke-test the retriever on a sample question; the bare name on the last
# line displays the result.
# NOTE: "retrived" is a misspelling of "retrieved"; the name is left unchanged
# in case other cells refer to it.
retrived_docs = retriever.invoke("What is the purpose of the study?")
retrived_docs

[Document(id='dd95c9b9-802f-43fa-a993-9d22da3bb189', metadata={'author': 'Clifford', 'creationdate': '2004-12-28T15:38:25-05:00', 'creator': 'PyPDF', 'enhanced': 'By PDF Enhancer 2.5/Win', 'moddate': '2005-05-04T13:53:15-06:00', 'page': 411.0, 'page_label': '412', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\The-Gale-Encyclopedia-of-Medicine.pdf', 'spdf': '1096', 'total_pages': 599.0}, page_content='The initial visit often includes a long questionnaire\nabout a patient’s medical and family history , and then a\nlong interview with the doctor, who prompts the patient\nwith many questions. Sometimes a homeopathic doctor\nwill use lab tests to establish a patient’s general level of\nhealth. The initial interview usually lasts between one\nand two hours.\nThe purpose of homeopathy is the restoration of the\nbody to homeostasis, or healthy balance, which is its nat-'),
 Document(id='1b0e343d-15cc-48e6-ab02-a88420fdc458', metadata={'author': 'Clifford', 'creationdate': '2004-12-2

In [50]:
from langchain_groq import ChatGroq

# Groq-hosted chat model used to generate the answers. No API key is passed
# here, so it presumably comes from the GROQ_API_KEY environment variable.
# NOTE(review): hosted model IDs get retired over time — confirm that
# "llama3-8b-8192" is still served before relying on this cell.
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0.4,
    max_tokens=500
)


In [41]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Adjacent string literals are concatenated verbatim, so each sentence carries
# its own trailing space; without it two sentences run together in the prompt.
system_prompt = (
    "You are a medical research assistant. "
    "Your task is to answer questions based on the provided documents. "
    "If the answer is not found in the documents, respond with 'I don't know'. "
    "Use three sentences maximum to answer the question and keep the answer concise."
    "\n\n"
    # placeholder for the retrieved document text (presumably filled in by the
    # documents chain built from this prompt)
    "{context}"
)

# Chat prompt with two messages: the system instructions held in system_prompt
# and the user's question, which is substituted for the {input} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [51]:
# Assemble the pipeline: the question-answer chain pairs the LLM with the
# prompt, and the retrieval chain puts the retriever in front of it.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [52]:
# Run the full chain on a sample question and print only the generated answer.
response = rag_chain.invoke({"input":"What is the purpose of the study?"})
print(response['answer'])  
print("✅ Response generated successfully!")

The purpose of homeopathy is the restoration of the body to homeostasis, or healthy balance, which is its natural state. This involves assessing the person's general level of health, as well as physical, emotional, mental, and spiritual aspects of their health picture.
✅ Response generated successfully!


In [53]:
# Ask about something the documents are not expected to cover; the system
# prompt tells the model to answer 'I don't know' in that case.
response = rag_chain.invoke({"input":"What is stats?"})
print(response['answer'])  

I don't know. The provided documents do not mention "stats".
