In [1]:
print("OK")

OK


In [2]:
import os 
os.chdir("../")

In [3]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
# Extract text from PDF files
def load_pdf_files(data):
    loader = DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader
    )

    documents = loader.load()
    return documents

In [5]:
extracted_data = load_pdf_files("data")


In [6]:
extracted_data

[Document(metadata={'producer': 'Adobe PDF Library 16.0.7', 'creator': 'Adobe InDesign 17.4 (Macintosh)', 'creationdate': '2022-10-21T08:31:18-04:00', 'moddate': '2022-10-21T08:31:22-04:00', 'trapped': '/False', 'source': 'data\\GeneralUrology-UrologyBasics-FS-2022-English (1).pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='What is Urology?\nUrology is a branch of medicine that deals with health \nproblems of the male and female urinary systems, and the \nmale reproductive system.\nMale urinary tract\nFemale urinary tract\nThe urinary tract stores and gets rid of urine (liquid waste) \nand extra water. The urinary tract is a pathway in the body. It \nincludes the kidneys, ureters (tubes from kidneys to bladder), \nbladder and urethra (tube from bladder to outside the body). \nThe male reproductive system makes, stores and moves \nsemen to make babies. It’s made of the prostate, scrotum, \ntestes and penis. A doctor who specializes in these body \nparts is called a 

In [7]:
len(extracted_data)

2

In [8]:
from typing import List
from langchain.schema import Document

def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """
    Given a list of Document objects, return a new list of Document objects
    containing only 'source' in metadata and the original page_content.
    """
    minimal_docs: List[Document] = []
    for doc in docs:
        src = doc.metadata.get("source")
        minimal_docs.append(
            Document(
                page_content=doc.page_content,
                metadata={"source": src}
            )
        )
    return minimal_docs

In [9]:
minimal_docs = filter_to_minimal_docs(extracted_data)

In [10]:
minimal_docs

[Document(metadata={'source': 'data\\GeneralUrology-UrologyBasics-FS-2022-English (1).pdf'}, page_content='What is Urology?\nUrology is a branch of medicine that deals with health \nproblems of the male and female urinary systems, and the \nmale reproductive system.\nMale urinary tract\nFemale urinary tract\nThe urinary tract stores and gets rid of urine (liquid waste) \nand extra water. The urinary tract is a pathway in the body. It \nincludes the kidneys, ureters (tubes from kidneys to bladder), \nbladder and urethra (tube from bladder to outside the body). \nThe male reproductive system makes, stores and moves \nsemen to make babies. It’s made of the prostate, scrotum, \ntestes and penis. A doctor who specializes in these body \nparts is called a urologist.\nWhat are Urologists?\nA urologist is a medical doctor and surgeon. Urologists \nare trained to find, treat and handle urinary and genital \nproblems. They gain 15+ years of school and training \n(undergraduate, medical and resid

In [11]:
# Split the documents into smaller chunks
def text_split(minimal_docs):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20,
    )
    texts_chunk = text_splitter.split_documents(minimal_docs)
    return texts_chunk

In [12]:
texts_chunk = text_split(minimal_docs)
print(f"Number of chunks: {len(texts_chunk)}")

Number of chunks: 10


In [13]:
texts_chunk

[Document(metadata={'source': 'data\\GeneralUrology-UrologyBasics-FS-2022-English (1).pdf'}, page_content='What is Urology?\nUrology is a branch of medicine that deals with health \nproblems of the male and female urinary systems, and the \nmale reproductive system.\nMale urinary tract\nFemale urinary tract\nThe urinary tract stores and gets rid of urine (liquid waste) \nand extra water. The urinary tract is a pathway in the body. It \nincludes the kidneys, ureters (tubes from kidneys to bladder), \nbladder and urethra (tube from bladder to outside the body).'),
 Document(metadata={'source': 'data\\GeneralUrology-UrologyBasics-FS-2022-English (1).pdf'}, page_content='The male reproductive system makes, stores and moves \nsemen to make babies. It’s made of the prostate, scrotum, \ntestes and penis. A doctor who specializes in these body \nparts is called a urologist.\nWhat are Urologists?\nA urologist is a medical doctor and surgeon. Urologists \nare trained to find, treat and handle ur

In [14]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_embeddings():
    """
    Download and return the HuggingFace embeddings model.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name
    )
    return embeddings

embedding = download_embeddings()

  embeddings = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [15]:
embedding

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [16]:
vector = embedding.embed_query("Hello world")
vector

[-0.03447727486491203,
 0.03102317824959755,
 0.006734970025718212,
 0.026108985766768456,
 -0.03936202451586723,
 -0.16030244529247284,
 0.06692401319742203,
 -0.006441489793360233,
 -0.0474504791200161,
 0.014758856035768986,
 0.07087527960538864,
 0.05552763119339943,
 0.019193334504961967,
 -0.026251312345266342,
 -0.01010954286903143,
 -0.02694045566022396,
 0.022307461127638817,
 -0.022226648405194283,
 -0.14969263970851898,
 -0.017493007704615593,
 0.00767625542357564,
 0.05435224249958992,
 0.0032543970737606287,
 0.031725890934467316,
 -0.0846213847398758,
 -0.02940601296722889,
 0.05159561336040497,
 0.04812406003475189,
 -0.0033148222137242556,
 -0.058279167860746384,
 0.04196927323937416,
 0.022210685536265373,
 0.1281888335943222,
 -0.022338971495628357,
 -0.011656315997242928,
 0.06292839348316193,
 -0.032876335084438324,
 -0.09122604131698608,
 -0.031175347045063972,
 0.0526994913816452,
 0.04703482985496521,
 -0.08420311659574509,
 -0.030056199058890343,
 -0.02074483036

In [17]:
print( "Vector length:", len(vector))

Vector length: 384


In [18]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [19]:
# Pinecone setup
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise ValueError("❌ PINECONE_API_KEY not found in .env file")
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

# Groq setup
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("❌ GROQ_API_KEY not found in .env file")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [20]:
from pinecone import Pinecone 
pinecone_api_key = PINECONE_API_KEY

pc = Pinecone(api_key=pinecone_api_key)

In [21]:
pc

<pinecone.pinecone.Pinecone at 0x24603aa95d0>

In [22]:
from pinecone import ServerlessSpec 

index_name = "medical-chatbot"

if not pc.has_index(index_name):
    pc.create_index(
        name = index_name,
        dimension=384,  # Dimension of the embeddings
        metric= "cosine",  # Cosine similarity
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )


index = pc.Index(index_name)

In [23]:
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    documents=texts_chunk,
    embedding=embedding,
    index_name=index_name
)

In [24]:
# Load Existing index 

from langchain_pinecone import PineconeVectorStore
# Embed each chunk and upsert the embeddings into your Pinecone index.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedding
)

In [35]:
shoukatwith_1 = Document(
    page_content="contact number is 03339017427 and he used whatsapp also on this number."
)

In [25]:
shoukatwith = Document(
    page_content="""Dr. Zakir Khan – Consultant Urologist & Surgeon

Dr. Zakir Khan is a highly respected and experienced medical professional in Pakistan. He is a Consultant Urologist and Surgeon with extensive qualifications and professional achievements. Currently, he practices at Rehmat Medical Center, where he provides specialized medical care in the fields of Urology, Infertility, and General Surgery. His career reflects years of dedication, training, and service in well-known hospitals across Pakistan, including Peshawar, Rawalpindi, and Nowshera.

Educational Qualifications
Dr. Zakir Khan start his education from student model school akora khattak and completed his matriculation from there.then he get admission in intermediate at khyber model college nowshera.

Dr. Zakir Khan’s medical education and postgraduate training are comprehensive and demonstrate his commitment to continuous learning and specialization. His academic achievements include:

MBBS (KMC) – He completed his Bachelor of Medicine and Bachelor of Surgery (MBBS) from Khyber Medical College (KMC), one of the oldest and most prestigious medical institutions in Pakistan. KMC has a long-standing reputation for producing highly competent doctors, and his foundation training there equipped him with essential medical and surgical knowledge.

FCPS (Urology) – He is a Fellow of the College of Physicians and Surgeons Pakistan (FCPS) in Urology, which is one of the most recognized and respected postgraduate medical qualifications in Pakistan. This fellowship represents years of specialized training in Urology, including surgical procedures related to the urinary tract, kidneys, bladder, prostate, and male reproductive system.

MCPS (General Surgery) CRSM (Lahore) – He also holds the Membership of the College of Physicians and Surgeons (MCPS) in General Surgery from CRSM Lahore. This qualification highlights his strong surgical foundation, enabling him to handle a wide range of surgical cases, not limited only to urology.

These qualifications together reflect his ability to serve both as a general surgeon and a specialized urologist.

Professional Experience

Dr. Zakir Khan’s career includes training and service in some of the most reputable hospitals in Pakistan:

Ex Registrar (HMC Peshawar) – He worked as a Registrar at Hayatabad Medical Complex (HMC), Peshawar, where he gained hands-on experience in treating complex urological and surgical cases.

AFIU CMH Rawalpindi – He has been associated with the Armed Forces Institute of Urology (AFIU) at Combined Military Hospital (CMH), Rawalpindi. AFIU is one of the most advanced urology centers in Pakistan, where he had exposure to highly specialized surgeries and procedures in urology.

Urologist DHQ Hospital Nowshera – He served as a Urologist at District Headquarters (DHQ) Hospital Nowshera, where he treated a large number of patients from diverse backgrounds, managing both common and complex urological disorders.

Visiting Consultant Jinnah Teaching Hospital Peshawar – He has also worked as a Visiting Consultant at Jinnah Teaching Hospital, Peshawar, where he shared his expertise with both patients and medical trainees.

Senior Registrar Muhammad Teaching Hospital Peshawar – His career includes serving as a Senior Registrar at Muhammad Teaching Hospital, Peshawar, where he combined clinical practice with mentoring junior doctors.

Through these roles, Dr. Zakir Khan has built a reputation as an expert clinician and surgeon, trusted by patients and respected by colleagues.

Areas of Specialization

Dr. Zakir Khan specializes in the following medical fields:

Urology – Treatment of kidney stones, bladder disorders, prostate diseases, urinary tract infections, male reproductive system issues, and other urinary problems.

Infertility – Providing care and treatment options for male infertility, assisting couples with conception-related challenges.

General Surgery – Performing a wide range of surgical procedures, thanks to his dual training in both general surgery and urology.

His dual qualifications and wide-ranging experience allow him to provide holistic surgical and urological care to his patients.

Contributions to Medical Practice

Patient Care – He is known for his compassionate and patient-centered approach. Patients appreciate his clear explanations and personalized treatment plans.

Teaching & Mentorship – Through his roles in teaching hospitals, he has contributed to the training of future doctors and surgeons.

Hospital Services – His work across multiple hospitals shows his dedication to serving diverse communities, from big cities like Rawalpindi to smaller districts like Nowshera.

Rehmat Medical Center

Currently, Dr. Zakir Khan practices at Rehmat Medical Center, where he leads the Urology & Infertility Clinic. This center provides advanced diagnostic and treatment facilities for patients suffering from urological and infertility problems. The clinic is known for combining modern medicine with patient-focused care.

Frequently Asked Questions (FAQ)

Q1: Who is Dr. Zakir Khan?
A: Dr. Zakir Khan is a Consultant Urologist and Surgeon, practicing at Rehmat Medical Center.

Q2: What are his qualifications?
A: MBBS (KMC), FCPS (Urology), MCPS (General Surgery) CRSM (Lahore).

Q3: Where has he worked?
A: HMC Peshawar, AFIU CMH Rawalpindi, DHQ Hospital Nowshera, Jinnah Teaching Hospital Peshawar, Muhammad Teaching Hospital Peshawar.

Q4: What is his specialization?
A: Urology, Infertility, and General Surgery.

Q5: Where can I consult him?
A: At Rehmat Medical Center.

Q6: Does he have experience in teaching hospitals?
A: Yes, he served as a Senior Registrar and Visiting Consultant at major teaching hospitals in Peshawar.

Tags for Retrieval

Dr. Zakir Khan, Rehmat Medical Center, Urologist in Peshawar, Urologist in Rawalpindi, Urologist in Nowshera, Infertility specialist Pakistan, General Surgeon Peshawar, FCPS Urology, MBBS KMC, MCPS Surgery.

Summary

Dr. Zakir Khan is one of the leading Consultant Urologists and Surgeons in Pakistan. With strong educational qualifications (MBBS, FCPS, MCPS) and years of experience in leading hospitals across Peshawar, Rawalpindi, and Nowshera, he has established himself as a trusted medical expert. His specialization in Urology and Infertility makes him an essential healthcare provider for patients suffering from urinary tract diseases, kidney disorders, prostate issues, male infertility, and surgical needs. Currently, he practices at Rehmat Medical Center, where he continues to serve patients with dedication, skill, and compassion..""",
    metadata={"source": "pdf"}
)

In [36]:
docsearch.add_documents(documents=[shoukatwith_1])

['84df0d82-bafb-4724-bc01-a0fadd57ab66']

In [27]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [28]:
retrieved_docs = retriever.invoke("Who is shoukat?")
retrieved_docs

[Document(id='5843bbd5-5798-4e28-8f9b-7874541a1833', metadata={'source': 'linkedin'}, page_content='shoukat khan is a chatbot developer and data scientist have one year of experience .'),
 Document(id='47ada812-c544-40dd-8c32-382dd1d6493f', metadata={'source': 'pdf'}, page_content='Dr. Zakir Khan – Consultant Urologist & Surgeon\n\nDr. Zakir Khan is a highly respected and experienced medical professional in Pakistan. He is a Consultant Urologist and Surgeon with extensive qualifications and professional achievements. Currently, he practices at Rehmat Medical Center, where he provides specialized medical care in the fields of Urology, Infertility, and General Surgery. His career reflects years of dedication, training, and service in well-known hospitals across Pakistan, including Peshawar, Rawalpindi, and Nowshera.\n\nEducational Qualifications\nDr. Zakir Khan start his education from student model school akora khattak and completed his matriculation from there.then he get admission in 

In [29]:
from langchain_groq import ChatGroq  

chatModel = ChatGroq(model="llama3-8b-8192")


In [30]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [31]:
system_prompt = (
    "You are urology Medical assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)


prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [32]:
question_answer_chain = create_stuff_documents_chain(chatModel, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [33]:
response = rag_chain.invoke({"input": "who is shoukat?"})
print(response["answer"])

According to the provided context, Shoukat Khan is a chatbot developer and data scientist with one year of experience.
