# Import Libraries and Read Data

In [1]:
pip install -U langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.19-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.41 (from langchain-community)
  Downloading langchain_core-0.3.45-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.20 (from langchain-community)
  Downloading langchain-0.3.20-py3-none-any.whl.metadata (7.7 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting langsmith<0.4,>=0.1.125 (from langchain-community)
  Downloading langsmith-0.3.15-py3-none-any.whl.metadata (14 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Using cached httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting numpy<3,>=1.26.2 (from langchain-community)
  Downloading numpy-2.2.3-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.6 (from langchain<1.0.0,>=0.3.20->langchain-community)

DEPRECATION: Loading egg at c:\python311\lib\site-packages\vboxapi-1.0-py3.11.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330
ERROR: Could not install packages due to an OSError: [WinError 2] The system cannot find the file specified: 'c:\\Python311\\Scripts\\f2py.exe' -> 'c:\\Python311\\Scripts\\f2py.exe.deleteme'


[notice] A new release of pip is available: 22.3 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
pip install faiss-cpu

Note: you may need to restart the kernel to use updated packages.


DEPRECATION: Loading egg at c:\python311\lib\site-packages\vboxapi-1.0-py3.11.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330

[notice] A new release of pip is available: 22.3 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from transformers import pipeline
import json
import re

# Read Data from CSV Files Using pd.read_csv
# Adjust file paths if necessary
patient_df = pd.read_csv("/content/drive/MyDrive/data/nagpur_patients.csv")
hospital_df = pd.read_csv("/content/drive/MyDrive/data/hospital_final.csv")
doctor_df = pd.read_csv("/content/drive/MyDrive/data/combined_doctors_data.csv")
appointment_df = pd.read_csv("/content/drive/MyDrive/data/appointment.csv")

# Preprocess Data for Embedding

In [5]:
# Preprocess Data for Embedding
patient_docs = [
    f"Patient ID: {row['Patient ID']}, Name: {row['Full Name']}, Age: {row['Age']}, Gender: {row['Gender']}, BMI: {row['BMI']}, Blood Group: {row['Blood Group']}, Location: {row['Location']}, Diagnosis: {appointment_df[appointment_df['patient_id'] == row['Patient ID']]['diagnosis'].iloc[0] if row['Patient ID'] in appointment_df['patient_id'].values else 'Unknown'}"
    for _, row in patient_df.iterrows()
]

hospital_docs = [
    f"Hospital ID: {row['hospital_id']}, Name: {row['name']}, Location: {row['location']}, Specialty: {row['specialty']}, Beds Available: {row['beds_available']}"
    for _, row in hospital_df.iterrows()
]

doctor_docs = [
    f"Doctor ID: {row['Doctor ID']}, Name: {row['Full Name']}, Hospital ID: {row['Hospital ID'] if pd.notna(row['Hospital ID']) else 'Unknown'}, Specialization: {row['Specialization']}"
    for _, row in doctor_df.iterrows()
]

# Create Embeddings with LangChain

In [8]:
# Create Embeddings with LangChain
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

patient_documents = [Document(page_content=doc) for doc in patient_docs]
hospital_documents = [Document(page_content=doc) for doc in hospital_docs]
doctor_documents = [Document(page_content=doc) for doc in doctor_docs]

patient_vectorstore = FAISS.from_documents(patient_documents, embedding_model)
hospital_vectorstore = FAISS.from_documents(hospital_documents, embedding_model)
doctor_vectorstore = FAISS.from_documents(doctor_documents, embedding_model)

# Retrieval Module (Similarity Search)

In [9]:
# Retrieval Module (Similarity Search)
def retrieve_context(query):
    patient_results = patient_vectorstore.similarity_search(query, k=1)
    hospital_results = hospital_vectorstore.similarity_search(query, k=1)
    doctor_results = doctor_vectorstore.similarity_search(query, k=2)  # Retrieve 2 doctors

    context = {
        "patient_context": patient_results[0].page_content,
        "hospital_context": hospital_results[0].page_content,
        "doctor_context": [doc.page_content for doc in doctor_results]
    }
    return context

# Generation Module (Fully Dynamic)

In [10]:
# Generation Module (Fully Dynamic)
generator = pipeline("text-generation", model="distilgpt2", max_new_tokens=150, device=-1)

def generate_response(query, context):
    prompt = (
        f"Given the following context:\n"
        f"Patient Context: {context['patient_context']}\n"
        f"Hospital Context: {context['hospital_context']}\n"
        f"Doctor Context: {context['doctor_context']}\n\n"
        f"Query: {query}\n\n"
        f"Extract the conditions from the query, recommend doctors based on the context, assess severity for each condition based on available data, suggest a hospital considering all conditions, and provide real-time navigation advice."
    )

    try:
        response_text = generator(prompt)[0]["generated_text"]
    except Exception as e:
        response_text = "Failed to generate response from LLM."

    result = {
        "predicted_conditions": [],
        "severity": {},
        "recommended_doctors": {},
        "hospital_recommendation": "",
        "hospital_justification": "",
        "real_time_navigation": ""
    }

    # Extract conditions from the query
    condition_pattern = r"with\s+(.+?)(?:\s+and\s+(.+?))?(?=\s+(?:in|recommend|\.|$))"
    matches = re.search(condition_pattern, query.lower())
    if matches:
        conditions = [matches.group(1).strip()]
        if matches.group(2):  # If there's a second condition after "and"
            conditions.append(matches.group(2).strip())
    else:
        conditions = ["Unknown condition"]

    # Infer conditions from specialties if vague terms are used
    specialties = re.findall(r"recommend\s+a\s+([a-zA-Z\s]+)(?=\s+and|\s*,|\s*$)", query.lower())
    specialties = [spec.strip().title() for spec in specialties]
    if "condition" in conditions[0].lower() or "issue" in conditions[0].lower():
        conditions = [f"{spec}-Related Condition" for spec in specialties] or ["Unknown condition"]
    result["predicted_conditions"] = conditions

    # Determine severity for each condition
    for condition in result["predicted_conditions"]:
        matched_appointments = appointment_df[appointment_df['diagnosis'].str.lower().str.contains(condition.lower().split('-')[0], na=False)]
        if not matched_appointments.empty:
            severity_score = max(1, min(10, 10 - (len(matched_appointments) // 5)))
            result["severity"][condition] = f"{severity_score}/10"
        else:
            bmi = float(re.search(r"BMI: (\d+\.?\d*)", context["patient_context"]).group(1))
            severity_score = max(1, min(10, int((bmi - 20) * 2)))
            result["severity"][condition] = f"{severity_score}/10"

    # Recommend doctors based on specialties
    for specialty in specialties:
        doctor = next((re.search(r"Name: (.*?)(?:,|$)", doc).group(1).strip()
                       for doc in context["doctor_context"]
                       if specialty.lower() in doc.lower()), None)
        result["recommended_doctors"][specialty] = doctor if doctor else "Not found in context"

    # Hospital recommendation logic
    hospital_specialty = re.search(r"Specialty: (.*?)(?:,|$)", context["hospital_context"]).group(1).strip()
    hospital_name = re.search(r"Name: (.*?)(?:,|$)", context["hospital_context"]).group(1).strip()
    beds_available = int(re.search(r"Beds Available: (\d+)", context["hospital_context"]).group(1))

    if beds_available >= 20 and all(any(spec.lower() in hospital_specialty.lower() for spec in specialties) for spec in specialties):
        result["hospital_recommendation"] = hospital_name
        result["hospital_justification"] = f"{hospital_name} has {beds_available} beds and covers {hospital_specialty}."
    else:
        multispecialty_options = hospital_df[(hospital_df["beds_available"] >= 20) &
                                             (hospital_df["specialty"].str.lower().apply(lambda x: all(any(spec.lower() in x for spec in specialties) for spec in specialties)))]
        if not multispecialty_options.empty:
            fallback_hospital = multispecialty_options.iloc[0]["name"]
            result["hospital_recommendation"] = fallback_hospital
            result["hospital_justification"] = f"{hospital_name} lacks coverage or beds ({beds_available}). Recommending {fallback_hospital} with {multispecialty_options.iloc[0]['beds_available']} beds."
        else:
            result["hospital_recommendation"] = hospital_name
            result["hospital_justification"] = f"{hospital_name} recommended despite limitations ({beds_available} beds, {hospital_specialty})."

    # Navigation
    hospital_location = re.search(r"Location: (.*?)(?:,|$)", context["hospital_context"]).group(1).strip()
    result["real_time_navigation"] = f"Head to {hospital_location}"

    return result

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


# RAG Pipeline with JSON Output

In [11]:
# RAG Pipeline with JSON Output
def rag_pipeline(query):
    context = retrieve_context(query)
    response = generate_response(query, context)

    output = {
        "query": query,
        "context": context,
        "response": response
    }
    return json.dumps(output, indent=2)

# Test the RAG System

In [12]:
# Test the RAG System
query = "Evaluate a 30-year-old male with severe abdominal pain and vomiting in Nagpur, recommend a gastroenterologist, assess severity, and suggest a hospital with immediate availability."
json_response = rag_pipeline(query)
print(json_response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{
  "query": "Evaluate a 30-year-old male with severe abdominal pain and vomiting in Nagpur, recommend a gastroenterologist, assess severity, and suggest a hospital with immediate availability.",
  "context": {
    "patient_context": "Patient ID: 126, Name: Sanjay Kumar, Age: 41, Gender: Male, BMI: 26.2, Blood Group: A-, Location: Koradi Road, Diagnosis: Vertigo",
    "hospital_context": "Hospital ID: 7, Name: NIMS Hospital, Location: Nagpur, Specialty: Multispecialty, Beds Available: 30",
    "doctor_context": [
      "Doctor ID: 1001, Name: Dr. Nitin Tiwari, Hospital ID: 1.0, Specialization: Cardiologist",
      "Doctor ID: 1003, Name: Dr. Piyush Marudwar, Hospital ID: 11.0, Specialization: Gastroenterologist"
    ]
  },
  "response": {
    "predicted_conditions": [
      "severe abdominal pain",
      "vomiting"
    ],
    "severity": {
      "severe abdominal pain": "10/10",
      "vomiting": "10/10"
    },
    "recommended_doctors": {
      "Gastroenterologist": "Dr. Piyush Marudw

# Analytics Dashboard (JSON Output)

In [13]:
# Analytics Dashboard (JSON Output)
def analytics_dashboard():
    hospital_performance = hospital_df.groupby("name")["beds_available"].sum().to_dict()
    analytics_output = {
        "hospital_performance": {
            "date": "2025-03-14",
            "metrics": hospital_performance
        }
    }
    return json.dumps(analytics_output, indent=2)

# Run the analytics dashboard
analytics_json = analytics_dashboard()
print("\nAnalytics Dashboard:")
print(analytics_json)


Analytics Dashboard:
{
  "hospital_performance": {
    "date": "2025-03-14",
    "metrics": {
      "AIIMS Nagpur": 200,
      "Alexis Multispeciality Hospital": 50,
      "Avanti Institute of Cardiology Pvt. Ltd": 20,
      "CARE Hospitals": 25,
      "Crescent Hospital And Heart Centre": 10,
      "Daga Memorial Government Women's Hospital": 50,
      "Government Medical College and Hospital": 386,
      "HCG NCHRI Cancer Centre": 20,
      "Indira Gandhi Government Medical College & Hospital": 200,
      "KIMS Kingsway Hospital": 100,
      "Kalpavruksha Hospital": 30,
      "Meditrina Institute of Medical Sciences": 30,
      "NIMS Hospital": 30,
      "Orange City Hospital & Research Institute": 50,
      "Shankara Hospital": 30,
      "Super Specialty Hospital": 50,
      "Suretech Hospital and Research Centre Limited": 35,
      "Swasthyam Superspeciality Hospital": 20,
      "Wockhardt Heart Hospital": 10,
      "Wockhardt Super Speciality Hospital": 28
    }
  }
}
