In [2]:
!pip install pip install numpy pandas scikit-learn joblib
!pip install pymupdf pillow pytesseract




ERROR: Could not find a version that satisfies the requirement install (from versions: none)
ERROR: No matching distribution found for install

[notice] A new release of pip is available: 23.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting pymupdf
  Downloading pymupdf-1.26.7-cp310-abi3-win_amd64.whl (18.4 MB)
     ---------------------------------------- 0.0/18.4 MB ? eta -:--:--
     ---------------------------------------- 0.0/18.4 MB ? eta -:--:--
     ---------------------------------------- 0.0/18.4 MB ? eta -:--:--
     --------------------------------------- 0.0/18.4 MB 217.9 kB/s eta 0:01:25
     --------------------------------------- 0.0/18.4 MB 245.8 kB/s eta 0:01:15
     --------------------------------------- 0.1/18.4 MB 272.3 kB/s eta 0:01:08
     --------------------------------------- 0.1/18.4 MB 327.7 kB/s eta 0:00:56
     --------------------------------------- 0.1/18.4 MB 344.8 kB/s eta 0:00:54
     --------------------------------------- 0.1/18.4 MB 387.0 kB/s eta 0:00:48
     --------------------------------------- 0.2/18.4 MB 419.0 kB/s eta 0:00:44
     --------------------------------------- 0.2/18.4 MB 406.9 kB/s eta 0:00:45
     --------------------------------------- 0.2/18.4 MB 401.


[notice] A new release of pip is available: 23.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


EXTRACT ALL VITALS FROM A SINGLE PATIENT REPORT

In [None]:




import fitz
import pytesseract
from PIL import Image
import re
import os

# Report analysed by this notebook, relative to the current working directory.
PDF_PATH = "medicalreports/report_1.pdf"

# Fail fast, and name the path that was checked so a wrong working directory
# is easy to diagnose. isfile() also rejects a directory of the same name.
if not os.path.isfile(PDF_PATH):
    raise FileNotFoundError(f"Medical report PDF not found: {PDF_PATH}")

print("Processing medical report:", PDF_PATH)

def extract_text(pdf_path):
    """Return the lower-cased text of every page of a PDF, with an OCR fallback.

    For each page the embedded text layer is read with ``page.get_text()``.
    When that is empty or whitespace-only, the page is rendered to an image
    and passed to Tesseract instead, and a notice is printed.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        One string holding each page's text, lower-cased, with a newline
        appended after every page.
    """
    page_texts = []

    # The context manager closes the document even if extraction or OCR raises;
    # the original left the file handle open.
    with fitz.open(pdf_path) as doc:
        for page_number, page in enumerate(doc, start=1):
            page_text = page.get_text()

            if not page_text.strip():
                # No usable text layer: rasterise the page and OCR it.
                # NOTE(review): get_pixmap() is called with its defaults; a
                # higher render resolution may improve OCR accuracy - confirm.
                pix = page.get_pixmap()
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                page_text = pytesseract.image_to_string(img)
                print("OCR applied on page", page_number)

            page_texts.append(page_text.lower() + "\n")

    return "".join(page_texts)


def extract_vitals(text):
    """Pull vitals, lab values and medication flags out of free report text.

    Numeric fields are read from the first ``<label> [: or -] <number>``
    occurrence. Medication fields are presence flags: 1 when the word appears
    anywhere in the text, otherwise 0.

    Changes from the previous version:
      * matching is case-insensitive, so callers need not lower-case first;
      * labels must start on a word boundary ("roadmap 5" no longer sets map);
      * a minus sign directly attached to the fluid-balance number is kept as
        the sign instead of being consumed as the label/value separator.

    Args:
        text: Report text. ``None`` or an empty string yields all zeros.

    Returns:
        dict mapping every field name to a float (numeric fields that were
        found), 1 (flag present) or 0 (field or flag not found).
    """
    text = text or ""

    patterns = {
        "heart_rate": r"\bheart rate\s*[:\-]?\s*(\d+)",
        "map": r"\bmap\s*[:\-]?\s*(\d+)",
        "respiratory_rate": r"\brespiratory rate\s*[:\-]?\s*(\d+)",
        "temperature": r"\btemperature\s*[:\-]?\s*(\d+\.?\d*)",
        "glucose": r"\bglucose\s*[:\-]?\s*(\d+\.?\d*)",
        "creatinine": r"\bcreatinine\s*[:\-]?\s*(\d+\.?\d*)",
        "bun": r"\bbun\s*[:\-]?\s*(\d+\.?\d*)",
        "sodium": r"\bsodium\s*[:\-]?\s*(\d+\.?\d*)",
        "potassium": r"\bpotassium\s*[:\-]?\s*(\d+\.?\d*)",
        "hemoglobin": r"\bhemoglobin\s*[:\-]?\s*(\d+\.?\d*)",
        "wbc": r"\bwbc\s*[:\-]?\s*(\d+\.?\d*)",
        "lactate": r"\blactate\s*[:\-]?\s*(\d+\.?\d*)",
        # Separator is ":" or a "-" followed by whitespace; a "-" glued to the
        # digits is the sign of the value, not a separator.
        "fluid_balance": r"\bfluid balance\s*(?::\s*|-\s+)?([-+]?\d+)",
        # The remaining patterns have no capture group: they are presence flags.
        "vasopressors": r"\bvasopressor[s]?",
        "sedatives": r"\bsedative[s]?",
        "antibiotics": r"\bantibiotic[s]?",
        "insulin": r"\binsulin",
    }

    vitals = {}

    for key, pat in patterns.items():
        match = re.search(pat, text, re.IGNORECASE)
        if match:
            # A capture group means a numeric field; no group means a flag.
            vitals[key] = float(match.group(1)) if match.groups() else 1
        else:
            # 0 is the "not found" sentinel relied on by downstream code.
            vitals[key] = 0

    return vitals


# Run both extraction stages on the configured report.
raw_text = extract_text(PDF_PATH)
vitals = extract_vitals(raw_text)

# List every extracted field on its own line as "<name> : <value>".
print("Extracted vitals:")
for field_name, field_value in vitals.items():
    print(field_name, ":", field_value)


Processing medical report: medicalreports/report_1.pdf
Extracted vitals:
heart_rate : 0
map : 0
respiratory_rate : 0
temperature : 0
glucose : 220.0
creatinine : 2.1
bun : 0
sodium : 138.0
potassium : 4.6
hemoglobin : 10.8
wbc : 0
lactate : 0
fluid_balance : 0
vasopressors : 0
sedatives : 0
antibiotics : 0
insulin : 1


LSTM RISK + MEDICAL PROBLEMS (USES THE TRAINED ML MODEL)

In [None]:
# =========================================================
# PART 2: LSTM RISK + MEDICAL PROBLEMS (SEQUENCE FIXED)
# =========================================================

import numpy as np
import tensorflow as tf
import joblib

# Artifacts are expected one directory above the notebook's working directory.
MODEL_PATH = "../model_final_2.h5"
SCALER_PATH = "../scaler.pkl"

# Load the trained Keras model used for risk scoring.
model = tf.keras.models.load_model(MODEL_PATH)
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load scaler files from a trusted source.
# Presumably this is the scaler fitted on the training features; confirm.
scaler = joblib.load(SCALER_PATH)

print("LSTM model loaded")
# Printed so the (batch, timesteps, features) shape can be checked against
# SEQUENCE_LENGTH and MODEL_FEATURES below.
print("Expected input shape:", model.input_shape)


# FEATURES USED DURING TRAINING (DO NOT CHANGE)
# Order matters: build_lstm_sequence assembles the feature vector in this
# order before passing it to the scaler and the model.


MODEL_FEATURES = ["glucose", "creatinine", "hemoglobin"]
# Number of timesteps in each sequence passed to the model.
SEQUENCE_LENGTH = 24


# BUILD 24-TIMESTEP SEQUENCE FROM SINGLE REPORT


def build_lstm_sequence(vitals):
    """
    Turn one report's vitals into the fixed-length sequence fed to the LSTM.

    Only a single report is available, so its scaled feature vector is
    duplicated across all SEQUENCE_LENGTH timesteps. Any name in
    MODEL_FEATURES that is missing from ``vitals`` is read as 0.
    """
    n_features = len(MODEL_FEATURES)

    # One unscaled row, in MODEL_FEATURES order.
    raw_row = np.array(
        [vitals.get(name, 0) for name in MODEL_FEATURES],
        dtype=float
    )

    # The scaler is given a 2-D (1, n_features) input; keep its only row.
    scaled_row = scaler.transform(raw_row.reshape(1, -1))[0]

    # Stack the same scaled row once per timestep.
    repeated = np.tile(scaled_row, (SEQUENCE_LENGTH, 1))

    # Add the leading batch axis: (1, SEQUENCE_LENGTH, n_features).
    return repeated.reshape(1, SEQUENCE_LENGTH, n_features)

#PREDICT RISK (LSTM)

def predict_risk(vitals):
    """
    Score a patient's vitals with the LSTM and bucket the score.

    Returns a ``(risk, score)`` tuple where ``risk`` is "HIGH" for scores
    above 0.7, "MODERATE" for scores above 0.4, and "LOW" otherwise, and
    ``score`` is the first element of the model's first prediction row.
    """
    prediction = model.predict(build_lstm_sequence(vitals))
    score = prediction[0][0]

    # Guard clauses, highest band first.
    if score > 0.7:
        return "HIGH", score
    if score > 0.4:
        return "MODERATE", score
    return "LOW", score

# IDENTIFY MEDICAL PROBLEMS (USES ALL VITALS)

def identify_problems(v):
    """Derive a list of likely medical problems from extracted vitals.

    ``extract_vitals`` stores 0 for any field it could not find, so 0 (or an
    absent key) is treated here as "not measured". The previous version
    compared hemoglobin directly with ``< 12`` and therefore reported anemia
    whenever hemoglobin was simply missing from the report; it also raised
    ``KeyError`` for dictionaries lacking any of the inspected keys.

    Args:
        v: Mapping of vital/lab name to numeric value or 0/1 flag.

    Returns:
        List of problem descriptions in a fixed order, or the single entry
        "Vitals within acceptable limits" when no rule fires.
    """
    def reading(name):
        # Absent keys behave like the extractor's "not found" sentinel.
        return v.get(name, 0)

    problems = []

    if reading("glucose") > 140:
        problems.append("Hyperglycemia / Diabetes risk")

    if reading("creatinine") > 1.3 or reading("bun") > 20:
        problems.append("Renal dysfunction")

    if reading("lactate") > 2:
        problems.append("Possible sepsis / hypoperfusion")

    # The only "lower than" rule: exclude 0 so a missing value is not flagged.
    if 0 < reading("hemoglobin") < 12:
        problems.append("Anemia")

    if reading("wbc") > 11:
        problems.append("Infection / inflammation")

    if reading("vasopressors") == 1:
        problems.append("Hemodynamic instability")

    if not problems:
        problems.append("Vitals within acceptable limits")

    return problems



# Score the extracted vitals with the LSTM and apply the rule-based checks.
risk, risk_score = predict_risk(vitals)
problems = identify_problems(vitals)

# Report the risk band and raw score, then one bullet per identified problem.
print("Predicted risk:", risk)
print("Risk score:", risk_score)
print("Identified problems:")
for problem in problems:
    print("-", problem)




LSTM model loaded
Expected input shape: (None, 24, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350ms/step
Predicted risk: MODERATE
Risk score: 0.46314555
Identified problems:
- Hyperglycemia / Diabetes risk
- Renal dysfunction
- Anemia


In [16]:

!pip install google-generativeai




[notice] A new release of pip is available: 23.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [17]:
!pip install google-genai


Collecting google-genai
  Downloading google_genai-1.59.0-py3-none-any.whl (719 kB)
     ---------------------------------------- 0.0/719.1 kB ? eta -:--:--
      --------------------------------------- 10.2/719.1 kB ? eta -:--:--
     -- ---------------------------------- 41.0/719.1 kB 653.6 kB/s eta 0:00:02
     ---- -------------------------------- 92.2/719.1 kB 871.5 kB/s eta 0:00:01
     ------ ----------------------------- 122.9/719.1 kB 798.9 kB/s eta 0:00:01
     ------------ ------------------------- 245.8/719.1 kB 1.3 MB/s eta 0:00:01
     --------------- ---------------------- 286.7/719.1 kB 1.4 MB/s eta 0:00:01
     --------------------- ---------------- 409.6/719.1 kB 1.4 MB/s eta 0:00:01
     ----------------------- -------------- 440.3/719.1 kB 1.3 MB/s eta 0:00:01
     ------------------------- ------------ 491.5/719.1 kB 1.3 MB/s eta 0:00:01
     -------------------------------- ----- 614.4/719.1 kB 1.4 MB/s eta 0:00:01
     ----------------------------------- -- 665.6


[notice] A new release of pip is available: 23.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Generate a 7-day diet plan based on the patient's health using the Google Gemini API

In [None]:
import json
from google import genai


# Configure Gemini API Key
#
# The key is read from the GEMINI_API_KEY environment variable (create one in
# AI Studio) so the secret is never stored in the notebook or its history.
import os

API_KEY = os.environ.get("GEMINI_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; export it before running this cell"
    )

client = genai.Client(api_key=API_KEY)
print("Gemini client initialized")


#  Load Clinical & Diet KB
#
# The encoding is given explicitly: open() otherwise uses the platform's
# locale encoding, which on Windows is not UTF-8 and can fail on, or
# silently corrupt, non-ASCII text in the JSON files.

with open("clinical_output.json", "r", encoding="utf-8") as f:
    clinical_data = json.load(f)

with open("diet_kb.json", "r", encoding="utf-8") as f:
    diet_kb = json.load(f)


#  Build Prompt
#
# Single-day prompt sent to Gemini. It embeds the patient's clinical data and
# the first 50 entries of the diet knowledge base as pretty-printed JSON,
# followed by the JSON schema the model must fill in and the tagging rules.
# Doubled braces ({{ and }}) are f-string escapes that render as literal
# braces in the schema.
# NOTE(review): diet_kb[:50] assumes diet_kb is a list (or other sliceable
# sequence) - confirm against diet_kb.json.
# NOTE(review): the rules require Indian food only, yet the examples name
# "Grilled chicken salad" and "Oats with skim milk" - confirm the examples
# are intended.

prompt_text = f"""
You are a clinical dietitian AI.

IMPORTANT RULES:
- The patient is an ADULT unless explicitly stated otherwise.
- Do NOT assume pediatric or toddler age.
- Use ONLY Indian food items or Indian-style meals.
- Meals must be realistic (e.g., "Oats with skim milk", "Grilled chicken salad").

Patient clinical metrics:
{json.dumps(clinical_data, indent=2)}

Diet knowledge base:
{json.dumps(diet_kb[:50], indent=2)}

Generate a ONE-DAY diet plan in EXACT JSON FORMAT below.
Do NOT add text outside JSON.

{{
  "day_plan": {{
    "breakfast": [
      {{
        "item": "",
        "calories": 0.0,
        "protein": 0.0,
        "fat": 0.0,
        "carbs": 0.0,
        "tags": []
      }}
    ],
    "lunch": [],
    "dinner": [],
    "snacks": []
  }},
  "total_nutrition": {{
    "calories": 0.0,
    "protein": 0.0,
    "fat": 0.0,
    "carbs": 0.0
  }},
  "medical_reasoning": ""
}}

STRICT TAGGING RULES (MANDATORY):
- If creatinine > 2.0 → EVERY meal item MUST include "renal_safe"
- If glucose > 180 → EVERY meal item MUST include "diabetic_friendly" AND "low_sugar"
- If sodium > 145 → EVERY meal item MUST include "heart_healthy"
- If cholesterol > 240 → AVOID fried foods AND include "low_fat"
- Tags must be added EVEN IF the food is naturally safe

MEAL STYLE REQUIREMENT:
- Meals must be described in natural form:
  Example:
  "Oats with skim milk"
  "Grilled chicken salad with olive oil"
  "Vegetable soup with carrot and bottle gourd"
- Avoid listing raw ingredients alone.
- Follow Indian eating patterns.


"""



#  Call Gemini Model

# Local import so this cell also works when run on its own.
import re

response = client.models.generate_content(
    model="gemini-2.5-flash",   # or another available Gemini model
    contents=prompt_text     # pass prompt as a plain string
)


#  Extract Text

# NOTE(review): response.text is assumed to be None or empty when the model
# returns no text part (e.g. a blocked response) - confirm against the SDK.
diet_plan_text = response.text or ""
print("Raw Gemini output:\n", diet_plan_text)

if not diet_plan_text.strip():
    raise ValueError("Gemini returned no text; cannot build the diet plan")


#  Parse JSON Safely

try:
    diet_plan_json = json.loads(diet_plan_text)
except json.JSONDecodeError:
    # The reply is not bare JSON. The old fallback used str.strip("```json"),
    # which removes any of the characters ` j s o n from both ends rather than
    # the fence itself. Extract the payload explicitly instead:
    #   1. the body of a Markdown code fence, if there is one;
    #   2. otherwise the span from the first "{" to the last "}".
    fenced = re.search(
        r"```(?:json)?\s*(.*?)\s*```",
        diet_plan_text,
        re.DOTALL | re.IGNORECASE,
    )
    if fenced:
        cleaned = fenced.group(1)
    else:
        start = diet_plan_text.find("{")
        end = diet_plan_text.rfind("}")
        cleaned = diet_plan_text[start:end + 1] if 0 <= start < end else diet_plan_text
    # Still raises json.JSONDecodeError if the payload is not valid JSON.
    diet_plan_json = json.loads(cleaned.strip())


#  Save Output

with open("daily_diet_plan.json", "w") as f:
    json.dump(diet_plan_json, f, indent=4)

print("Diet plan JSON saved as daily_diet_plan.json")
import json
from google import genai

NUM_DAYS = 7  # number of days to plan; intended to be supplied by the frontend later

# Extend Prompt (NO rewrite)
#
# The weekly prompt is the single-day prompt_text with extra instructions
# appended, so patient data, diet knowledge base and tagging rules are reused
# unchanged. NUM_DAYS is interpolated into both the instruction text and the
# "days_generated" field of the schema; doubled braces are f-string escapes
# for literal braces.
# NOTE(review): prompt_text still says "Generate a ONE-DAY diet plan" and
# gives the single-day schema, so the model sees two output formats -
# confirm that it reliably follows the final one.

weekly_prompt = prompt_text + f"""

ADDITIONAL INSTRUCTIONS (DO NOT IGNORE):

- Generate diet plans for {NUM_DAYS} DAYS.
- Each day MUST have meal VARIATION.
- Structure output EXACTLY as below.
- Compute weekly nutrition summary by summing all days.
- Generate a grocery list aggregated from all meals.

FINAL OUTPUT JSON FORMAT (STRICT):

{{
  "days_generated": {NUM_DAYS},
  "diet_plans": {{
    "day_1": {{ ... SAME STRUCTURE AS SINGLE DAY ... }},
    "day_2": {{ ... }},
    "...": {{ }}
  }},
  "weekly_nutrition_summary": {{
    "average_per_day": {{
      "calories": 0.0,
      "protein": 0.0,
      "fat": 0.0,
      "carbs": 0.0
    }},
    "weekly_total": {{
      "calories": 0.0,
      "protein": 0.0,
      "fat": 0.0,
      "carbs": 0.0
    }}
  }},
  "grocery_list": [
    {{
      "item": "",
      "times_used_in_week": 0
    }}
  ]
}}

RULES:
- Maintain ALL medical tagging rules.
- No forbidden foods.
- Indian meals only.
- No text outside JSON.
"""


# Single Gemini Call (Quota Safe)
# All NUM_DAYS days are requested in one call instead of one call per day.

# Local import so this section also works when run on its own.
import re

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=weekly_prompt
)

# NOTE(review): response.text is assumed to be None or empty when the model
# returns no text part (e.g. a blocked response) - confirm against the SDK.
# The previous code called .strip() on it directly.
raw_output = (response.text or "").strip()
print("Raw Weekly Output:\n", raw_output)

if not raw_output:
    raise ValueError("Gemini returned no text; cannot build the weekly diet plan")

# Parse JSON Safely

try:
    weekly_diet_json = json.loads(raw_output)
except json.JSONDecodeError:
    # The reply is not bare JSON. The old fallback used str.strip("```json"),
    # which removes any of the characters ` j s o n from both ends rather than
    # the fence itself. Extract the payload explicitly instead:
    #   1. the body of a Markdown code fence, if there is one;
    #   2. otherwise the span from the first "{" to the last "}".
    fenced = re.search(
        r"```(?:json)?\s*(.*?)\s*```",
        raw_output,
        re.DOTALL | re.IGNORECASE,
    )
    if fenced:
        cleaned = fenced.group(1)
    else:
        start = raw_output.find("{")
        end = raw_output.rfind("}")
        cleaned = raw_output[start:end + 1] if 0 <= start < end else raw_output
    # Still raises json.JSONDecodeError if the payload is not valid JSON.
    weekly_diet_json = json.loads(cleaned.strip())


# Save Final Output

with open("weekly_diet_plan.json", "w") as f:
    json.dump(weekly_diet_json, f, indent=4)

print(" Weekly diet plan saved as weekly_diet_plan.json")


{
  "days_generated": 7,
  "diet_plans": {
    "day_1": {
      "breakfast": [
        {
          "item": "Bajra Porridge",
          "calories": 174.0,
          "protein": 5.48,
          "fat": 2.72,
          "carbs": 30.89,
          "tags": [
            "diabetic_friendly",
            "low_sugar",
            "high_fiber_diet",
            "low_gi_foods"
          ]
        },
        {
          "item": "Almonds",
          "calories": 60.92,
          "protein": 1.84,
          "fat": 5.85,
          "carbs": 0.3,
          "tags": [
            "diabetic_friendly",
            "low_sugar",
            "high_fiber_diet",
            "low_gi_foods"
          ]
        }
      ],
      "lunch": [
        {
          "item": "Barley Roti",
          "calories": 189.47,
          "protein": 6.56,
          "fat": 0.78,
          "carbs": 36.77,
          "tags": [
            "diabetic_friendly",
            "low_sugar",
            "high_fiber_diet",
            "low_gi_foods"
