In [2]:
#Import Alll The Libraries
import pandas as pd
import numpy as np
import cv2
import pytesseract
import warnings
import os
import logging

# Suppress all warnings
warnings.filterwarnings("ignore")

# Suppress torch + CUDA logs
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["KMP_WARNINGS"] = "0"
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

# Suppress EasyOCR's internal logging
logging.getLogger('easyocr').setLevel(logging.ERROR)
logging.getLogger('PIL').setLevel(logging.ERROR)

import re
import easyocr
import pickle
from textblob import TextBlob
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Set Tesseract path
pytesseract.pytesseract.tesseract_cmd = r"C:\Users\ABC\Desktop\AIML\Tesseract-main\tesseract.exe"

# Step 1: Load dataset
df = pd.read_csv("patient_vitals_dataset_2.csv")

# Step 2: Add Danger label
def label_danger(row):
    return 1 if (
        row['Systolic_BP_mmHg'] > 140 or
        row['Diastolic_BP_mmHg'] > 90 or
        row['SpO2_percent'] < 95 or
        row['Heart_Rate_bpm'] > 100 or
        row['Temperature_C'] < 35.0 or
        row['Temperature_C'] > 39.0 or
        row['Respiratory_Rate_bpm'] < 10 or
        row['Respiratory_Rate_bpm'] > 30
    ) else 0
df['Danger_Level'] = df.apply(label_danger, axis=1)

# Step 3: Features & Labels
feature_cols = ["Systolic_BP_mmHg", "Diastolic_BP_mmHg", "SpO2_percent", "Heart_Rate_bpm", "Temperature_C", "Respiratory_Rate_bpm"]
X = df[feature_cols]
y = df["Danger_Level"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, stratify=y, test_size=0.2, random_state=42)

# Step 4: Train multiple models
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(probability=True),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier()
}

accuracies = {}
trained_models = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    accuracies[name] = acc
    trained_models[name] = model
    print(f"üî¨ {name} Accuracy: {acc:.4f}")

# Step 5: Select best model
best_model_name = max(accuracies, key=accuracies.get)
best_model = trained_models[best_model_name]
print(f"\nüèÜ Best Model Selected: {best_model_name} with Accuracy = {accuracies[best_model_name]:.4f}")

_reader = easyocr.Reader(['en'], gpu=False)  # set gpu=True if you have GPU

# Zones tuned for your monitor (fractions of width,height)
ZONES = {
    "HR":   (0.62, 0.98, 0.06, 0.27),   # top-right
    "BP":   (0.62, 0.98, 0.28, 0.52),   # mid-right
    "TEMP": (0.60, 0.98, 0.48, 0.68),   # near temp
    "SPO2": (0.03, 0.45, 0.52, 0.82),   # bottom-left
    "RESP": (0.60, 0.98, 0.68, 0.92),   # bottom-right
}

def _preprocess_image(image_path):
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Image not found or unreadable: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    denoised = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)
    _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return img, thresh

def _easyocr_items(image):
    """Run EasyOCR on BGR image and return items with normalized centers & sizes"""
    h, w = image.shape[:2]
    results = _reader.readtext(image)  # list of (box, text, prob)
    items = []
    for box, text, conf in results:
        xs = [p[0] for p in box]; ys = [p[1] for p in box]
        x_min, x_max, y_min, y_max = min(xs), max(xs), min(ys), max(ys)
        cx = (x_min + x_max) / 2.0; cy = (y_min + y_max) / 2.0
        items.append({
            "text": str(text).strip(),
            "conf": float(conf),
            "box": box,
            "cx": cx / w, "cy": cy / h,
            "bw": (x_max - x_min) / w, "bh": (y_max - y_min) / h
        })
    return items

def _in_zone(item, zone):
    x1,x2,y1,y2 = zone
    return (x1 <= item["cx"] <= x2) and (y1 <= item["cy"] <= y2)

def _nums_in_text(t):
    return re.findall(r'\d+(?:\.\d+)?', t)

# ---------- BP extraction helpers ----------
def extract_bp_from_boxes(ocr_items, image=None, debug=False):
    """Try bounding-box stitching in BP zone, then tesseract crop, then numeric-pair heuristic."""
    # 1) BP-zone stitch with easyocr tokens
    zone = ZONES["BP"]
    zone_items = [it for it in ocr_items if _in_zone(it, zone) and it["conf"] >= 0.3]
    if debug:
        print(f"[BP] {len(zone_items)} tokens in BP zone (conf>=0.3):", [(it['text'], round(it['conf'],2)) for it in zone_items])
    if zone_items:
        zone_items = sorted(zone_items, key=lambda it: it['cx'])
        stitched = " ".join(it['text'] for it in zone_items)
        # try patterns
        m = re.search(r'(\d{2,3})\s*[/\-]\s*(\d{2,3})', stitched)
        if m:
            sbp, dbp = int(m.group(1)), int(m.group(2))
            return sbp, dbp, "bbox-stitched"
        # also try patterns like "BP 120 80" or "120 80 mmhg"
        m2 = re.search(r'\b(\d{2,3})\b[^0-9]{0,6}\b(\d{2,3})\b', stitched)
        if m2:
            cand1, cand2 = int(m2.group(1)), int(m2.group(2))
            if cand1 > cand2:
                return cand1, cand2, "bbox-two-numbers"
            else:
                return cand2, cand1, "bbox-two-numbers"

    # 2) Crop BP zone and use Tesseract configured to digits
    if image is not None:
        h,w = image.shape[:2]
        x1 = int(ZONES["BP"][0]*w); x2 = int(ZONES["BP"][1]*w)
        y1 = int(ZONES["BP"][2]*h); y2 = int(ZONES["BP"][3]*h)
        crop = image[y1:y2, x1:x2]
        if crop.size != 0:
            # enhance crop
            crop_gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
            crop_blur = cv2.GaussianBlur(crop_gray,(3,3),0)
            _, crop_th = cv2.threshold(crop_blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
            config = r'--psm 6 -c tessedit_char_whitelist=0123456789/.-'
            t_text = pytesseract.image_to_string(crop_th, config=config)
            if debug:
                print("[BP crop tesseract] raw:", repr(t_text))
            m = re.search(r'(\d{2,3})\s*[/\-]\s*(\d{2,3})', t_text)
            if m:
                return int(m.group(1)), int(m.group(2)), "tesseract-crop"
            # fallback numeric scan inside t_text
            nums = re.findall(r'\d{2,3}', t_text)
            if len(nums) >= 2:
                nums = [int(x) for x in nums]
                nums_sorted = sorted(nums, reverse=True)
                return nums_sorted[0], nums_sorted[1], "tesseract-crop-numeric"

    # 3) Global candidate search: find horizontal pairs close in y (same line)
    numeric_items = []
    for it in ocr_items:
        for n in _nums_in_text(it['text']):
            numeric_items.append((it, float(n)))
    if debug:
        print("[BP global numeric tokens]", [(it['text'], val) for it,val in numeric_items])
    # Try to find two tokens with similar cy (same row) and plausible BP range
    for i in range(len(numeric_items)):
        it_i, val_i = numeric_items[i]
        for j in range(i+1, len(numeric_items)):
            it_j, val_j = numeric_items[j]
            # same-ish vertical line?
            if abs(it_i['cy'] - it_j['cy']) < 0.06:  # tuneable
                vi, vj = int(round(val_i)), int(round(val_j))
                # plausibility: one should be greater than the other
                big, small = (vi, vj) if vi >= vj else (vj, vi)
                if 60 <= big <= 220 and 30 <= small <= 140:
                    return big, small, "global-pair"
    # not found
    return None, None, None

# ---------- generic zone value picker ----------
def choose_zone_value(ocr_items, name, value_range, prefer_decimal=False, debug=False):
    zone = ZONES[name]
    cands = []
    for it in ocr_items:
        if not _in_zone(it, zone): continue
        if it['conf'] < 0.28: continue
        for n in _nums_in_text(it['text']):
            if prefer_decimal and "." not in n:
                continue
            try:
                v = float(n)
            except:
                continue
            if value_range[0] <= v <= value_range[1]:
                cands.append((v,it))
    if not cands and prefer_decimal:
        # fallback to integer candidates
        for it in ocr_items:
            if not _in_zone(it, zone) or it['conf'] < 0.28: continue
            for n in _nums_in_text(it['text']):
                try:
                    v = float(n)
                except:
                    continue
                if value_range[0] <= v <= value_range[1]:
                    cands.append((v,it))
    if not cands:
        if debug: print(f"[{name}] no candidates")
        return None
    # choose by box height then confidence
    v,it = max(cands, key=lambda x: (x[1]['bh'], x[1]['conf']))
    if debug: print(f"[{name}] chosen {v} from text '{it['text']}' conf={it['conf']}")
    return int(round(v)) if value_range[1] <= 200 else v  # convert small ranges to int

# ---------- main vitals extractor ----------
def extract_vitals_improved(image_path, debug=False):
    if not os.path.exists(image_path):
        raise FileNotFoundError(image_path)
    img, thresh = _preprocess_image(image_path)
    ocr_items = _easyocr_items(img)
    if debug:
        print("[DEBUG] OCR items (text, conf):")
        for it in ocr_items:
            print(" ->", it['text'], f"(conf={it['conf']:.2f}) at ({it['cx']:.2f},{it['cy']:.2f})")

    vitals = {
        'Systolic_BP_mmHg': None,
        'Diastolic_BP_mmHg': None,
        'Heart_Rate_bpm': None,
        'SpO2_percent': None,
        'Temperature_C': None,
        'Respiration_Rate_bpm': None
    }

    # 1) Try BP from boxes + crop + global heuristics
    sbp, dbp, bp_src = extract_bp_from_boxes(ocr_items, img, debug=debug)
    if sbp and dbp:
        vitals['Systolic_BP_mmHg'] = sbp
        vitals['Diastolic_BP_mmHg'] = dbp
        if debug: print(f"[BP] Found {sbp}/{dbp} via {bp_src}")
    else:
        if debug: print("[BP] Not found via bounding boxes/crop/heuristics, will attempt text fallback")

    # 2) For other vitals: zone picks
    hr = choose_zone_value(ocr_items, "HR", (30, 220), prefer_decimal=False, debug=debug)
    spo2 = choose_zone_value(ocr_items, "SPO2", (40, 100), prefer_decimal=False, debug=debug)
    temp = choose_zone_value(ocr_items, "TEMP", (30.0, 45.0), prefer_decimal=True, debug=debug)
    resp = choose_zone_value(ocr_items, "RESP", (3, 60), prefer_decimal=False, debug=debug)

    vitals['Heart_Rate_bpm'] = int(hr) if hr is not None else None
    vitals['SpO2_percent'] = int(spo2) if spo2 is not None else None
    vitals['Temperature_C'] = round(float(temp),1) if temp is not None else None
    vitals['Respiration_Rate_bpm'] = int(resp) if resp is not None else None

    # --- Respiration Rate Extraction Fix ---
    if vitals.get("Respiration_Rate_bpm") is None:
        # Try to find RESP in OCR tokens
        resp_keywords = ["RESP", "RR", "Resp", "Respiration"]
        for kw in resp_keywords:
            if any(kw in token for token, _ in bp_tokens):  # search local OCR tokens
                # pick nearest number to this keyword
                for token, num in bp_tokens:
                    if token.isdigit():
                        vitals["Respiration_Rate_bpm"] = int(num)
                        break
    
    # fallback: if we have a plausible standalone number like 15, 16, 18
    if vitals.get("Respiration_Rate_bpm") is None:
        for token, num in global_numeric_tokens:
            if 10 <= num <= 30:  # normal respiration rate range
                vitals["Respiration_Rate_bpm"] = int(num)
                break


    # 3) Extra text fallback (scan whole OCR text)
    full_text = " ".join([it['text'] for it in ocr_items])
    if (vitals['Systolic_BP_mmHg'] is None or vitals['Diastolic_BP_mmHg'] is None):
        m = re.search(r'(\d{2,3})\s*[\/\-]\s*(\d{2,3})', full_text)
        if m:
            vitals['Systolic_BP_mmHg'] = int(m.group(1)); vitals['Diastolic_BP_mmHg'] = int(m.group(2))
            if debug: print("[BP fallback] matched full_text pattern", m.group(0))
        else:
            # look for "systolic 120" "diastolic 80"
            sys_m = re.search(r'(Systolic|SBP|SYS)[:\s]*?(\d{2,3})', full_text, re.IGNORECASE)
            dia_m = re.search(r'(Diastolic|DBP|DIA)[:\s]*?(\d{2,3})', full_text, re.IGNORECASE)
            if sys_m and dia_m:
                vitals['Systolic_BP_mmHg'] = int(sys_m.group(2))
                vitals['Diastolic_BP_mmHg'] = int(dia_m.group(2))
                if debug: print("[BP fallback] matched Systolic / Diastolic labels")

    if debug:
        print("[FINAL extracted before impute]", vitals)
    return vitals, full_text

# ---------- evaluate with optional imputation ----------
def evaluate_and_predict_from_image(image_path, impute_missing=False, debug=False):
    vitals, full_text = extract_vitals_improved(image_path, debug=debug)
    # debug prints
    print("ü©∫ Extracted Vitals (raw):", vitals)
    if debug:
        print("üîé Full OCR text:", full_text)

    # If any missing and impute_missing True -> fill with dataset median (if df provided)
    if impute_missing:
        med_vals = {}
        # Only compute medians if df exists and has columns we expect
        try:
            for col in ["Systolic_BP_mmHg","Diastolic_BP_mmHg","SpO2_percent","Heart_Rate_bpm","Temperature_C","Respiratory_Rate_bpm"]:
                if col in globals().get("df", pd.DataFrame()).columns:
                    med_vals[col] = int(round(df[col].median()))
                else:
                    med_vals[col] = None
        except Exception as e:
            if debug: print("Impute medians failed:", e)
            med_vals = {k:None for k in vitals.keys()}
        # apply medians where None and available
        for k in vitals.keys():
            if vitals[k] is None and med_vals.get(k) is not None:
                vitals[k] = med_vals.get(k)
                if debug: print(f"[IMPUTE] {k} <- {vitals[k]} (median)")

    # After optional imputation, check again
    if any(v is None for v in vitals.values()):
        print("‚ùå One or more vitals missing after extraction. Returning Insufficient Data.")
        return vitals, "Insufficient Data"

    # Prepare model input and predict (assumes scaler & best_model exist)
    X_input = np.array([
        vitals['Systolic_BP_mmHg'],
        vitals['Diastolic_BP_mmHg'],
        vitals['SpO2_percent'],
        vitals['Heart_Rate_bpm'],
        vitals['Temperature_C'],
        vitals['Respiration_Rate_bpm']
    ], dtype=float).reshape(1,-1)
    X_scaled = scaler.transform(X_input)
    pred = best_model.predict(X_scaled)[0]
    state = "‚ö†Ô∏è DANGER STATE" if pred==1 else "‚úÖ NORMAL STATE"
    print("üìà Health Status:", state)
    return vitals, state

# Step 9: Define evaluate_health_condition
def evaluate_health_condition(vitals):
    """
    Evaluates patient's condition based on extracted vitals.
    vitals: dict with keys [Systolic_BP_mmHg, Diastolic_BP_mmHg, SpO2_percent, 
                            Heart_Rate_bpm, Temperature_C, Respiration_Rate]
    """
    sbp = vitals.get("Systolic_BP_mmHg")
    dbp = vitals.get("Diastolic_BP_mmHg")
    spo2 = vitals.get("SpO2_percent")
    hr = vitals.get("Heart_Rate_bpm")
    temp = vitals.get("Temperature_C")
    resp = vitals.get("Respiration_Rate")

    alerts = []

    # Blood Pressure
    if sbp is None or dbp is None:
        alerts.append("‚ö†Ô∏è Blood Pressure data missing")
    else:
        if sbp > 140 or dbp > 90:
            alerts.append("‚ö†Ô∏è High Blood Pressure (Hypertension)")
        elif sbp < 90 or dbp < 60:
            alerts.append("‚ö†Ô∏è Low Blood Pressure (Hypotension)")

    # SpO2
    if spo2 is not None:
        if spo2 < 90:
            alerts.append("‚ö†Ô∏è Critical Low SpO‚ÇÇ")
        elif spo2 < 95:
            alerts.append("‚ö†Ô∏è Mild Low SpO‚ÇÇ")

    # Heart Rate
    if hr is not None:
        if hr < 60:
            alerts.append("‚ö†Ô∏è Bradycardia (Low HR)")
        elif hr > 100:
            alerts.append("‚ö†Ô∏è Tachycardia (High HR)")

    # Temperature
    if temp is not None:
        if temp > 37.5:
            alerts.append("‚ö†Ô∏è Fever")
        elif temp < 35.0:
            alerts.append("‚ö†Ô∏è Hypothermia")

    # Respiration
    if resp is not None:
        if resp < 12:
            alerts.append("‚ö†Ô∏è Low Respiration Rate")
        elif resp > 20:
            alerts.append("‚ö†Ô∏è High Respiration Rate")

    if not alerts:
        print("‚úÖ Patient vitals are within normal range.")
    else:
        print("üö® Alerts:")
        for a in alerts:
            print(a)

# Step 10: Run Inference
def predict_from_image(image_path):
    if not os.path.exists(image_path):
        print("‚ùå Image path does not exist!")
        return

    print("‚úÖ Image found!")

    # Preprocess and run OCR
    img, thresh = _preprocess_image(image_path)
    ocr_items = _easyocr_items(img)  # ‚úÖ returns dicts with cx, cy, conf

    # ‚úÖ Try bounding-box BP extraction first
    sbp, dbp, bp_src = extract_bp_from_boxes(ocr_items, img, debug=True)

    vitals = {
        'Systolic_BP_mmHg': sbp,
        'Diastolic_BP_mmHg': dbp,
        'Heart_Rate_bpm': choose_zone_value(ocr_items, "HR", (30, 220)),
        'SpO2_percent': choose_zone_value(ocr_items, "SPO2", (40, 100)),
        'Temperature_C': choose_zone_value(ocr_items, "TEMP", (30.0, 45.0), prefer_decimal=True),
        'Respiration_Rate_bpm': choose_zone_value(ocr_items, "RESP", (3, 60)),
    }

    print("ü©∫ Extracted Vitals:", vitals)

    # Evaluate health
    evaluate_health_condition(vitals)

# Step 11: Execute
image_path = r"C:\Users\ABC\Desktop\AIML\Medi-Alert Project\Medi-alert\OG dataset\-1.png"
predict_from_image(image_path)

# Step 12: Deploy using Pickle
filename = "Medi-alert.sav"
pickle.dump(best_model,open(filename,"wb"))
pickle.dump(scaler, open("scaler.pkl", "wb"))

loaded_model=pickle.load(open('Medi-alert.sav','rb'))

üî¨ Random Forest Accuracy: 1.0000
üî¨ KNN Accuracy: 0.8750
üî¨ SVM Accuracy: 0.9200
üî¨ Logistic Regression Accuracy: 0.7725
üî¨ Naive Bayes Accuracy: 0.8500
üî¨ Decision Tree Accuracy: 1.0000

üèÜ Best Model Selected: Random Forest with Accuracy = 1.0000
‚úÖ Image found!
[BP] 6 tokens in BP zone (conf>=0.3): [('mmHg', 1.0), ('mLL', 1.0), ('57', 1.0), ('1.0', 1.0), ('TEMP', 1.0), ('%', 1.0)]
[BP crop tesseract] raw: '1.0\n'
[BP global numeric tokens] [('23-09-2018', 23.0), ('23-09-2018', 9.0), ('23-09-2018', 2018.0), ('35*C', 35.0), ('10:11 AM', 10.0), ('10:11 AM', 11.0), ('72', 72.0), ('LLLL96', 96.0), ('57', 57.0), ('1.0', 1.0), ('36.5 3,3', 36.5), ('36.5 3,3', 3.0), ('36.5 3,3', 3.0), ('1.0', 1.0), ('93', 93.0), ('93', 93.0), ('15', 15.0), ('39.8', 39.8)]
ü©∫ Extracted Vitals: {'Systolic_BP_mmHg': 96, 'Diastolic_BP_mmHg': 57, 'Heart_Rate_bpm': 72.0, 'SpO2_percent': 93, 'Temperature_C': 36, 'Respiration_Rate_bpm': 15}
üö® Alerts:
‚ö†Ô∏è Low Blood Pressure (Hypotension)
‚ö†Ô

In [2]:
import time
import boto3
import pyautogui

s3 = boto3.client('s3')
bucket_name = "medi-alert-mpm-screenshots"

def capture_and_upload():
    screenshot = pyautogui.screenshot()
    filename = f"mpm_capture_{int(time.time())}.png"
    screenshot.save(filename)
    s3.upload_file(filename, bucket_name, filename)
    print("Uploaded:", filename)

while True:
    capture_and_upload()
    time.sleep(5)  # Capture every 10 seconds

S3UploadFailedError: Failed to upload mpm_capture_1757992326.png to medi-alert-mpm-screenshots/mpm_capture_1757992326.png: An error occurred (InvalidAccessKeyId) when calling the PutObject operation: The AWS Access Key Id you provided does not exist in our records.