In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
THRESH_SPO2_LOW = 92
THRESH_SPO2_CRITICAL = 88
TREND_WINDOW = 6  # Number of readings for short-term trend analysis

LAB_THRESHOLDS = {
    'blood_glucose': {
        'low': 70,       # Hypoglycemia
        'normal_low': 70,
        'normal_high': 140,
        'high': 200,     # Hyperglycemia
        'critical': 250  # For DKA suspicion
    },
    'bicarbonate': {
        'normal_low': 22,
        'normal_high': 28,
        'low': 18,       # Mild acidosis
        'critical': 10   # Severe acidosis
    },
    'ketones': {
        'negative': 0.0,
        'low': 0.1,
        'moderate': 0.5,
        'high': 1.0,     # Indicates ketosis
        'critical': 3.0  # Strong indicator of DKA
    },
    'ph': {
        'normal_low': 7.35,
        'normal_high': 7.45,
        'low': 7.30,     # Acidosis
        'critical': 7.20 # Severe acidosis
    }
}

AGE_THRESHOLDS = {
    'neonate': {
        'rr_low': 30, 'rr_normal': 40, 'rr_high': 60,
        'hr_low': 100, 'hr_normal': 140, 'hr_high': 160,
        'sbp_low': 60, 'sbp_normal': 70, 'sbp_high': 90,
        'temp_low': 36.0, 'temp_normal': 37.2, 'temp_high': 38.0
    },
    'infant': {
        'rr_low': 24, 'rr_normal': 30, 'rr_high': 40,
        'hr_low': 80, 'hr_normal': 120, 'hr_high': 140,
        'sbp_low': 70, 'sbp_normal': 85, 'sbp_high': 100,
        'temp_low': 36.0, 'temp_normal': 37.2, 'temp_high': 38.0
    },
    'child': {
        'rr_low': 16, 'rr_normal': 20, 'rr_high': 30,
        'hr_low': 70, 'hr_normal': 90, 'hr_high': 110,
        'sbp_low': 80, 'sbp_normal': 95, 'sbp_high': 110,
        'temp_low': 36.0, 'temp_normal': 37.0, 'temp_high': 38.0
    },
    'adolescent': {
        'rr_low': 12, 'rr_normal': 16, 'rr_high': 20,
        'hr_low': 60, 'hr_normal': 75, 'hr_high': 100,
        'sbp_low': 90, 'sbp_normal': 105, 'sbp_high': 120,
        'temp_low': 35.8, 'temp_normal': 36.8, 'temp_high': 37.8
    },
    'adult': {
        'rr_low': 12, 'rr_normal': 16, 'rr_high': 20,
        'hr_low': 60, 'hr_normal': 80, 'hr_high': 100,
        'sbp_low': 90, 'sbp_normal': 115, 'sbp_high': 130,
        'temp_low': 35.5, 'temp_normal': 36.8, 'temp_high': 38.0
    },
    'geriatric': {
        'rr_low': 12, 'rr_normal': 16, 'rr_high': 24,
        'hr_low': 55, 'hr_normal': 70, 'hr_high': 90,
        'sbp_low': 90, 'sbp_normal': 125, 'sbp_high': 140,
        'temp_low': 35.5, 'temp_normal': 36.5, 'temp_high': 37.5
    }
}

In [3]:
def assign_age_category(df):
    df = df.copy()
    def _categorize(age):
        if age <= 0.083: return 'neonate'     # < 1 month (~0.083 years)
        elif age <= 1:   return 'infant'      # 1 month - 1 year
        elif age < 5:    return 'child'       # 1 - 5 years
        elif age < 13:   return 'adolescent'  # 5 - 12 years
        elif age < 65:   return 'adult'       # 13 - 64 years
        else:            return 'geriatric'   # 65+ years
    if 'age' in df.columns:
        df['age_category'] = df['age'].apply(_categorize)
    else:
        df['age_category'] = 'adult'
    return df

In [4]:
def apply_vital_range_flags(df):
    df = df.copy()
    df = assign_age_category(df)
    # SpO2 thresholds
    df['flag_spo2_low'] = df.get('spo2', pd.Series(np.nan, index=df.index)) < THRESH_SPO2_LOW
    df['flag_spo2_critical'] = df.get('spo2', pd.Series(np.nan, index=df.index)) < THRESH_SPO2_CRITICAL
    # Temperature flags (age-specific)
    df['temperature'] = df.get('temperature', pd.Series(np.nan, index=df.index))
    df['flag_temp_high'] = df.apply(lambda row: row['temperature'] >= AGE_THRESHOLDS[row['age_category']]['temp_high'] if not pd.isna(row['temperature']) else False, axis=1)
    df['flag_temp_low'] = df.apply(lambda row: row['temperature'] < AGE_THRESHOLDS[row['age_category']]['temp_low'] if not pd.isna(row['temperature']) else False, axis=1)
    # RR flags
    df['resp_rate'] = df.get('resp_rate', pd.Series(np.nan, index=df.index))
    df['flag_rr_low'] = df.apply(lambda row: row['resp_rate'] < AGE_THRESHOLDS[row['age_category']]['rr_low'] if not pd.isna(row['resp_rate']) else False, axis=1)
    df['flag_rr_high'] = df.apply(lambda row: row['resp_rate'] >= AGE_THRESHOLDS[row['age_category']]['rr_high'] if not pd.isna(row['resp_rate']) else False, axis=1)
    # HR flags
    df['heart_rate'] = df.get('heart_rate', pd.Series(np.nan, index=df.index))
    df['flag_hr_low'] = df.apply(lambda row: row['heart_rate'] < AGE_THRESHOLDS[row['age_category']]['hr_low'] if not pd.isna(row['heart_rate']) else False, axis=1)
    df['flag_hr_high'] = df.apply(lambda row: row['heart_rate'] >= AGE_THRESHOLDS[row['age_category']]['hr_high'] if not pd.isna(row['heart_rate']) else False, axis=1)
    # BP flags
    df['sbp'] = df.get('sbp', pd.Series(np.nan, index=df.index))
    df['dbp'] = df.get('dbp', pd.Series(np.nan, index=df.index))
    df['flag_sbp_low'] = df.apply(lambda row: row['sbp'] < AGE_THRESHOLDS[row['age_category']]['sbp_low'] if not pd.isna(row['sbp']) else False, axis=1)
    df['flag_sbp_high'] = df.apply(lambda row: row['sbp'] >= AGE_THRESHOLDS[row['age_category']]['sbp_high'] if not pd.isna(row['sbp']) else False, axis=1)
    # Shock index (simple)
    df['shock_index'] = df['heart_rate'] / np.clip(df['sbp'], a_min=1, a_max=None)
    return df

In [5]:
def compute_recent_trends_delta(df, trend_window=TREND_WINDOW):
    df = df.copy().sort_values("timestamp").reset_index(drop=True)
    if 'age_category' not in df.columns:
        df = assign_age_category(df)
    trends = {}
    recent = df.tail(trend_window)
    if recent.empty:
        return trends
    age_group = recent['age_category'].iloc[-1]
    thresholds = AGE_THRESHOLDS[age_group]
    # for each vital compute avg delta and flag using similar logic as user's function
    for vital in ['resp_rate', 'heart_rate', 'sbp', 'temperature', 'spo2']:
        if vital not in recent.columns or recent[vital].isnull().all():
            continue
        y = recent[vital].dropna().values
        if len(y) < 2:
            continue
        avg_delta = float(np.mean(np.diff(y)))
        latest = float(y[-1])
        trends[f"{vital}_trend"] = round(avg_delta, 3)
        # Simplified flag logic consistent with user's approach
        if vital == 'spo2':
            if latest < THRESH_SPO2_LOW:
                flag = "Still abnormal — but improving" if avg_delta > 0 else ("Abnormal and worsening" if avg_delta < 0 else "Abnormal and flat")
            else:
                flag = "Normal but deteriorating" if avg_delta < 0 else "Normal and stable"
        else:
            # map keys
            if vital == 'resp_rate':
                low, normal, high = thresholds['rr_low'], thresholds['rr_normal'], thresholds['rr_high']
            elif vital == 'heart_rate':
                low, normal, high = thresholds['hr_low'], thresholds['hr_normal'], thresholds['hr_high']
            elif vital == 'sbp':
                low, normal, high = thresholds['sbp_low'], thresholds['sbp_normal'], thresholds['sbp_high']
            elif vital == 'temperature':
                low, normal, high = thresholds['temp_low'], thresholds['temp_normal'], thresholds['temp_high']
            else:
                low, normal, high = (None, None, None)
            if (low is not None) and (latest < low or latest > high):
                if (latest > high and avg_delta < 0) or (latest < low and avg_delta > 0):
                    flag = "Still abnormal — but improving"
                else:
                    flag = "Abnormal and worsening"
            else:
                flag = "Normal but deteriorating" if avg_delta < 0 else "Normal and stable"
        trends[f"{vital}_trend_flag"] = flag
    # SBP slope and HR burden helpers
    if 'timestamp' in recent.columns and 'sbp' in recent.columns and recent['sbp'].notnull().sum() >= 2:
        # compute slope in mmHg/hour using linear regression
        times = (recent['timestamp'] - recent['timestamp'].iloc[0]).dt.total_seconds() / 3600.0  # hours from start
        coeffs = np.polyfit(times, recent['sbp'].values, 1)
        sbp_slope = float(coeffs[0])
        trends['sbp_slope_mmHg_per_hr'] = round(sbp_slope, 3)
    if 'heart_rate' in recent.columns and recent['heart_rate'].notnull().any():
        hr_burden_pct = float((recent['heart_rate'] >= 100).mean())
        trends['hr_burden_pct'] = round(hr_burden_pct, 3)
    return trends

In [6]:
def apply_lab_flags(df, lab_thresholds=LAB_THRESHOLDS):
    """
    Applies flags based on lab results. Adds columns like 'flag_bgl_high', 'flag_bicarb_low', etc.
    """
    df = df.copy()
    
    # Blood Glucose
    if 'blood_glucose' in df.columns:
        df['flag_bgl_low'] = df['blood_glucose'] < lab_thresholds['blood_glucose']['low']
        df['flag_bgl_high'] = df['blood_glucose'] > lab_thresholds['blood_glucose']['high']
        df['flag_bgl_critical'] = df['blood_glucose'] > lab_thresholds['blood_glucose']['critical']
    
    # Bicarbonate (HCO3)
    if 'bicarbonate' in df.columns:
        df['flag_bicarb_low'] = df['bicarbonate'] < lab_thresholds['bicarbonate']['normal_low']
        df['flag_bicarb_critical'] = df['bicarbonate'] < lab_thresholds['bicarbonate']['critical']
    
    # Ketones
    if 'ketones' in df.columns:
        df['flag_ketones_high'] = df['ketones'] > lab_thresholds['ketones']['high']
        df['flag_ketones_critical'] = df['ketones'] > lab_thresholds['ketones']['critical']
    
    # pH
    if 'ph' in df.columns:
        df['flag_ph_low'] = df['ph'] < lab_thresholds['ph']['normal_low']
        df['flag_ph_critical'] = df['ph'] < lab_thresholds['ph']['critical']
    
    return df

In [7]:
def detect_dka_suspicion(df, now=None, window_hours=6):
    """
    Returns a dict with suspicion_score, level, flags, metrics, and recommended action.
    Corrected logic: Requires hyperglycemia (BGL>=250) and uses a points-based system
    with age-specific thresholds. Now includes lab report analysis.
    """
    df = df.copy()
    if now is None:
        now = df['timestamp'].max() if 'timestamp' in df.columns else pd.Timestamp.now()
    window_start = pd.to_datetime(now) - pd.Timedelta(hours=window_hours)
    w = df[(df['timestamp'] >= window_start) & (df['timestamp'] <= pd.to_datetime(now))].copy()
    if w.empty:
        return {'error': 'No data in the requested window', 'window_start': window_start, 'window_end': now}

    # Ensure flags and trends are present
    w = apply_vital_range_flags(w)
    w = apply_lab_flags(w)  # NEW: Apply lab value flags
    trends = compute_recent_trends_delta(w, trend_window=TREND_WINDOW)

    # 1. CHECK PREREQUISITE: HYPERGLYCEMIA (using lab flag)
    # Check if critical hyperglycemia was ever flagged in the window
    has_critical_hyperglycemia = w['flag_bgl_critical'].any() if 'flag_bgl_critical' in w.columns else False
    
    # Fallback: If the flag doesn't exist, calculate it manually
    if not has_critical_hyperglycemia and 'blood_glucose' in w.columns:
        recent_bgl = w['blood_glucose'].dropna()
        if not recent_bgl.empty:
            has_critical_hyperglycemia = recent_bgl.iloc[-1] >= 250

    if not has_critical_hyperglycemia:
        # No hyperglycemia, no DKA suspicion regardless of other signs.
        return {
            'patient_id': w.get('patient_id', pd.Series([None])).iloc[-1] if 'patient_id' in w.columns else None,
            'window_start': window_start,
            'window_end': now,
            'suspicion_score': 0,
            'level': 'Low',
            'flags': ['no_hyperglycemia'],
            'metrics': {'blood_glucose': w['blood_glucose'].iloc[-1] if 'blood_glucose' in w.columns else None},
            'safety_override': False,
            'safety_reasons': [],
            'recommended_action': ['DKA unlikely without hyperglycemia.'],
            'trends': trends
        }

    # 2. INITIALIZE SCORE. Hyperglycemia is confirmed.
    score = 0
    flags_out = ['hyperglycemia_bgl_>=250']

    # 3. ASSESS VITAL SIGNS (Using the AGE-SPECIFIC flags already computed)
    # Kussmaul Breathing (RR)
    if w['flag_rr_high'].any():
        score += 1
        flags_out.append('tachypnea_kussmaul')

    # Tachycardia (HR) - Use the pre-computed AGE-SPECIFIC high HR flag for burden
    hr_burden_pct = w['flag_hr_high'].mean()
    if hr_burden_pct >= 0.5:
        score += 1
        flags_out.append('sustained_tachycardia')

    # Falling BP Trend
    sbp_slope = trends.get('sbp_slope_mmHg_per_hr', 0)
    if sbp_slope is not None and sbp_slope < -1.0:
        score += 1
        flags_out.append('falling_bp_trend')

    # No High Fever (Context Modifier)
    if not w['flag_temp_high'].any():
        score += 1
        flags_out.append('no_high_fever')
    else:
        score = max(0, score - 1)
        flags_out.append('high_fever_present_caution')

    # 4. ASSESS LAB VALUES (NEW SECTION)
    # Critical Acidosis (pH)
    if 'flag_ph_critical' in w.columns and w['flag_ph_critical'].any():
        score += 2  # Strong evidence
        flags_out.append('critical_acidosis_ph')
    elif 'flag_ph_low' in w.columns and w['flag_ph_low'].any():
        score += 1  # Some evidence
        flags_out.append('acidosis_ph_low')

    # Critical Bicarbonate Loss
    if 'flag_bicarb_critical' in w.columns and w['flag_bicarb_critical'].any():
        score += 2  # Strong evidence
        flags_out.append('critical_bicarb_low')
    elif 'flag_bicarb_low' in w.columns and w['flag_bicarb_low'].any():
        score += 1  # Some evidence
        flags_out.append('bicarb_low')

    # Ketones
    if 'flag_ketones_critical' in w.columns and w['flag_ketones_critical'].any():
        score += 2  # Strong evidence
        flags_out.append('critical_ketonemia')
    elif 'flag_ketones_high' in w.columns and w['flag_ketones_high'].any():
        score += 1  # Some evidence
        flags_out.append('ketonemia')

    # 5. SAFETY OVERRIDES
    safety_override = False
    safety_reasons = []
    
    # Absolute Hypotension
    min_sbp = w['sbp'].min()
    if min_sbp < 90:
        safety_override = True
        safety_reasons.append('hypotension_sbp<90')
        flags_out.append('hypotension_sbp<90')

    # Critical Acidosis (pH < 7.2) is also a safety override
    if 'flag_ph_critical' in w.columns and w['flag_ph_critical'].any():
        safety_override = True
        safety_reasons.append('critical_acidosis_ph<7.2')
        # Flag already added above

    # Altered Mental Status (if data available)
    if 'altered_mental_status' in w.columns and w['altered_mental_status'].any():
        safety_override = True
        safety_reasons.append('altered_mental_status')
        flags_out.append('altered_mental_status')
        score += 2  # Also add to score as it's a severe sign

    # 6. DETERMINE LEVEL OF SUSPICION
    level = 'Low'
    if safety_override:
        level = 'High'
    elif score >= 4:
        level = 'High'
    elif score >= 2:
        level = 'Moderate'

    # 7. RECOMMENDED ACTIONS
    recommended_action = []
    if level in ['Moderate', 'High']:
        recommended_action.append('URGENT: Check blood ketones (serum or urine)')
        recommended_action.append('Obtain VBG/ABG for pH and bicarbonate level')
        
        # Tailor fluid advice based on BP
        if min_sbp < 90:
            recommended_action.append('Initiate IV access and begin rapid fluid resuscitation')
        else:
            recommended_action.append('Initiate IV access and begin fluid resuscitation protocol')
            
    if level == 'High':
        recommended_action.append('Alert medical team immediately for possible DKA protocol activation.')
        recommended_action.append('Consider ICU transfer.')
    if safety_override:
        recommended_action.append('CRITICAL: Safety override triggered. Requires immediate physician assessment.')

    # 8. PREPARE METRICS
    metrics = {
        'blood_glucose': w['blood_glucose'].iloc[-1] if 'blood_glucose' in w.columns else None,
        'hr_burden_pct': round(hr_burden_pct, 3),
        'sbp_slope_mmHg_per_hr': sbp_slope,
    }
    # Add lab metrics if they exist
    for lab_param in ['bicarbonate', 'ketones', 'ph']:
        if lab_param in w.columns:
            metrics[lab_param] = w[lab_param].iloc[-1]

    # 9. RETURN RESULTS
    result = {
        'patient_id': w.get('patient_id', pd.Series([None])).iloc[-1] if 'patient_id' in w.columns else None,
        'window_start': window_start,
        'window_end': now,
        'suspicion_score': score,
        'level': level,
        'flags': flags_out,
        'metrics': metrics,
        'safety_override': safety_override,
        'safety_reasons': safety_reasons,
        'recommended_action': recommended_action,
        'trends': trends
    }
    return result

In [8]:
if __name__ == "__main__":
    print("Testing DKA Suspicion Pipeline...")
    
    # Create simple sample data for an adult with HIGH DKA suspicion
    sample_timestamps = pd.date_range(start='2024-05-27 08:00', periods=6, freq='1h') # 6 hours of data
    example_data = pd.DataFrame({
        'patient_id': ['TEST_PATIENT_1'] * 6,
        'timestamp': sample_timestamps,
        'blood_glucose': [380, 395, 410, 425, 440, 455],  # Critical hyperglycemia
        'heart_rate': [110, 112, 115, 118, 120, 122],     # Tachycardia
        'resp_rate': [24, 25, 26, 26, 27, 28],            # Tachypnea (Kussmaul breathing)
        'sbp': [120, 118, 115, 112, 108, 105],            # Falling trend
        'dbp': [80, 78, 76, 74, 72, 70],
        'temperature': [37.0, 37.1, 37.0, 37.2, 37.1, 37.0], # No fever
        'spo2': [97, 97, 96, 96, 97, 97],
        'age': [35, 35, 35, 35, 35, 35]  # Adult
    })
    
    print("Sample Data Created:")
    print(example_data[['timestamp', 'blood_glucose', 'heart_rate', 'resp_rate', 'sbp']])
    print("\n" + "="*50 + "\n")
    
    # Run the DKA suspicion assessment
    try:
        result = detect_dka_suspicion(example_data)
        print("DKA Suspicion Assessment Result:")
        print(f"Patient ID: {result.get('patient_id', 'N/A')}")
        print(f"Suspicion Score: {result['suspicion_score']}")
        print(f"Level: {result['level']}")
        print(f"Flags: {result['flags']}")
        print(f"Safety Override: {result['safety_override']}")
        print(f"Recommended Actions: {result['recommended_action']}")
        print(f"Metrics - BGL: {result['metrics'].get('blood_glucose')}, HR Burden: {result['metrics'].get('hr_burden_pct')}")
        
    except Exception as e:
        print(f"Error running the function: {e}")
        import traceback
        traceback.print_exc()

Testing DKA Suspicion Pipeline...
Sample Data Created:
            timestamp  blood_glucose  heart_rate  resp_rate  sbp
0 2024-05-27 08:00:00            380         110         24  120
1 2024-05-27 09:00:00            395         112         25  118
2 2024-05-27 10:00:00            410         115         26  115
3 2024-05-27 11:00:00            425         118         26  112
4 2024-05-27 12:00:00            440         120         27  108
5 2024-05-27 13:00:00            455         122         28  105


DKA Suspicion Assessment Result:
Patient ID: TEST_PATIENT_1
Suspicion Score: 4
Level: High
Flags: ['hyperglycemia_bgl_>=250', 'tachypnea_kussmaul', 'sustained_tachycardia', 'falling_bp_trend', 'no_high_fever']
Safety Override: False
Recommended Actions: ['URGENT: Check blood ketones (serum or urine)', 'Obtain VBG/ABG for pH and bicarbonate level', 'Initiate IV access and begin fluid resuscitation protocol', 'Alert medical team immediately for possible DKA protocol activation.', 'Cons

In [9]:
now = pd.Timestamp.now().floor('min')
timestamps = [now - pd.Timedelta(hours=6) + pd.Timedelta(minutes=60*i) for i in range(7)]  # 0h..6h, 7 points
data = {
    'timestamp': timestamps,
    'age': [0.5]*7,  # 6 months old -> infant category (but treat as adult-like for demo) - using infant here
    'patient_id': ['demo123']*7,
    # RR elevated (Kussmaul-like)
    'resp_rate': [24, 25, 26, 27, 26, 25, 24],
    # HR mostly >=100 for >50% of window (4/7 points >=100 => ~57%)
    'heart_rate': [95, 102, 108, 110, 105, 98, 92],
    # SBP falling from 120 -> 100 (cumulative drop 20 mmHg)
    'sbp': [120, 118, 115, 112, 108, 104, 100],
    'temperature': [36.5, 36.6, 36.4, 36.7, 36.6, 36.5, 36.6],
    'spo2': [97, 96, 96, 95, 96, 95, 96],
    # supportive signs
    'vomiting': [False, False, True, True, False, False, False],
    'dry_mucosa': [True, True, True, True, True, True, True],
    'altered_mental_status': [False]*7
}
df_demo = pd.DataFrame(data)

# Run detection on synthetic dataset
result_demo = detect_dka_suspicion(df_demo, now=now, window_hours=6)
print(result_demo)

{'patient_id': 'demo123', 'window_start': Timestamp('2025-11-08 10:25:00'), 'window_end': Timestamp('2025-11-08 16:25:00'), 'suspicion_score': 0, 'level': 'Low', 'flags': ['no_hyperglycemia'], 'metrics': {'blood_glucose': None}, 'safety_override': False, 'safety_reasons': [], 'recommended_action': ['DKA unlikely without hyperglycemia.'], 'trends': {'resp_rate_trend': -0.2, 'resp_rate_trend_flag': 'Normal but deteriorating', 'heart_rate_trend': -2.0, 'heart_rate_trend_flag': 'Normal but deteriorating', 'sbp_trend': -3.6, 'sbp_trend_flag': 'Normal but deteriorating', 'temperature_trend': 0.0, 'temperature_trend_flag': 'Normal and stable', 'spo2_trend': 0.0, 'spo2_trend_flag': 'Normal and stable', 'sbp_slope_mmHg_per_hr': -3.629, 'hr_burden_pct': 0.667}}
