In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta

In [2]:
# Absolute (not age-specific) alert thresholds shared by the detection cells.
THRESH_SPO2_LOW = 92  # SpO2 (%) strictly below this is flagged as low
THRESH_SPO2_CRITICAL = 88  # SpO2 (%) strictly below this is flagged as critical
THRESH_SHOCK_INDEX_WARNING = 0.9  # shock index (hr / sbp) at or above this is a warning
THRESH_SHOCK_INDEX_CRITICAL = 1.0  # shock index (hr / sbp) at or above this is critical
THRESH_MAP_LOW = 65  # Mean Arterial Pressure threshold for shock (presumably mmHg - TODO confirm)
THRESH_LACTATE_HIGH = 2.0  # mmol/L for tissue hypoperfusion
THRESH_LACTATE_CRITICAL = 4.0  # mmol/L, critical lactate level
THRESH_UO_LOW = 0.5  # mL/kg/hr urine output (oliguria)

# Trend Analysis
TREND_WINDOW = 6  # Number of most recent readings used for short-term trend analysis


# AGE-SPECIFIC VITAL SIGN THRESHOLDS (Low, Normal, High)
#
# Outer keys are the category labels produced by `assign_age_category`.
# Inner keys follow the pattern `<vital>_low` / `<vital>_normal` / `<vital>_high`
# for rr (respiratory rate), hr (heart rate), sbp (systolic blood pressure)
# and temp (temperature, presumably in degrees Celsius - TODO confirm).
# The flagging code treats a reading as low when it is `< *_low` and as high
# when it is `>= *_high`; diastolic limits are estimated as 0.6 * the sbp limits.

AGE_THRESHOLDS = {
    'neonate': {
        'rr_low': 30, 'rr_normal': 40, 'rr_high': 60,
        'hr_low': 100, 'hr_normal': 140, 'hr_high': 160,
        'sbp_low': 60, 'sbp_normal': 70, 'sbp_high': 90,
        'temp_low': 36.0, 'temp_normal': 37.2, 'temp_high': 38.0
    },
    'infant': {
        'rr_low': 24, 'rr_normal': 30, 'rr_high': 40,
        'hr_low': 80, 'hr_normal': 120, 'hr_high': 140,
        'sbp_low': 70, 'sbp_normal': 85, 'sbp_high': 100,
        'temp_low': 36.0, 'temp_normal': 37.2, 'temp_high': 38.0
    },
    'child': {
        'rr_low': 16, 'rr_normal': 20, 'rr_high': 30,
        'hr_low': 70, 'hr_normal': 90, 'hr_high': 110,
        'sbp_low': 80, 'sbp_normal': 95, 'sbp_high': 110,
        'temp_low': 36.0, 'temp_normal': 37.0, 'temp_high': 38.0
    },
    'adolescent': {
        'rr_low': 12, 'rr_normal': 16, 'rr_high': 20,
        'hr_low': 60, 'hr_normal': 75, 'hr_high': 100,
        'sbp_low': 90, 'sbp_normal': 105, 'sbp_high': 120,
        'temp_low': 35.8, 'temp_normal': 36.8, 'temp_high': 37.8
    },
    'adult': {
        'rr_low': 12, 'rr_normal': 16, 'rr_high': 20,
        'hr_low': 60, 'hr_normal': 80, 'hr_high': 100,
        'sbp_low': 90, 'sbp_normal': 115, 'sbp_high': 130,
        'temp_low': 35.5, 'temp_normal': 36.8, 'temp_high': 38.0
    },
    'geriatric': {  # Added for elderly patients who may have different baselines
        'rr_low': 12, 'rr_normal': 16, 'rr_high': 24, # Often higher RR baseline
        'hr_low': 55, 'hr_normal': 70, 'hr_high': 90, # Often lower HR
        'sbp_low': 90, 'sbp_normal': 125, 'sbp_high': 140, # Often higher SBP
        'temp_low': 35.5, 'temp_normal': 36.5, 'temp_high': 37.5 # Often lower temp
    }
}

In [3]:
def assign_age_category(df):
    """
    Return a copy of `df` with an 'age_category' column added.

    Categories are derived from the 'age' column (in years):
        age <= 0.083      -> 'neonate'     (about one month or less)
        0.083 < age <= 1  -> 'infant'
        1 < age < 5       -> 'child'
        5 <= age < 13     -> 'adolescent'
        13 <= age < 65    -> 'adult'
        age >= 65         -> 'geriatric'

    A missing age (NaN/None) is categorised as 'adult', the same default that
    is used when the frame has no 'age' column at all. Without this guard a
    NaN fails every comparison and would silently end up as 'geriatric'.

    The input frame is not modified.
    """
    df = df.copy()

    def _categorize(age):
        # NaN compares False against everything, so handle it explicitly
        # instead of letting it fall through to the last branch.
        if pd.isna(age):
            return 'adult'
        if age <= 0.083:
            return 'neonate'
        if age <= 1:
            return 'infant'
        if age < 5:
            return 'child'
        if age < 13:
            return 'adolescent'
        if age < 65:
            return 'adult'
        return 'geriatric'

    if 'age' in df.columns:
        df['age_category'] = df['age'].apply(_categorize)
    else:
        # Default to adult if age is not provided
        df['age_category'] = 'adult'

    return df

In [4]:
def apply_vital_range_flags(df):
    """
    Flag abnormal vital signs using absolute and age-specific thresholds.

    Expects the columns 'spo2', 'temperature', 'rr', 'hr', 'sbp' and 'dbp'
    (a missing one raises KeyError). An optional 'age' column selects the
    age category; without it every row is treated as 'adult'.

    Returns a copy of `df` with these columns added:
      - 'age_category'
      - 'flag_spo2_low', 'flag_spo2_critical'      (SpO2 below the absolute limits)
      - 'flag_<vital>_low'  for temp/rr/hr/sbp/dbp (reading <  low limit)
      - 'flag_<vital>_high' for temp/rr/hr/sbp/dbp (reading >= high limit)
      - 'shock_index'                              (hr / sbp, sbp clipped to >= 1)
      - 'flag_si_warning', 'flag_si_critical'

    Missing readings (NaN) compare False and are therefore never flagged.

    The per-row limits are looked up once per threshold by mapping the age
    category, so the comparisons are vectorised and an empty frame simply
    yields empty flag columns.
    """
    df = assign_age_category(df)  # returns a copy carrying 'age_category'

    def _limit(key, factor=None):
        # Per-row threshold: resolve `key` for each row's age category.
        per_category = {
            category: (limits[key] if factor is None else limits[key] * factor)
            for category, limits in AGE_THRESHOLDS.items()
        }
        return df['age_category'].map(per_category)

    # SpO₂ flags (Absolute threshold)
    df['flag_spo2_low'] = df['spo2'] < THRESH_SPO2_LOW
    df['flag_spo2_critical'] = df['spo2'] < THRESH_SPO2_CRITICAL

    # Temperature flags
    df['flag_temp_high'] = df['temperature'] >= _limit('temp_high')
    df['flag_temp_low'] = df['temperature'] < _limit('temp_low')

    # Respiratory Rate flags
    df['flag_rr_low'] = df['rr'] < _limit('rr_low')
    df['flag_rr_high'] = df['rr'] >= _limit('rr_high')

    # Heart Rate flags
    df['flag_hr_low'] = df['hr'] < _limit('hr_low')
    df['flag_hr_high'] = df['hr'] >= _limit('hr_high')

    # Shock Index; clipping sbp to >= 1 avoids division by zero
    df['shock_index'] = df['hr'] / np.clip(df['sbp'], a_min=1, a_max=None)
    df['flag_si_warning'] = df['shock_index'] >= THRESH_SHOCK_INDEX_WARNING
    df['flag_si_critical'] = df['shock_index'] >= THRESH_SHOCK_INDEX_CRITICAL

    # Blood Pressure flags; diastolic limits are estimated as 60% of the systolic ones
    df['flag_sbp_low'] = df['sbp'] < _limit('sbp_low')
    df['flag_sbp_high'] = df['sbp'] >= _limit('sbp_high')
    df['flag_dbp_low'] = df['dbp'] < _limit('sbp_low', factor=0.6)
    df['flag_dbp_high'] = df['dbp'] >= _limit('sbp_high', factor=0.6)

    return df

In [5]:
def compute_recent_trends_delta(df):
    """
    Summarise the short-term trend of each vital sign.

    The frame is sorted by 'timestamp' and only the last TREND_WINDOW rows are
    analysed. For each of rr, hr, sbp, temperature and spo2 that is present
    with at least two non-missing readings, the result contains:
      - '<vital>_trend':      mean difference between consecutive readings,
                              rounded to 3 decimals
      - '<vital>_trend_flag': a text label combining whether the latest reading
                              is inside the normal range with the trend direction

    SpO2 is judged against THRESH_SPO2_LOW; the other vitals against the
    low/high limits of the age category found on the most recent row
    ('adult' when no age information is available).

    If both 'hr' and 'sbp' are present, 'shock_index_trend' and
    'shock_index_trend_flag' are added, computed from rows where both readings
    are available (sbp is clipped to >= 1 to avoid division by zero).

    Returns a dict; it is empty when `df` has no rows.
    """
    # Nothing to analyse; also avoids indexing the last row of an empty frame.
    if df.empty:
        return {}

    df = df.sort_values("timestamp").reset_index(drop=True)

    if 'age_category' not in df.columns:
        df = assign_age_category(df)

    trends = {}
    recent = df.tail(TREND_WINDOW)
    thresholds = AGE_THRESHOLDS[recent['age_category'].iloc[-1]]

    # Threshold keys bounding the normal range of each age-dependent vital.
    bound_keys = {
        'rr': ('rr_low', 'rr_high'),
        'hr': ('hr_low', 'hr_high'),
        'sbp': ('sbp_low', 'sbp_high'),
        'temperature': ('temp_low', 'temp_high'),
    }

    for vital in ['rr', 'hr', 'sbp', 'temperature', 'spo2']:
        if vital not in recent.columns:
            continue

        y = recent[vital].dropna().values
        if len(y) < 2:
            # Fewer than two readings: no difference can be computed.
            continue

        avg_delta = np.mean(np.diff(y))
        latest = y[-1]
        trends[f"{vital}_trend"] = round(avg_delta, 3)

        if vital == 'spo2':
            if latest < THRESH_SPO2_LOW:
                if avg_delta > 0:
                    flag = "Still abnormal — but improving"
                elif avg_delta < 0:
                    flag = "Abnormal and worsening"
                else:
                    flag = "Abnormal and flat"
            elif avg_delta < 0:
                flag = "Normal but deteriorating"
            else:
                flag = "Normal and stable"
        else:
            low_key, high_key = bound_keys[vital]
            low, high = thresholds[low_key], thresholds[high_key]

            if latest < low or latest > high:
                # Improving means moving back towards the normal range.
                recovering = (latest > high and avg_delta < 0) or (latest < low and avg_delta > 0)
                flag = "Still abnormal — but improving" if recovering else "Abnormal and worsening"
            elif avg_delta < 0:
                flag = "Normal but deteriorating"
            else:
                flag = "Normal and stable"

        trends[f"{vital}_trend_flag"] = flag

    # Shock Index trend
    if all(col in recent.columns for col in ['hr', 'sbp']):
        # Keep only rows where both readings exist; a single NaN would
        # otherwise turn the mean delta into NaN and mislabel the trend.
        paired = recent[['hr', 'sbp']].dropna()
        si = paired['hr'].values / np.clip(paired['sbp'].values, a_min=1, a_max=None)

        if len(si) >= 2:
            avg_si_delta = np.mean(np.diff(si))
            trends['shock_index_trend'] = round(avg_si_delta, 3)

            latest_si = si[-1]
            if latest_si >= THRESH_SHOCK_INDEX_WARNING:
                flag = "Shock Index high — improving" if avg_si_delta < 0 else "Shock Index high — worsening"
            else:
                flag = "Normal but improving" if avg_si_delta < 0 else "Normal but rising"

            trends['shock_index_trend_flag'] = flag

    return trends

In [6]:
def _find_sustained_runs(df, col, thresh, operator="<", min_duration_minutes=30):
    """
    Find sustained runs where `col` op thresh holds for at least min_duration_minutes.
    Returns list of (start_ts, end_ts, duration_timedelta, slice_df).
    operator: "<" or "<=" or ">" etc. (only "<" and ">=" used here)
    """
    if df.empty:
        return []

    cond = None
    if operator == "<":
        cond = df[col] < thresh
    elif operator == "<=":
        cond = df[col] <= thresh
    elif operator == ">=":
        cond = df[col] >= thresh
    else:
        raise ValueError("operator must be one of '<','<=','>='")

    runs = []
    df = df.sort_values("timestamp").reset_index(drop=True)
    df['cond'] = cond.astype(int)

    # find contiguous segments where cond==1
    df['grp'] = (df['cond'] != df['cond'].shift(1)).cumsum()
    for _, g in df.groupby('grp'):
        if g['cond'].iloc[0] == 1:
            start = g['timestamp'].iloc[0]
            end = g['timestamp'].iloc[-1]
            duration = end - start
            if duration >= timedelta(minutes=min_duration_minutes):
                runs.append((start, end, duration, g.drop(columns=['cond','grp'])))
    # cleanup
    df.drop(columns=['cond','grp'], inplace=True, errors='ignore')
    return runs

def _mean_or_nan(series):
    s = series.dropna()
    return s.mean() if len(s) > 0 else np.nan


In [7]:
def detect_pneumothorax_suspect(df, spo2_thresh=88, duration_minutes=30,
                                hr_thresh=100, rr_thresh=22, fever_thresh=38.0):
    """
    Screen a series of vital-sign readings for a possible pneumothorax.

    A run of consecutive readings is a suspect window when:
      - SpO₂ stays below `spo2_thresh` for at least `duration_minutes`
        (time between the first and last reading of the run),
      - at least one reading in the run has HR >= `hr_thresh` and at least
        one has RR >= `rr_thresh`,
      - no reading in the run has temperature >= `fever_thresh`
        (missing temperatures are not counted as fever).

    Requires the columns 'timestamp', 'spo2', 'hr', 'rr' and 'temperature'.
    Runs are evaluated in chronological order and the first match is returned.

    Returns:
        On a match, a dict with 'pneumothorax_suspect' (True), 'start', 'end',
        'duration_min' (float minutes), 'status' (message including the SpO₂,
        HR and RR trend labels) and 'trends' (output of
        `compute_recent_trends_delta` for the window).
        Otherwise {'pneumothorax_suspect': False, 'status': ...}.
    """
    # Chronological order with a positional index, so run detection works on
    # consecutive readings regardless of how the caller ordered the rows.
    df = df.sort_values("timestamp").reset_index(drop=True)

    # Step 1: sustained SpO₂ < threshold runs, via the shared run finder
    # instead of a second hand-written row loop.
    sustained_runs = _find_sustained_runs(
        df, 'spo2', spo2_thresh, operator="<", min_duration_minutes=duration_minutes
    )

    # Step 2: evaluate each sustained run
    for start, end, duration, window_df in sustained_runs:
        fast_heart = (window_df['hr'] >= hr_thresh).any()
        fast_breathing = (window_df['rr'] >= rr_thresh).any()
        # NaN compares False, so only an actual reading can count as fever.
        fever_present = (window_df['temperature'] >= fever_thresh).any()

        if not (fast_heart and fast_breathing) or fever_present:
            continue

        trends = compute_recent_trends_delta(window_df)

        # Extract slope messages
        spo2_flag = trends.get("spo2_trend_flag", "No SpO₂ trend")
        hr_flag_msg = trends.get("hr_trend_flag", "No HR trend")
        rr_flag_msg = trends.get("rr_trend_flag", "No RR trend")

        return {
            "pneumothorax_suspect": True,
            "start": start,
            "end": end,
            "duration_min": duration.total_seconds() / 60,
            "status": (
                f"Your oxygen stayed low and you’re breathing/heart are fast without infection "
                f"— possible collapsed lung ({spo2_flag}, {hr_flag_msg}, {rr_flag_msg})."
            ),
            "trends": trends
        }

    return {"pneumothorax_suspect": False, "status": "No evidence for pneumothorax"}


In [8]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Generate sample data
# Positive test case: one hour of low SpO2 with fast HR/RR and no fever.
np.random.seed(42)  # fixed seed so the sampled readings are the same on every run
timestamps = [datetime(2025, 9, 19, 8, 0) + timedelta(minutes=5*i) for i in range(60)]  # 5 hours, every 5 min

data = []
for t in timestamps:
    if datetime(2025, 9, 19, 9, 0) <= t <= datetime(2025, 9, 19, 10, 0):
        # Simulate pneumothorax window (low SpO2, high HR, high RR, no fever)
        # (randint's upper bound is exclusive)
        spo2 = np.random.randint(84, 87)   # below 88%
        hr   = np.random.randint(105, 120) # high HR
        rr   = np.random.randint(24, 30)   # high RR
        temp = np.random.uniform(36.5, 37.2) # normal temp (no fever)
    else:
        # Normal baseline
        spo2 = np.random.randint(95, 99)
        hr   = np.random.randint(70, 90)
        rr   = np.random.randint(14, 18)
        temp = np.random.uniform(36.5, 37.5)
    data.append([t, spo2, hr, rr, temp])

df_test = pd.DataFrame(data, columns=["timestamp", "spo2", "hr", "rr", "temperature"])

print(df_test.head(15))
result= detect_pneumothorax_suspect(df_test)
print(result)

# Every reading in the simulated window meets the criteria, so this must hold.
assert result["pneumothorax_suspect"], "simulated pneumothorax window was not detected"


             timestamp  spo2   hr  rr  temperature
0  2025-09-19 08:00:00    97   88  16    36.795204
1  2025-09-19 08:05:00    95   72  16    36.969092
2  2025-09-19 08:10:00    97   80  16    37.195725
3  2025-09-19 08:15:00    95   76  17    37.162149
4  2025-09-19 08:20:00    98   82  15    36.663407
5  2025-09-19 08:25:00    98   82  17    37.128849
6  2025-09-19 08:30:00    97   71  16    37.090364
7  2025-09-19 08:35:00    96   81  17    37.454940
8  2025-09-19 08:40:00    98   85  17    36.611191
9  2025-09-19 08:45:00    95   86  15    36.855979
10 2025-09-19 08:50:00    98   86  15    37.374172
11 2025-09-19 08:55:00    96   73  15    37.302975
12 2025-09-19 09:00:00    85  110  27    36.530860
13 2025-09-19 09:05:00    85  108  26    36.832571
14 2025-09-19 09:10:00    84  118  26    36.724944
{'pneumothorax_suspect': True, 'start': Timestamp('2025-09-19 09:00:00'), 'end': Timestamp('2025-09-19 10:00:00'), 'duration_min': 60.0, 'status': 'Your oxygen stayed low and you’re br

In [9]:
# Negative test case (fever present, should NOT trigger pneumothorax suspect)
# Tuple layout: (timestamp, spo2, hr, rr, temperature, sbp)
data_fever = [
    ("2025-09-19 09:00:00", 87, 105, 24, 38.5, 110),  # low spo2 but fever
    ("2025-09-19 09:10:00", 86, 108, 23, 38.4, 112),
    ("2025-09-19 09:20:00", 85, 110, 25, 38.6, 115),
    ("2025-09-19 09:30:00", 87, 106, 22, 38.7, 113),
    ("2025-09-19 09:40:00", 86, 109, 24, 38.8, 111),
    ("2025-09-19 09:50:00", 85, 111, 23, 38.9, 110),
]

df_fever = pd.DataFrame(
    data_fever,
    columns=["timestamp", "spo2", "hr", "rr", "temperature", "sbp"]
)
df_fever["timestamp"] = pd.to_datetime(df_fever["timestamp"])

result_fever = detect_pneumothorax_suspect(df_fever)
print(result_fever)

# Check the expectation instead of relying on someone reading the printout.
assert not result_fever["pneumothorax_suspect"], "fever window must not be flagged"


{'pneumothorax_suspect': False, 'status': 'No evidence for pneumothorax'}
