# Pharmacy Orders Feature Script

## Description
This script extracts features from the Pharmacy system (`pharmacy.csv`), capturing the volume and types of medications processed and dispensed during each hospital admission (one output row per `hadm_id`).

## Clinical Justification for Hospital-Acquired Pressure Injury (HAPI) Research
* **Polypharmacy:** A high `num_pharm_orders` count correlates with adverse drug events and overall physiological instability.
* **Verification:** Pharmacy data serves as a middle-ground verification between the physician's order (Prescriptions) and the nurse's administration (EMAR).
* **Specific Classes:** We track Vasopressors (perfusion risk) and Sedatives/Opioids (immobility risk) to see if these dangerous drugs were actively being dispensed.

## Inputs & Outputs
* **Input:** `pharmacy.csv`
* **Output:** `pharmacy_feat.csv`
* **Key Features:** `has_vasoactive_pharm`, `has_sedation_pharm`, `has_opioid_pharm`

In [1]:
import os
import pandas as pd

In [2]:
# Configuration

# Root folder that holds the raw extract and receives the feature file.
BASE_DIR = r"D:\School\5141"

# Define Input/Output Paths
# NOTE(review): the input path repeats "pharmacy.csv" -- presumably the raw
# file sits inside a folder that is itself named "pharmacy.csv"; confirm
# against the local data layout before changing it.
PHARM_PATH = os.path.join(
    BASE_DIR,
    "pharmacy.csv",
    "pharmacy.csv",
)
PHARM_FEAT_PATH = os.path.join(
    BASE_DIR,
    "pharmacy_feat.csv",
)

In [3]:
# High-risk medication classes.
# Each class is spelled out once; the combined list at the bottom is derived
# from the three class lists so they cannot drift apart.

# VASOACTIVE AGENTS
# Peripheral Vasoconstriction leads to Ischemia.
VASOACTIVE_KEYWORDS = [
    "norepinephrine",
    "levophed",
    "epinephrine",
    "vasopressin",
    "dopamine",
    "dobutamine",
    "phenylephrine",
    "neosynephrine",
]

# SEDATIVES
# Immobility & Sensory Loss: Patients on these drips cannot move to relieve pressure.
SEDATION_KEYWORDS = [
    "propofol",
    "midazolam",
    "versed",
    "lorazepam",
    "ativan",
    "dexmedetomidine",
    "precedex",
]

# OPIOIDS
# CNS Depression: Reduces spontaneous movement during sleep/rest.
OPIOID_KEYWORDS = [
    "fentanyl",
    "morphine",
    "hydromorphone",
    "dilaudid",
    "oxycodone",
]

# GENERAL HIGH RISK
# Combined list of all drugs known to impact perfusion or mobility
# (vasoactive agents, then sedatives, then opioids).
HIGH_RISK_MED_KEYWORDS = [
    *VASOACTIVE_KEYWORDS,
    *SEDATION_KEYWORDS,
    *OPIOID_KEYWORDS,
]

In [4]:
def load_pharmacy(path: str):
    """
    Load pharmacy.csv and standardize a unified lowercase text column 'med_text'
    for medication names.

    Parameters
    ----------
    path : str
        Location of the pharmacy CSV file.

    Returns
    -------
    pandas.DataFrame
        Three columns: 'hadm_id' (nullable Int64), the detected medication
        name column (unchanged), and 'med_text' (lowercased medication name).
        Rows whose medication name is missing keep a missing 'med_text'
        instead of the literal string "nan".

    Raises
    ------
    ValueError
        If the file has no 'hadm_id' column or none of the known medication
        name columns.
    """
    df = pd.read_csv(path, low_memory=False)

    # Make sure hadm_id is a nullable integer
    if "hadm_id" not in df.columns:
        raise ValueError(f"'hadm_id' not found in pharmacy columns: {df.columns}")

    df["hadm_id"] = df["hadm_id"].astype("Int64")

    # Try to detect the medication name column (first candidate present wins)
    possible_med_cols = ("medication", "drug", "med", "pharmacy_drug_name")
    med_col = next((c for c in possible_med_cols if c in df.columns), None)

    if med_col is None:
        raise ValueError(
            f"No medication name column found. Columns in file: {df.columns}"
        )

    # Keep only what we need
    df = df[["hadm_id", med_col]].copy()

    # Create a clean lowercase text column for keyword matching.
    # A plain astype(str) would turn missing names into the text "nan", which
    # would then be counted as a real, distinct medication; mask those rows
    # back to missing after lowercasing.
    raw_names = df[med_col]
    df["med_text"] = raw_names.astype(str).str.lower().where(raw_names.notna())

    return df


def _keyword_flag(med_text, keywords):
    """Return a boolean Series marking rows whose text contains any keyword as a whole word."""
    pattern = r"\b(?:" + "|".join(keywords) + r")\b"
    return med_text.str.contains(pattern, na=False, regex=True)


def build_pharm_features(df: pd.DataFrame):
    """
    Aggregate pharmacy orders to hadm-level features.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pharmacy order with at least the columns 'hadm_id' and
        'med_text' (lowercased medication name). The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per hadm_id with:
            - num_pharm_orders
            - num_distinct_pharm_meds
            - num_high_risk_pharm_orders
            - has_high_risk_pharm_order
            - has_vasoactive_pharm
            - has_sedation_pharm
            - has_opioid_pharm
        Rows with a missing hadm_id are dropped by the grouping.
    """
    admission = df["hadm_id"]
    med_text = df["med_text"]
    orders_by_admission = med_text.groupby(admission)

    # Total number of pharmacy orders per admission
    num_orders = orders_by_admission.size().rename("num_pharm_orders")

    # Number of distinct medication names per admission
    num_distinct = orders_by_admission.nunique().rename("num_distinct_pharm_meds")

    # Flag high-risk medications using simple keyword matching.
    # The flag lives in a standalone Series so the caller's DataFrame is not
    # given an extra 'is_high_risk' column as a side effect.
    is_high_risk = _keyword_flag(med_text, HIGH_RISK_MED_KEYWORDS)

    high_risk_counts = (
        is_high_risk.groupby(admission)
        .sum()
        .rename("num_high_risk_pharm_orders")
    )

    has_high_risk = (
        (high_risk_counts > 0).astype("int8").rename("has_high_risk_pharm_order")
    )

    # Per-class indicators: 1 if any order in the admission matches the class.
    class_flags = [
        _keyword_flag(med_text, keywords)
        .groupby(admission)
        .any()
        .astype("int8")
        .rename(column_name)
        for column_name, keywords in (
            ("has_vasoactive_pharm", VASOACTIVE_KEYWORDS),
            ("has_sedation_pharm", SEDATION_KEYWORDS),
            ("has_opioid_pharm", OPIOID_KEYWORDS),
        )
    ]

    # Combine all features into one DataFrame; the original four columns come
    # first so existing consumers see them in the same positions.
    feat = (
        pd.concat(
            [num_orders, num_distinct, high_risk_counts, has_high_risk, *class_flags],
            axis=1,
        )
        .reset_index()
    )

    print(f"Built pharmacy features for {len(feat)} admissions.")
    return feat


In [5]:
# Execute
# End-to-end run: load the raw pharmacy orders, aggregate them to one row per
# hadm_id, and write the feature table to PHARM_FEAT_PATH.
if __name__ == "__main__":
    # Raw orders reduced to hadm_id, the medication name column and 'med_text'.
    pharm = load_pharmacy(PHARM_PATH)
    # Admission-level feature table (also prints how many admissions it covers).
    pharm_feat = build_pharm_features(pharm)
    print(f"Saving to: {PHARM_FEAT_PATH}")
    # index=False: hadm_id is already a regular column after reset_index().
    pharm_feat.to_csv(PHARM_FEAT_PATH, index=False)
    print("Done.")


Built pharmacy features for 463328 admissions.
Saving to: D:\School\5141\pharmacy_feat.csv
Done.
