# Experiments with API

In [1]:
import pandas as pd

## Helpers

In [2]:
def filter_single_patient(admissions_df, diagnoses_df, icu_stays_df, procedures_df, prescriptions_df, patients_metadata_df, targets_df, subject_id):
    admissions_filtered = admissions_df[admissions_df['SUBJECT_ID'] == subject_id]
    diagnoses_filtered = diagnoses_df[diagnoses_df['SUBJECT_ID'] == subject_id]
    icu_stays_filtered = icu_stays_df[icu_stays_df['SUBJECT_ID'] == subject_id]
    procedures_filtered = procedures_df[procedures_df['SUBJECT_ID'] == subject_id]
    prescriptions_filtered = prescriptions_df[prescriptions_df['SUBJECT_ID'] == subject_id]
    patients_metadata_filtered = patients_metadata_df[patients_metadata_df['SUBJECT_ID'] == subject_id]
    targets_filtered = targets_df[targets_df['SUBJECT_ID'] == subject_id]

    return (admissions_filtered, diagnoses_filtered, icu_stays_filtered, procedures_filtered, prescriptions_filtered, patients_metadata_filtered, targets_filtered)

## Import Data

In [3]:
base_dir = '/workspaces/msc-thesis-recurrent-health-modeling/data/mimic-api-demo/'

admissions_df = pd.read_csv(base_dir + 'admissions.csv')
diagnoses_df = pd.read_csv(base_dir + 'diagnoses.csv')
icu_stays_df = pd.read_csv(base_dir + 'icu_stays.csv')
procedures_df = pd.read_csv(base_dir + 'procedures.csv')
prescriptions_df = pd.read_csv(base_dir + 'prescriptions.csv')
patients_metadata_df = pd.read_csv(base_dir + 'patients.csv')
targets_df = pd.read_csv(base_dir + 'targets.csv')

In [4]:
procedures_df["ICD9_CODE"] = procedures_df["ICD9_CODE"].astype(str)

In [5]:
mask = (targets_df['READMISSION_30_DAYS'] == 1)
targets_df.loc[mask].head()

Unnamed: 0,SUBJECT_ID,HADM_ID,READMISSION_30_DAYS,DAYS_UNTIL_NEXT_HOSPITALIZATION,READMISSION_TIME_CAT
11,773,160425,1,20.544444,0-30
14,878,102365,1,24.338889,0-30
18,998,171544,1,18.935417,0-30
19,1006,199286,1,12.261111,0-30
20,1050,182917,1,3.957639,0-30


## Select single patient data

In [6]:
subject_id_example = 222
filtered_data = filter_single_patient(
    admissions_df,
    diagnoses_df,
    icu_stays_df,
    procedures_df,
    prescriptions_df,
    patients_metadata_df,
    targets_df,
    subject_id_example
)

In [7]:
admissions_single_patient_df = filtered_data[0]
diagnoses_single_patient_df = filtered_data[1]
icu_stays_single_patient_df = filtered_data[2]
procedures_single_patient_df = filtered_data[3]
prescriptions_single_patient_df = filtered_data[4]
patients_metadata_single_patient_df = filtered_data[5]
targets_df_single_patient_df = filtered_data[6]

In [8]:
print("Number of admissions for patient {}: {}".format(subject_id_example, len(admissions_single_patient_df)))

Number of admissions for patient 222: 4


In [9]:
admissions_single_patient_df.head()

Unnamed: 0,HADM_ID,SUBJECT_ID,ADMITTIME,DISCHTIME,ADMISSION_TYPE,INSURANCE,ETHNICITY,DISCHARGE_LOCATION
4,145243,222,2137-07-15 15:31:00,2137-07-17 12:00:00,EMERGENCY,Medicare,WHITE,HOME
5,105083,222,2141-02-18 23:10:00,2141-02-21 15:50:00,EMERGENCY,Medicare,WHITE,HOME HEALTH CARE
6,137006,222,2142-06-11 07:15:00,2142-06-19 12:30:00,ELECTIVE,Medicare,WHITE,HOME HEALTH CARE
7,188038,222,2142-06-23 05:02:00,2142-07-01 17:30:00,EMERGENCY,Medicare,WHITE,HOME


In [10]:
request_payload_single_patient = {
    "admissions": admissions_single_patient_df.to_dict(orient='records'),
    "diagnoses": diagnoses_single_patient_df.to_dict(orient='records'),
    "icu_stays": icu_stays_single_patient_df.to_dict(orient='records'),
    "procedures": procedures_single_patient_df.to_dict(orient='records'),
    "prescriptions": prescriptions_single_patient_df.to_dict(orient='records'),
    "patients": patients_metadata_single_patient_df.to_dict(orient='records'),
    "targets": None
}

## Multiple Patients Data

In [11]:
request_payload_mult_patients = {
    "admissions": admissions_df.to_dict(orient="records"),
    "diagnoses": diagnoses_df.to_dict(orient="records"),
    "icu_stays": icu_stays_df.to_dict(orient="records"),
    "procedures": procedures_df.to_dict(orient="records"),
    "prescriptions": prescriptions_df.to_dict(orient="records"),
    "patients": patients_metadata_df.to_dict(orient="records"),
    "targets": None
}

## Test Service

In [12]:
from api.services.prediction import ModelPrediction

model_prediction_service = ModelPrediction()

### Predict Multiple Patients

In [8]:
reponse_mult_patients = model_prediction_service.predict(request_payload_mult_patients)

In [22]:
reponse_mult_patients["prediction"]["true_labels"]

In [9]:
subject_ids = reponse_mult_patients['prediction']['subject_ids']
hadm_ids = reponse_mult_patients['prediction']['hadm_ids']
pred_probs = reponse_mult_patients['prediction']['pred_probs']
pred_labels = reponse_mult_patients['prediction']['pred_labels']

In [10]:
true_labels = reponse_mult_patients['prediction'].get('true_labels', [None]*len(subject_ids))
results_df = pd.DataFrame({
    'SUBJECT_ID': subject_ids,
    'HADM_ID': hadm_ids,
    'PRED_PROB': pred_probs,
    'PRED_LABEL': pred_labels,
    'TRUE_LABEL': true_labels
})
results_df

Unnamed: 0,SUBJECT_ID,HADM_ID,PRED_PROB,PRED_LABEL,TRUE_LABEL
0,36,122659,0.461308,0,
1,107,182383,0.259521,0,
2,222,188038,0.309999,0,
3,236,182562,0.163619,0,
4,291,125726,0.169115,0,
...,...,...,...,...,...
995,99346,177015,0.389370,0,
996,99469,126023,0.242672,0,
997,99503,146847,0.298876,0,
998,99650,158023,0.157831,0,


In [11]:
reponse_mult_patients['metrics']

{'auc_roc': None,
 'confusion_matrix': None,
 'recall': None,
 'accuracy': None,
 'precision': None,
 'f1_score': None}

### Predict Single Patient

In [19]:
model_prediction_service.predict(request_payload_single_patient)

{'prediction': {'pred_probs': [0.3099987506866455],
  'pred_labels': [0],
  'true_labels': None,
  'attention_weights': [[0.33402198553085327,
    0.39568451046943665,
    0.27029353380203247,
    0.0]],
  'hadm_ids': [188038],
  'subject_ids': [222]},
 'metadata': {'model_name': 'Attention Pooling',
  'number_of_predictions': 1,
  'timestamp': '2025-11-02T18:07:14.838912',
  'prob_threshold': 0.5},
 'metrics': {'auc_roc': None,
  'confusion_matrix': None,
  'recall': None,
  'accuracy': None,
  'precision': None,
  'f1_score': None}}

### Explain Single Patient 

In [13]:
response = model_prediction_service.explain_single_patient(request_payload_single_patient)

In [14]:
from api.schemas import ExplainSinglePatientEnvelope


ExplainSinglePatientEnvelope(**response)

ExplainSinglePatientEnvelope(explanation=ExplanationBody(current_features_attributions=[FeatureAttributionRow(feature='CHARLSON_INDEX', attribution=0.21558998488836617), FeatureAttributionRow(feature='GENDER_M', attribution=0.16840288222174116), FeatureAttributionRow(feature='LOG_PARTICIPATION_DAYS', attribution=0.12606682384137302), FeatureAttributionRow(feature='ETHNICITY_WHITE', attribution=0.10838427238038738), FeatureAttributionRow(feature='DISCHARGE_LOCATION_HOME', attribution=0.09645426818916625), FeatureAttributionRow(feature='LOG_DAYS_IN_ICU', attribution=0.07641784223212314), FeatureAttributionRow(feature='HAS_DIABETES', attribution=0.0669003870947727), FeatureAttributionRow(feature='ETHNICITY_BLACK', attribution=0.043225966114819374), FeatureAttributionRow(feature='HAS_CONGESTIVE_HF', attribution=0.04058688259236384), FeatureAttributionRow(feature='AGE', attribution=0.033391018614052), FeatureAttributionRow(feature='DISCHARGE_LOCATION_POST_ACUTE_CARE', attribution=0.02485472

## Test API Endpoints

In [17]:
import requests
import json

In [18]:
url = "http://127.0.0.1:8000/predict"
headers = {"Content-Type": "application/json"}

response = requests.post(url, headers=headers, data=json.dumps(request_payload_mult_patients))

print(response.status_code)
print(response.json())

200
{'prediction': {'pred_probs': [0.46130767464637756, 0.25952064990997314, 0.3099987506866455, 0.16361898183822632, 0.16911503672599792, 0.2986503839492798, 0.3561899960041046, 0.226303368806839, 0.2525293231010437, 0.112576425075531, 0.3648146688938141, 0.28403374552726746, 0.6198316216468811, 0.14000274240970612, 0.5025544762611389, 0.13156156241893768, 0.34489691257476807, 0.3868878185749054, 0.34894320368766785, 0.4655096232891083, 0.3929470181465149, 0.23685279488563538, 0.3072715997695923, 0.20914772152900696, 0.5231341123580933, 0.18550829589366913, 0.20617277920246124, 0.38574597239494324, 0.24530784785747528, 0.29710203409194946, 0.40354952216148376, 0.26163455843925476, 0.2903454005718231, 0.1300036609172821, 0.168972909450531, 0.2956843078136444, 0.14608140289783478, 0.21702860295772552, 0.590690553188324, 0.27225396037101746, 0.2842119038105011, 0.5380500555038452, 0.44156795740127563, 0.4014063775539398, 0.4180000126361847, 0.30290016531944275, 0.18882957100868225, 0.275

In [19]:
response.json()["metadata"]

{'model_name': 'Attention Pooling',
 'number_of_predictions': 1000,
 'timestamp': '2025-11-02T18:14:09.558068',
 'prob_threshold': 0.5}

In [27]:
url = "http://127.0.0.1:8000/explain_single_patient"
headers = {"Content-Type": "application/json"}

response = requests.post(url, headers=headers, data=json.dumps(request_payload_single_patient))

print("Response status code:", response.status_code)

if response.status_code == 200:
    response_dict = response.json()
    explanation = response_dict.get("explanation", {})
    
    curr_explanation_df = pd.DataFrame(explanation.get("current_features_attributions", []))
    past_explanation_df = pd.DataFrame(explanation.get("past_features_attributions", []))
    split_explanation_dict = explanation.get("feature_attribution_split", {})

    input_features = response_dict.get("input_features", {})

    curr = input_features["current"]
    past_df = pd.DataFrame(input_features["past"])
else:
    print("Error:", response.text)
    curr_df = pd.DataFrame()
    past_df = pd.DataFrame()
    curr_explanation_df = pd.DataFrame()
    past_explanation_df = pd.DataFrame()
    split_explanation_dict = {}

Response status code: 200


In [28]:
curr

{'SUBJECT_ID': 222,
 'HADM_ID': 188038,
 'ADMITTIME': '2142-06-23T05:02:00',
 'DISCHTIME': '2142-07-01T17:30:00',
 'HOSPITALIZATION_DAYS': 8.519444444444444,
 'DAYS_IN_ICU': 3.944039351851852,
 'CHARLSON_INDEX': 8,
 'NUM_DRUGS': 22,
 'NUM_PROCEDURES': 2,
 'PARTICIPATION_DAYS': 1812,
 'HAS_DIABETES': False,
 'HAS_COPD': False,
 'HAS_CONGESTIVE_HF': False,
 'DISCHARGE_LOCATION_POST_ACUTE_CARE': False,
 'DISCHARGE_LOCATION_HOME': True,
 'AGE': 69,
 'GENDER_M': False,
 'ADMISSION_TYPE_ELECTIVE': False,
 'ETHNICITY_WHITE': True,
 'ETHNICITY_BLACK': False,
 'ETHNICITY_HISPANIC': False,
 'INSURANCE_MEDICAID': False,
 'INSURANCE_PRIVATE': False}

In [29]:
past_df

Unnamed: 0,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,HOSPITALIZATION_DAYS,DAYS_IN_ICU,CHARLSON_INDEX,NUM_DRUGS,NUM_PROCEDURES,DISCHARGE_LOCATION_POST_ACUTE_CARE,ADMISSION_TYPE_ELECTIVE,DAYS_UNTIL_NEXT_HOSPITALIZATION
0,222,145243,2137-07-15T15:31:00,2137-07-17T12:00:00,1.853472,1.899444,4,9,2,False,False,1312.465278
1,222,105083,2141-02-18T23:10:00,2141-02-21T15:50:00,2.694444,1.545637,5,25,9,False,False,474.642361
2,222,137006,2142-06-11T07:15:00,2142-06-19T12:30:00,8.21875,3.182025,13,36,2,False,True,3.688889


In [30]:
curr_explanation_df

Unnamed: 0,feature,attribution
0,CHARLSON_INDEX,0.21559
1,GENDER_M,0.168403
2,LOG_PARTICIPATION_DAYS,0.126067
3,ETHNICITY_WHITE,0.108384
4,DISCHARGE_LOCATION_HOME,0.096454
5,LOG_DAYS_IN_ICU,0.076418
6,HAS_DIABETES,0.0669
7,ETHNICITY_BLACK,0.043226
8,HAS_CONGESTIVE_HF,0.040587
9,AGE,0.033391


In [21]:
from recurrent_health_events_prediction.visualization.utils import plot_subject_evolution

feat_display = past_df.columns.tolist()[4:]
plot_subject_evolution(past_df, 222, features_to_plot=feat_display)

In [18]:
feat_display

['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'HOSPITALIZATION_DAYS']

In [59]:
import numpy as np
import plotly.graph_objects as go

def make_attention_fig(attention_weights, hadm_ids, kind="bar"):
    """
    Plot attention weights across the last n admissions.
    
    Parameters
    ----------
    attention_weights : list[float]
        Attention values (length = n). Index 1 corresponds to the earliest
        admission within the observation window.
    hadm_ids : list[str|int]
        All admission IDs. The last n are used, aligned to attention_weights.
    kind : {"bar","line"}
        Choose a bar chart or a line chart with markers.
    """
    attention_weights = list(filter(lambda w: w > 0, attention_weights))
    n = len(attention_weights)
    if n == 0:
        raise ValueError("attention_weights is empty.")
    if len(hadm_ids) < n:
        raise ValueError("hadm_ids must be at least as long as attention_weights.")
    
    # Take the last n admissions and align with the attention weights
    hadm_subset = hadm_ids[-n:]
    x_idx = list(range(1, n + 1))  # 1-based indexing on the x-axis

    if kind == "line":
        trace = go.Scatter(
            x=x_idx, y=attention_weights, mode="lines+markers",
            customdata=np.array(hadm_subset),
            hovertemplate=(
                "Admission index: %{x}<br>"
                "HADM_ID: %{customdata}<br>"
                "Attention: %{y:.2f}<extra></extra>"
            ),
        )
    else:  # "bar"
        trace = go.Bar(
            x=x_idx, y=attention_weights,
            customdata=np.array(hadm_subset),
            hovertemplate=(
                "Admission index: %{x}<br>"
                "HADM_ID: %{customdata}<br>"
                "Attention: %{y:.2}<extra></extra>"
            ),
        )

    fig = go.Figure(trace)
    fig.update_layout(
        title="Attention over the last admissions",
        xaxis=dict(
            title=f"Admission index within observation window "
                  f"(1 = first of the last {n} admissions)",
            dtick=1,          # show only integer ticks (â€¦ 1, 2, 3, â€¦)
            tick0=1,          # start ticks at 1
            range=[0.5, n + 0.5],  # centers bars/points on integer positions
        ),
        yaxis=dict(title="Attention weight"),
        margin=dict(l=60, r=20, t=50, b=70),
    )
    return fig

In [60]:

# Example:
fig = make_attention_fig(attention_weights=[0.12, 0.03, 0.21, 0.64], hadm_ids=[101,102,103,104,105,106], kind="line")
fig.show()
