# Experiments with API

In [1]:
import pandas as pd

## Helpers

In [2]:
def filter_single_patient(admissions_df, diagnoses_df, icu_stays_df, procedures_df, prescriptions_df, patients_metadata_df, targets_df, subject_id):
    """Restrict every input table to the rows of a single patient.

    Each ``*_df`` argument is indexed by its ``SUBJECT_ID`` column, and only
    the rows whose ``SUBJECT_ID`` equals ``subject_id`` are kept.

    Returns
    -------
    tuple
        Seven filtered DataFrames in the same order as the table arguments:
        admissions, diagnoses, ICU stays, procedures, prescriptions,
        patient metadata, targets.
    """
    # Order matters: callers index the returned tuple by position.
    tables = (
        admissions_df,
        diagnoses_df,
        icu_stays_df,
        procedures_df,
        prescriptions_df,
        patients_metadata_df,
        targets_df,
    )
    return tuple(table[table['SUBJECT_ID'] == subject_id] for table in tables)

## Import Data

In [3]:
# Directory with the demo CSV files. File names are appended directly to this
# string, so the trailing slash is required.
base_dir = '/workspaces/msc-thesis-recurrent-health-modeling/data/mimic-api-demo/'

# One DataFrame per input table.
admissions_df = pd.read_csv(f'{base_dir}admissions.csv')
diagnoses_df = pd.read_csv(f'{base_dir}diagnoses.csv')
icu_stays_df = pd.read_csv(f'{base_dir}icu_stays.csv')
procedures_df = pd.read_csv(f'{base_dir}procedures.csv')
prescriptions_df = pd.read_csv(f'{base_dir}prescriptions.csv')
patients_metadata_df = pd.read_csv(f'{base_dir}patients.csv')
targets_df = pd.read_csv(f'{base_dir}targets.csv')

In [4]:
# Store procedure codes as strings (overwrites the column in place).
# NOTE(review): presumably so the codes are sent as text in the request
# payloads built below — confirm. astype(str) also turns missing values into
# the literal string 'nan'; verify that is acceptable for the service.
procedures_df["ICD9_CODE"] = procedures_df["ICD9_CODE"].astype(str)

In [5]:
# Boolean selector for the target rows flagged as readmitted within 30 days.
mask = targets_df['READMISSION_30_DAYS'].eq(1)
targets_df[mask].head()

Unnamed: 0,SUBJECT_ID,HADM_ID,READMISSION_30_DAYS,DAYS_UNTIL_NEXT_HOSPITALIZATION,READMISSION_TIME_CAT
11,773,160425,1,20.544444,0-30
14,878,102365,1,24.338889,0-30
18,998,171544,1,18.935417,0-30
19,1006,199286,1,12.261111,0-30
20,1050,182917,1,3.957639,0-30


## Select single patient data

In [6]:
# Patient whose rows are extracted from every table.
subject_id_example = 222
filtered_data = filter_single_patient(
    admissions_df=admissions_df,
    diagnoses_df=diagnoses_df,
    icu_stays_df=icu_stays_df,
    procedures_df=procedures_df,
    prescriptions_df=prescriptions_df,
    patients_metadata_df=patients_metadata_df,
    targets_df=targets_df,
    subject_id=subject_id_example,
)

In [7]:
# filter_single_patient returns the seven tables in this fixed order.
(
    admissions_single_patient_df,
    diagnoses_single_patient_df,
    icu_stays_single_patient_df,
    procedures_single_patient_df,
    prescriptions_single_patient_df,
    patients_metadata_single_patient_df,
    targets_df_single_patient_df,
) = filtered_data

In [8]:
# Sanity check: how many admissions does the selected patient have?
print(f"Number of admissions for patient {subject_id_example}: {len(admissions_single_patient_df)}")

Number of admissions for patient 222: 4


In [9]:
# Preview the admissions kept for the selected patient.
admissions_single_patient_df.head()

Unnamed: 0,HADM_ID,SUBJECT_ID,ADMITTIME,DISCHTIME,ADMISSION_TYPE,INSURANCE,ETHNICITY,DISCHARGE_LOCATION
4,145243,222,2137-07-15 15:31:00,2137-07-17 12:00:00,EMERGENCY,Medicare,WHITE,HOME
5,105083,222,2141-02-18 23:10:00,2141-02-21 15:50:00,EMERGENCY,Medicare,WHITE,HOME HEALTH CARE
6,137006,222,2142-06-11 07:15:00,2142-06-19 12:30:00,ELECTIVE,Medicare,WHITE,HOME HEALTH CARE
7,188038,222,2142-06-23 05:02:00,2142-07-01 17:30:00,EMERGENCY,Medicare,WHITE,HOME


In [10]:
# Request body for one patient: each table becomes a list of row dicts.
request_payload_single_patient = {
    table_name: table_df.to_dict(orient='records')
    for table_name, table_df in (
        ("admissions", admissions_single_patient_df),
        ("diagnoses", diagnoses_single_patient_df),
        ("icu_stays", icu_stays_single_patient_df),
        ("procedures", procedures_single_patient_df),
        ("prescriptions", prescriptions_single_patient_df),
        ("patients", patients_metadata_single_patient_df),
        ("targets", targets_df_single_patient_df),
    )
}

## Multiple Patients Data

In [10]:
# Request body for all patients: each full table becomes a list of row dicts.
request_payload_mult_patients = {
    table_name: table_df.to_dict(orient="records")
    for table_name, table_df in (
        ("admissions", admissions_df),
        ("diagnoses", diagnoses_df),
        ("icu_stays", icu_stays_df),
        ("procedures", procedures_df),
        ("prescriptions", prescriptions_df),
        ("patients", patients_metadata_df),
        ("targets", targets_df),
    )
}

## Test Service

In [11]:
from api.services.prediction import ModelPrediction

# Instantiate the service class directly; the HTTP endpoints are exercised
# separately in the "Test API Endpoints" section.
model_prediction_service = ModelPrediction()

### Predict Multiple Patients

In [12]:
# Run the service on the payload that contains every patient.
# NOTE: the variable name is spelled `reponse` (sic); the cells below rely on
# this exact spelling.
reponse_mult_patients = model_prediction_service.predict(request_payload_mult_patients)

In [13]:
# Pull the parallel result lists out of the 'prediction' section of the response.
subject_ids, hadm_ids, pred_probs, pred_labels, true_labels = (
    reponse_mult_patients['prediction'][field]
    for field in ('subject_ids', 'hadm_ids', 'pred_probs', 'pred_labels', 'true_labels')
)

# One row per prediction.
results_df = pd.DataFrame(
    dict(
        SUBJECT_ID=subject_ids,
        HADM_ID=hadm_ids,
        PRED_PROB=pred_probs,
        PRED_LABEL=pred_labels,
        TRUE_LABEL=true_labels,
    )
)
results_df

Unnamed: 0,SUBJECT_ID,HADM_ID,PRED_PROB,PRED_LABEL,TRUE_LABEL
0,36,122659,0.461308,0,0.0
1,107,182383,0.259521,0,0.0
2,222,188038,0.309999,0,0.0
3,236,182562,0.163619,0,0.0
4,291,125726,0.169115,0,0.0
...,...,...,...,...,...
995,99346,177015,0.389370,0,0.0
996,99469,126023,0.242672,0,0.0
997,99503,146847,0.298876,0,0.0
998,99650,158023,0.157831,0,0.0


In [None]:
# Metrics section returned alongside the multi-patient predictions.
reponse_mult_patients['metrics']

### Predict Single Patient

In [12]:
# Same service call, this time with the single-patient payload.
model_prediction_service.predict(request_payload_single_patient)

  merged_df['PREV_READMISSION_30_DAYS'] = merged_df['PREV_READMISSION_30_DAYS'].fillna(-1).astype(int)
  merged_df['READMISSION_30_DAYS'] = merged_df['READMISSION_30_DAYS'].fillna(0).astype(int)


{'prediction': {'pred_probs': [0.4012572169303894],
  'pred_labels': [0],
  'true_labels': [0.0],
  'attention_weights': None,
  'hadm_ids': [122659],
  'subject_ids': [36]},
 'metadata': {'model_name': 'Attention Pooling',
  'number_of_predictions': 1,
  'timestamp': '2025-10-31T20:29:19.261823'},
 'metrics': {'auc_roc': None,
  'confusion_matrix': None,
  'recall': None,
  'accuracy': None,
  'precision': None,
  'f1_score': None}}

### Explain Single Patient

In [13]:
# Ask the service for an explanation of the single-patient payload.
model_prediction_service.explain_single_patient(request_payload_single_patient)

  merged_df['PREV_READMISSION_30_DAYS'] = merged_df['PREV_READMISSION_30_DAYS'].fillna(-1).astype(int)
  merged_df['READMISSION_30_DAYS'] = merged_df['READMISSION_30_DAYS'].fillna(0).astype(int)


{'prediction': {'pred_probs': [0.4012572169303894],
  'pred_labels': [0],
  'true_labels': [0],
  'attention_weights': None,
  'hadm_ids': [122659],
  'subject_ids': [36]},
 'input_features': {'past': [],
  'current': {'SUBJECT_ID': 36,
   'HADM_ID': 122659,
   'ADMITTIME': Timestamp('2131-05-12 19:49:00'),
   'DISCHTIME': Timestamp('2131-05-25 13:30:00'),
   'HOSPITALIZATION_DAYS': 12.736805555555556,
   'DAYS_IN_ICU': 6.859548611111111,
   'CHARLSON_INDEX': 3,
   'NUM_DRUGS': 72,
   'NUM_PROCEDURES': 8,
   'PARTICIPATION_DAYS': 12,
   'HAS_DIABETES': False,
   'HAS_COPD': True,
   'HAS_CONGESTIVE_HF': False,
   'DISCHARGE_LOCATION_POST_ACUTE_CARE': 1.0,
   'DISCHARGE_LOCATION_HOME': 0.0,
   'AGE': 70,
   'GENDER_M': 1.0,
   'ADMISSION_TYPE_ELECTIVE': 0.0,
   'ETHNICITY_WHITE': 1.0,
   'ETHNICITY_BLACK': 0.0,
   'ETHNICITY_HISPANIC': 0.0,
   'INSURANCE_MEDICAID': 0.0,
   'INSURANCE_PRIVATE': 0.0}},
 'metadata': {'model_name': 'Attention Pooling',
  'number_of_predictions': 1,
  'times

## Test API Endpoints

In [11]:
import requests
import json

In [None]:
# POST the multi-patient payload to the /predict endpoint on localhost.
url = "http://127.0.0.1:8000/predict"
headers = {"Content-Type": "application/json"}

response = requests.post(
    url,
    headers=headers,
    data=json.dumps(request_payload_mult_patients),
)

print(response.status_code)
print(response.json())

In [42]:
# Metrics section of whatever HTTP response is currently stored in `response`.
# `response` is reassigned by the explain request further down, so this cell
# must run right after the /predict call.
response.json()["metrics"]

{'accuracy': 0.74,
 'precision': 0.4915254237288136,
 'recall': 0.11196911196911197,
 'f1_score': 0.18238993710691823,
 'auc_roc': 0.6606068185015554,
 'confusion_matrix': [[711, 30], [230, 29]]}

In [13]:
# POST the single-patient payload to the /explain_single_patient endpoint and
# unpack the prediction, attention weights and input features.
url = "http://127.0.0.1:8000/explain_single_patient"
headers = {"Content-Type": "application/json"}

response = requests.post(url, headers=headers, data=json.dumps(request_payload_single_patient))

print("Response status code:", response.status_code)

# Defaults so that `curr` and `past_df` always exist for the cells below,
# whichever branch runs (previously the error branch defined `curr_df`
# instead of `curr`, leaving `curr` undefined).
curr = {}
past_df = pd.DataFrame()

if response.status_code == 200:
    explanation = response.json()
    prediction = explanation.get("prediction", {})
    print("Prediction Results:")
    for key, value in prediction.items():
        # Each entry is a list with one element per prediction; show the first.
        # Empty or missing (None) entries are skipped.
        if value:
            print(f"{key}: {value[0]}")
    print("\n\nAttention Weights:")
    attention_weights = prediction.get("attention_weights", [])
    if attention_weights:
        for i, weights in enumerate(attention_weights[0]):
            print(f"Admission {i+1}: {weights}")
    else:
        print("No attention weights available.")
    input_features = explanation.get("input_features", {})

    # Use .get so a response without these keys falls back to the empty
    # defaults instead of raising KeyError.
    curr = input_features.get("current", {})
    past_df = pd.DataFrame(input_features.get("past", []))
else:
    print("Error:", response.text)

Response status code: 200
Prediction Results:
pred_probs: 0.3099987506866455
pred_labels: 0
true_labels: 0
attention_weights: [0.33402198553085327, 0.39568451046943665, 0.27029353380203247, 0.0]
hadm_ids: 188038
subject_ids: 222


Attention Weights:
Admission 1: 0.33402198553085327
Admission 2: 0.39568451046943665
Admission 3: 0.27029353380203247
Admission 4: 0.0


In [15]:
# "current" entry of the explanation's input features.
curr

{'SUBJECT_ID': 222,
 'HADM_ID': 188038,
 'ADMITTIME': '2142-06-23T05:02:00',
 'DISCHTIME': '2142-07-01T17:30:00',
 'HOSPITALIZATION_DAYS': 8.519444444444444,
 'DAYS_IN_ICU': 3.944039351851852,
 'CHARLSON_INDEX': 8,
 'NUM_DRUGS': 22,
 'NUM_PROCEDURES': 2,
 'PARTICIPATION_DAYS': 1812,
 'HAS_DIABETES': False,
 'HAS_COPD': False,
 'HAS_CONGESTIVE_HF': False,
 'DISCHARGE_LOCATION_POST_ACUTE_CARE': 0.0,
 'DISCHARGE_LOCATION_HOME': 1.0,
 'AGE': 69,
 'GENDER_M': 0.0,
 'ADMISSION_TYPE_ELECTIVE': 0.0,
 'ETHNICITY_WHITE': 1.0,
 'ETHNICITY_BLACK': 0.0,
 'ETHNICITY_HISPANIC': 0.0,
 'INSURANCE_MEDICAID': 0.0,
 'INSURANCE_PRIVATE': 0.0}

In [16]:
# "past" entries of the explanation's input features, one row each.
past_df

Unnamed: 0,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,HOSPITALIZATION_DAYS,DAYS_IN_ICU,CHARLSON_INDEX,NUM_DRUGS,NUM_PROCEDURES,DISCHARGE_LOCATION_POST_ACUTE_CARE,ADMISSION_TYPE_ELECTIVE,DAYS_UNTIL_NEXT_HOSPITALIZATION
0,222,145243,2137-07-15T15:31:00,2137-07-17T12:00:00,1.853472,1.899444,4,9,2,0.0,0.0,1312.465278
1,222,105083,2141-02-18T23:10:00,2141-02-21T15:50:00,2.694444,1.545637,5,25,9,0.0,0.0,474.642361
2,222,137006,2142-06-11T07:15:00,2142-06-19T12:30:00,8.21875,3.182025,13,36,2,0.0,1.0,3.688889


In [20]:
from recurrent_health_events_prediction.visualization.utils import plot_subject_evolution

# Plot every column of past_df from the fifth onwards (the slice skips the
# first four columns).
feat_display = past_df.columns.tolist()[4:]
# Pass the configured patient rather than a hard-coded ID, so the plot stays in
# sync with the payload built from subject_id_example.
plot_subject_evolution(past_df, subject_id_example, features_to_plot=feat_display)

In [18]:
# Column names selected for plotting.
feat_display

['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'HOSPITALIZATION_DAYS']

In [59]:
import numpy as np
import plotly.graph_objects as go

def make_attention_fig(attention_weights, hadm_ids, kind="bar"):
    """
    Plot attention weights across the last n admissions.

    Weights that are not strictly positive are dropped before plotting; n is
    the number of weights that remain.

    Parameters
    ----------
    attention_weights : list[float]
        Attention values. After filtering, position 1 on the x-axis
        corresponds to the earliest admission within the observation window.
    hadm_ids : list[str|int]
        All admission IDs. The last n are used, aligned to the remaining
        attention weights.
    kind : {"bar","line"}
        "line" draws a line chart with markers; any other value draws a
        bar chart.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with a single trace and 1-based integer ticks on the x-axis.

    Raises
    ------
    ValueError
        If no positive weight remains, or if ``hadm_ids`` has fewer entries
        than the remaining weights.
    """
    # NOTE(review): dropping non-positive weights and then taking the LAST n
    # hadm_ids assumes the dropped positions belong to the earliest IDs —
    # TODO confirm against how the model pads its attention output.
    attention_weights = [w for w in attention_weights if w > 0]
    n = len(attention_weights)
    if n == 0:
        raise ValueError("attention_weights is empty.")
    if len(hadm_ids) < n:
        raise ValueError("hadm_ids must be at least as long as attention_weights.")

    # Take the last n admissions and align with the attention weights
    hadm_subset = hadm_ids[-n:]
    x_idx = list(range(1, n + 1))  # 1-based indexing on the x-axis

    # One hover template for both chart kinds, so the attention value is
    # always shown with two decimals (the bar chart previously used "%{y:.2}",
    # which did not match the line chart's "%{y:.2f}").
    hover_template = (
        "Admission index: %{x}<br>"
        "HADM_ID: %{customdata}<br>"
        "Attention: %{y:.2f}<extra></extra>"
    )
    trace_kwargs = dict(
        x=x_idx,
        y=attention_weights,
        customdata=np.array(hadm_subset),
        hovertemplate=hover_template,
    )

    if kind == "line":
        trace = go.Scatter(mode="lines+markers", **trace_kwargs)
    else:  # "bar"
        trace = go.Bar(**trace_kwargs)

    fig = go.Figure(trace)
    fig.update_layout(
        title="Attention over the last admissions",
        xaxis=dict(
            title=f"Admission index within observation window "
                  f"(1 = first of the last {n} admissions)",
            dtick=1,          # show only integer ticks (… 1, 2, 3, …)
            tick0=1,          # start ticks at 1
            range=[0.5, n + 0.5],  # centers bars/points on integer positions
        ),
        yaxis=dict(title="Attention weight"),
        margin=dict(l=60, r=20, t=50, b=70),
    )
    return fig

In [60]:

# Example: four weights and six admission IDs, so only the last four IDs
# (103-106) are attached to the plotted points.
fig = make_attention_fig(attention_weights=[0.12, 0.03, 0.21, 0.64], hadm_ids=[101,102,103,104,105,106], kind="line")
fig.show()
