# Model Performance per Sub-Group - Drug Relapse Dataset

In [1]:
import pandas as pd
import numpy as np
import yaml
import os
from importlib import resources as impresources
from recurrent_health_events_prediction import configs
import plotly.express as px
from recurrent_health_events_prediction.model_analysis.utils import subset_metric_report, add_pred_cols, bin_numeric_column, stratified_bootstrap_delta
from sklearn.metrics import classification_report, roc_auc_score

In [2]:
# Load the packaged model and data configuration files.
# `impresources.files()` returns a Traversable, which is not guaranteed to be an
# os.PathLike (e.g. when the package is not installed as plain files on disk),
# so each resource is opened through its own `.open()` method rather than the
# built-in `open()`. The encoding is pinned so parsing does not depend on the locale.
config_root = impresources.files(configs)
with (config_root / 'model_config.yaml').open("r", encoding="utf-8") as f:
    model_config = yaml.safe_load(f)
with (config_root / 'data_config.yaml').open("r", encoding="utf-8") as f:
    data_config = yaml.safe_load(f)

In [3]:
# Dataset key used to index into `model_config`.
DATASET = "relapse"
# Directory from which the baseline classifiers' `prob_predictions.csv` is read.
# NOTE(review): this is an absolute, environment-specific path; consider deriving
# it from the project configuration so the notebook runs outside this workspace.
BASELINE_RESULTS_DIR = "/workspaces/master-thesis-recurrent-health-events-prediction/_models/drug_relapse/classifiers_baselines"
# Column of the training data that is matched against `sample_id` in the prediction files.
event_id_col = "COLLECTION_ID"

## Load Model Config and Predicted Probabilities

In [4]:
# Resolve the directory of the saved HMM from the model configuration and read
# the YAML config stored next to it.
model_name = "hmm_log_normal_relapse_time"
base_model_dir = model_config[DATASET]["hidden_markov"]["save_model_path"]
model_dir = os.path.join(base_model_dir, model_name)
config_path = os.path.join(model_dir, model_name + "_config.yaml")
with open(config_path, mode="r") as config_file:
    hmm_config = yaml.safe_load(config_file)

In [5]:
# Read the predicted probabilities of both model families and the training frame
# that carries the hidden-state columns.
probs_pred_hmm_feat_df = pd.read_csv(os.path.join(model_dir, "prob_predictions.csv"))
probs_pred_baseline_df = pd.read_csv(os.path.join(BASELINE_RESULTS_DIR, "prob_predictions.csv"))
full_training_df = pd.read_csv(
    os.path.join(model_dir, "last_events_with_hidden_states.csv")
).drop(columns=["index"])

# Prefix every `y_pred_*` column with its origin so the two prediction frames can
# later be merged without name clashes. The column list is taken from the HMM
# frame and applied to both frames.
pred_cols = [col for col in probs_pred_hmm_feat_df.columns if col.startswith("y_pred_")]
pred_cols_hmm_renamed = [f"hmm_{col}" for col in pred_cols]
pred_cols_baseline_renamed = [f"baseline_{col}" for col in pred_cols]

probs_pred_hmm_feat_df = probs_pred_hmm_feat_df.rename(
    columns=dict(zip(pred_cols, pred_cols_hmm_renamed))
)
probs_pred_baseline_df = probs_pred_baseline_df.rename(
    columns=dict(zip(pred_cols, pred_cols_baseline_renamed))
)

In [6]:
# Report how many rows each loaded frame has.
for description, frame in (
    ("Number of rows in full training data:", full_training_df),
    ("Number of rows in HMM-based features predictions set:", probs_pred_hmm_feat_df),
    ("Number of rows in baseline predictions set:", probs_pred_baseline_df),
):
    print(description, len(frame))

Number of rows in full training data: 12289
Number of rows in HMM-based features predictions set: 2458
Number of rows in baseline predictions set: 2458


In [7]:
# Summarise the columns, non-null counts and dtypes of the full training frame.
full_training_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12289 entries, 0 to 12288
Data columns (total 63 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   DONOR_ID                           12289 non-null  int64  
 1   COLLECTION_ID                      12289 non-null  int64  
 2   RELAPSE_START                      12289 non-null  object 
 3   RELAPSE_END                        12289 non-null  object 
 4   EVENT_DURATION                     12289 non-null  float64
 5   RELAPSE_EVENT                      12289 non-null  int64  
 6   NUM_TESTS_PERIOD                   12289 non-null  int64  
 7   NUM_PREV_RELAPSES                  12289 non-null  int64  
 8   RELAPSE_DURATION_CATEGORY          12289 non-null  object 
 9   RELAPSE_DURATION_CATEGORY_ENCODED  12289 non-null  int64  
 10  RELAPSE_30_DAYS                    12289 non-null  int64  
 11  PREV_POSITIVE_DRUGS                12289 non-null  obj

In [8]:
# Preview the HMM-feature predictions after the `hmm_` prefix was applied.
probs_pred_hmm_feat_df.head()

Unnamed: 0,sample_id,y_true,hmm_y_pred_proba_logreg,hmm_y_pred_proba_rf,hmm_y_pred_proba_lgbm
0,2546456,1,0.663733,0.729807,0.75734
1,1799879,0,0.404081,0.315983,0.400145
2,3193619,0,0.416355,0.424778,0.379926
3,2359036,0,0.40789,0.405304,0.420496
4,1989401,0,0.655784,0.50361,0.545966


In [9]:
# Preview the baseline predictions after the `baseline_` prefix was applied.
probs_pred_baseline_df.head()

Unnamed: 0,sample_id,y_true,baseline_y_pred_proba_logreg,baseline_y_pred_proba_rf,baseline_y_pred_proba_lgbm
0,2546456,1,0.616783,0.668494,0.75101
1,1799879,0,0.348644,0.326531,0.373224
2,3193619,0,0.356821,0.391781,0.392262
3,2359036,0,0.426967,0.481232,0.444973
4,1989401,0,0.634051,0.605522,0.544905


## Comparison Dataframe and Sub-Groups to Analyze

In [10]:
# Raw columns along which model performance will be broken down.
subgroup_cols = [
    "PROGRAM_TYPE",
    "DRUG_POSITIVE_PAST_MEAN",
    "NUM_PREV_RELAPSES",
    "LOG_TIME_RELAPSE_PAST_MEDIAN",
    "RELAPSE_30_DAYS_PAST_SUM"
]

# Attach the sub-group columns to the HMM predictions by matching `sample_id`
# against the event id of the training data; the duplicate key column is dropped.
comparison_df = pd.merge(
    probs_pred_hmm_feat_df,
    full_training_df[[event_id_col] + subgroup_cols],
    left_on="sample_id",
    right_on=event_id_col,
    how="left"
).drop(columns=[event_id_col])
# A left merge cannot lose rows, but it duplicates them if an event id occurs
# more than once on the right side. Fail loudly instead of inflating the metrics.
assert len(comparison_df) == len(probs_pred_hmm_feat_df), (
    "Merging sub-group columns changed the number of prediction rows; "
    f"check `{event_id_col}` for duplicates in the training data."
)

# Add the baseline probabilities; `y_true` is already present from the HMM frame.
comparison_df = pd.merge(
    comparison_df,
    probs_pred_baseline_df.drop(columns=["y_true"]),
    on="sample_id",
    how="inner"
)
# The inner merge silently drops samples missing from either prediction set and
# duplicates repeated ids, so verify that both models are compared on the same rows.
assert len(comparison_df) == len(probs_pred_hmm_feat_df), (
    "HMM and baseline prediction sets do not cover the same samples "
    "(row count changed after merging on `sample_id`)."
)

In [11]:
# List the columns of the merged comparison frame.
comparison_df.columns

Index(['sample_id', 'y_true', 'hmm_y_pred_proba_logreg', 'hmm_y_pred_proba_rf',
       'hmm_y_pred_proba_lgbm', 'PROGRAM_TYPE', 'DRUG_POSITIVE_PAST_MEAN',
       'NUM_PREV_RELAPSES', 'LOG_TIME_RELAPSE_PAST_MEDIAN',
       'RELAPSE_30_DAYS_PAST_SUM', 'baseline_y_pred_proba_logreg',
       'baseline_y_pred_proba_rf', 'baseline_y_pred_proba_lgbm'],
      dtype='object')

In [12]:
# Cut-off handed to `add_pred_cols`; judging by the displayed result, this adds
# one 0/1 `*_y_pred_<model>` column per probability column.
decision_threshold = 0.5
comparison_df = add_pred_cols(comparison_df, decision_threshold)
comparison_df.head()

Unnamed: 0,sample_id,y_true,hmm_y_pred_proba_logreg,hmm_y_pred_proba_rf,hmm_y_pred_proba_lgbm,PROGRAM_TYPE,DRUG_POSITIVE_PAST_MEAN,NUM_PREV_RELAPSES,LOG_TIME_RELAPSE_PAST_MEDIAN,RELAPSE_30_DAYS_PAST_SUM,baseline_y_pred_proba_logreg,baseline_y_pred_proba_rf,baseline_y_pred_proba_lgbm,hmm_y_pred_logreg,hmm_y_pred_rf,hmm_y_pred_lgbm,baseline_y_pred_logreg,baseline_y_pred_rf,baseline_y_pred_lgbm
0,2546456,1,0.663733,0.729807,0.75734,DWI Court,0.363636,11,2.890372,8.0,0.616783,0.668494,0.75101,1,1,1,1,1,1
1,1799879,0,0.404081,0.315983,0.400145,Probation,0.266667,3,4.158883,0.0,0.348644,0.326531,0.373224,0,0,0,0,0,0
2,3193619,0,0.416355,0.424778,0.379926,Drug Court,0.084112,8,3.935846,2.0,0.356821,0.391781,0.392262,0,0,0,0,0,0
3,2359036,0,0.40789,0.405304,0.420496,Drug Court,0.12,8,3.044522,5.0,0.426967,0.481232,0.444973,0,0,0,0,0,0
4,1989401,0,0.655784,0.50361,0.545966,Drug Court,0.157895,2,2.850222,2.0,0.634051,0.605522,0.544905,1,1,1,1,1,1


In [13]:
from pandas.api.types import is_numeric_dtype

# Bin every numeric sub-group column into (up to) three quantile-based groups;
# non-numeric columns such as PROGRAM_TYPE are skipped.
for col in subgroup_cols:
    if not is_numeric_dtype(comparison_df[col]):
        continue
    comparison_df = bin_numeric_column(comparison_df, col, num_bins=3, strategy="quantile")

In [14]:
# From here on `subgroup_cols` holds the binned `*_SUBGROUP` columns, plus the
# categorical PROGRAM_TYPE column for the relapse dataset.
subgroup_cols = [name for name in comparison_df.columns if name.endswith("_SUBGROUP")]
if DATASET == "relapse":
    subgroup_cols.append("PROGRAM_TYPE")
subgroup_cols

['DRUG_POSITIVE_PAST_MEAN_SUBGROUP',
 'NUM_PREV_RELAPSES_SUBGROUP',
 'LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP',
 'RELAPSE_30_DAYS_PAST_SUM_SUBGROUP',
 'PROGRAM_TYPE']

In [15]:
# Display the comparison frame, now including the hard predictions and the *_SUBGROUP bins.
comparison_df

Unnamed: 0,sample_id,y_true,hmm_y_pred_proba_logreg,hmm_y_pred_proba_rf,hmm_y_pred_proba_lgbm,PROGRAM_TYPE,DRUG_POSITIVE_PAST_MEAN,NUM_PREV_RELAPSES,LOG_TIME_RELAPSE_PAST_MEDIAN,RELAPSE_30_DAYS_PAST_SUM,...,hmm_y_pred_logreg,hmm_y_pred_rf,hmm_y_pred_lgbm,baseline_y_pred_logreg,baseline_y_pred_rf,baseline_y_pred_lgbm,DRUG_POSITIVE_PAST_MEAN_SUBGROUP,NUM_PREV_RELAPSES_SUBGROUP,LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP,RELAPSE_30_DAYS_PAST_SUM_SUBGROUP
0,2546456,1,0.663733,0.729807,0.757340,DWI Court,0.363636,11,2.890372,8.0,...,1,1,1,1,1,1,"(0.308, 0.625]","(3.0, 56.0]","(2.565, 3.584]","(2.0, 55.0]"
1,1799879,0,0.404081,0.315983,0.400145,Probation,0.266667,3,4.158883,0.0,...,0,0,0,0,0,0,"(0.00809, 0.308]","(0.999, 3.0]","(3.584, 6.849]","(-0.001, 1.0]"
2,3193619,0,0.416355,0.424778,0.379926,Drug Court,0.084112,8,3.935846,2.0,...,0,0,0,0,0,0,"(0.00809, 0.308]","(3.0, 56.0]","(3.584, 6.849]","(1.0, 2.0]"
3,2359036,0,0.407890,0.405304,0.420496,Drug Court,0.120000,8,3.044522,5.0,...,0,0,0,0,0,0,"(0.00809, 0.308]","(3.0, 56.0]","(2.565, 3.584]","(2.0, 55.0]"
4,1989401,0,0.655784,0.503610,0.545966,Drug Court,0.157895,2,2.850222,2.0,...,1,1,1,1,1,1,"(0.00809, 0.308]","(0.999, 3.0]","(2.565, 3.584]","(1.0, 2.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2453,24994493,1,0.680570,0.559393,0.643981,General,0.928571,1,2.302585,1.0,...,1,1,1,1,1,1,"(0.625, 0.992]","(0.999, 3.0]","(0.692, 2.565]","(-0.001, 1.0]"
2454,4399294,0,0.323356,0.322929,0.345690,Probation,0.135135,3,3.526361,1.0,...,0,0,0,0,0,0,"(0.00809, 0.308]","(0.999, 3.0]","(2.565, 3.584]","(-0.001, 1.0]"
2455,22956842,1,0.805446,0.883838,0.957767,Family Court,0.547826,15,2.397895,12.0,...,1,1,1,1,1,1,"(0.308, 0.625]","(3.0, 56.0]","(0.692, 2.565]","(2.0, 55.0]"
2456,20034105,1,0.258762,0.257042,0.263550,General,0.454545,2,4.080259,0.0,...,0,0,0,0,0,0,"(0.308, 0.625]","(0.999, 3.0]","(3.584, 6.849]","(-0.001, 1.0]"


In [16]:
model_to_analyze = "rf" # Change this to the model you want to analyze 'logreg', 'lgbm', 'rf'

# Select this model's columns by their exact "_<model>" suffix. A plain substring
# test would also pick up any unrelated column whose name merely contains the
# model key (e.g. a feature name containing "rf").
model_suffix = "_" + model_to_analyze
model_cols = [col for col in comparison_df.columns if col.endswith(model_suffix)]

cols = ['sample_id', 'y_true'] + subgroup_cols + model_cols
comparison_model_df = comparison_df[cols]
comparison_model_df.head()

Unnamed: 0,sample_id,y_true,DRUG_POSITIVE_PAST_MEAN_SUBGROUP,NUM_PREV_RELAPSES_SUBGROUP,LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP,RELAPSE_30_DAYS_PAST_SUM_SUBGROUP,PROGRAM_TYPE,hmm_y_pred_proba_rf,baseline_y_pred_proba_rf,hmm_y_pred_rf,baseline_y_pred_rf
0,2546456,1,"(0.308, 0.625]","(3.0, 56.0]","(2.565, 3.584]","(2.0, 55.0]",DWI Court,0.729807,0.668494,1,1
1,1799879,0,"(0.00809, 0.308]","(0.999, 3.0]","(3.584, 6.849]","(-0.001, 1.0]",Probation,0.315983,0.326531,0,0
2,3193619,0,"(0.00809, 0.308]","(3.0, 56.0]","(3.584, 6.849]","(1.0, 2.0]",Drug Court,0.424778,0.391781,0,0
3,2359036,0,"(0.00809, 0.308]","(3.0, 56.0]","(2.565, 3.584]","(2.0, 55.0]",Drug Court,0.405304,0.481232,0,0
4,1989401,0,"(0.00809, 0.308]","(0.999, 3.0]","(2.565, 3.584]","(1.0, 2.0]",Drug Court,0.50361,0.605522,1,1


In [17]:
# List the columns kept for the selected model.
comparison_model_df.columns

Index(['sample_id', 'y_true', 'DRUG_POSITIVE_PAST_MEAN_SUBGROUP',
       'NUM_PREV_RELAPSES_SUBGROUP', 'LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP',
       'RELAPSE_30_DAYS_PAST_SUM_SUBGROUP', 'PROGRAM_TYPE',
       'hmm_y_pred_proba_rf', 'baseline_y_pred_proba_rf', 'hmm_y_pred_rf',
       'baseline_y_pred_rf'],
      dtype='object')

In [18]:
# Column names of the selected model: probabilities and hard predictions,
# for the HMM-feature variant and for the baseline.
hmm_pred_proba_col = f"hmm_y_pred_proba_{model_to_analyze}"
baseline_pred_proba_col = f"baseline_y_pred_proba_{model_to_analyze}"
hmm_pred_col = f"hmm_y_pred_{model_to_analyze}"
baseline_pred_col = f"baseline_y_pred_{model_to_analyze}"

## Results

In [19]:
# Overall performance of the HMM-feature model: AUC from the probabilities,
# classification report from the hard predictions.
hmm_auc = roc_auc_score(comparison_model_df['y_true'], comparison_model_df[hmm_pred_proba_col])
print("AUC HMM-based features:", hmm_auc)
print("HMM-based features classification report:")
hmm_report = classification_report(
    comparison_model_df['y_true'],
    comparison_model_df[hmm_pred_col],
    zero_division=0,
)
print(hmm_report)

AUC HMM-based features: 0.7222897226111074
HMM-based features classification report:
              precision    recall  f1-score   support

           0       0.62      0.66      0.64      1142
           1       0.69      0.65      0.67      1316

    accuracy                           0.66      2458
   macro avg       0.65      0.66      0.65      2458
weighted avg       0.66      0.66      0.66      2458



In [20]:
# Overall performance of the baseline model: AUC from the probabilities,
# classification report from the hard predictions.
baseline_auc = roc_auc_score(comparison_model_df['y_true'], comparison_model_df[baseline_pred_proba_col])
print("AUC Baseline model:", baseline_auc)
print("Baseline classification report:")
baseline_report = classification_report(
    comparison_model_df['y_true'],
    comparison_model_df[baseline_pred_col],
    zero_division=0,
)
print(baseline_report)

AUC Baseline model: 0.7302012413565493
Baseline classification report:
              precision    recall  f1-score   support

           0       0.63      0.65      0.64      1142
           1       0.69      0.67      0.68      1316

    accuracy                           0.66      2458
   macro avg       0.66      0.66      0.66      2458
weighted avg       0.66      0.66      0.66      2458



In [23]:
# Bootstrap the AUC difference between the two probability columns on the whole
# test set. Judging by the AUCs printed above, the reported delta is HMM minus baseline.
# NOTE(review): no seed is passed, so the figures may vary between runs unless
# the helper seeds its random generator internally (confirm).
stratified_bootstrap_delta(
    df=comparison_model_df, y_col="y_true",
    hmm_col=hmm_pred_proba_col, base_col=baseline_pred_proba_col,
    metric="auc", threshold=0.5, n_boot=5000,
)

100%|██████████| 5000/5000 [00:16<00:00, 310.35it/s]


{'metric': 'auc',
 'obs_delta': -0.007911518745441937,
 'delta_mean': -0.007920946427906033,
 'ci_low': -0.01814930013999864,
 'ci_high': 0.0020102344045269008,
 'p_value': 0.1128,
 'n_boot': 5000}

### LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP

In [24]:
# F1 of baseline vs. HMM-feature model per bin of the past median log time to relapse.
group_col = "LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP"
subset_metric_report(
    comparison_model_df,
    group_by=group_col,
    y_col="y_true",
    hmm_col=hmm_pred_col,
    base_col=baseline_pred_col,
    metric="f1",
)

  df.groupby(group_by, dropna=False, group_keys=False)


Unnamed: 0,LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP,n,f1_baseline,f1_hmm,delta
0,"(0.692, 2.565]",843.0,0.683218,0.698729,0.015511
1,"(2.565, 3.584]",800.0,0.572018,0.562895,-0.009123
2,"(3.584, 6.849]",815.0,0.580914,0.626102,0.045188


In [25]:
# Same breakdown with AUC, which needs the probability columns instead of the
# hard predictions.
subset_metric_report(
    comparison_model_df,
    group_by=group_col,
    y_col="y_true",
    hmm_col=hmm_pred_proba_col,
    base_col=baseline_pred_proba_col,
    metric="auc",
)

  df.groupby(group_by, dropna=False, group_keys=False)


Unnamed: 0,LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP,n,auc_baseline,auc_hmm,delta
0,"(0.692, 2.565]",843.0,0.714648,0.725979,0.011331
1,"(2.565, 3.584]",800.0,0.619819,0.607128,-0.012691
2,"(3.584, 6.849]",815.0,0.617676,0.631382,0.013706


In [26]:
# The binned column is categorical; list its categories (the bin intervals).
group_categories = comparison_model_df[group_col].cat.categories
print("Group categories:", group_categories)

Group categories: IntervalIndex([(0.692, 2.565], (2.565, 3.584], (3.584, 6.849]], dtype='interval[float64, right]')


In [27]:
# Pick one bin for the bootstrap test below; index -1 takes the last category listed above.
group_selected = group_categories[-1]  # Change this to the group you want to analyze
print("Selected group for analysis:", group_selected)

Selected group for analysis: (3.584, 6.849]


In [28]:
# Keep only the rows that fall into the selected bin.
in_selected_group = comparison_model_df[group_col] == group_selected
subset_group_df = comparison_model_df.loc[in_selected_group]
subset_group_df.head()

Unnamed: 0,sample_id,y_true,DRUG_POSITIVE_PAST_MEAN_SUBGROUP,NUM_PREV_RELAPSES_SUBGROUP,LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP,RELAPSE_30_DAYS_PAST_SUM_SUBGROUP,PROGRAM_TYPE,hmm_y_pred_proba_rf,baseline_y_pred_proba_rf,hmm_y_pred_rf,baseline_y_pred_rf
1,1799879,0,"(0.00809, 0.308]","(0.999, 3.0]","(3.584, 6.849]","(-0.001, 1.0]",Probation,0.315983,0.326531,0,0
2,3193619,0,"(0.00809, 0.308]","(3.0, 56.0]","(3.584, 6.849]","(1.0, 2.0]",Drug Court,0.424778,0.391781,0,0
10,1843635,1,"(0.00809, 0.308]","(0.999, 3.0]","(3.584, 6.849]","(-0.001, 1.0]",Probation,0.261578,0.266517,0,0
18,2254510,1,"(0.308, 0.625]","(0.999, 3.0]","(3.584, 6.849]","(-0.001, 1.0]",Probation,0.46267,0.476663,0,0
23,2969873,0,"(0.308, 0.625]","(0.999, 3.0]","(3.584, 6.849]","(-0.001, 1.0]",Juvenile Court,0.346923,0.346653,0,0


In [29]:
# Bootstrap the F1 difference inside the selected bin. Probability columns are
# passed together with the 0.5 threshold.
# NOTE(review): this bin is one of several compared above, and nothing in this
# call adjusts the p-value for multiple comparisons.
stratified_bootstrap_delta(
    df=subset_group_df, y_col="y_true",
    hmm_col=hmm_pred_proba_col, base_col=baseline_pred_proba_col,
    metric="f1", threshold=0.5, n_boot=5000,
)

100%|██████████| 5000/5000 [00:13<00:00, 366.97it/s]


{'metric': 'f1',
 'obs_delta': 0.045187748670229455,
 'delta_mean': 0.04539471992462321,
 'ci_low': 0.02051366336939304,
 'ci_high': 0.07076222611255377,
 'p_value': 0.0004,
 'n_boot': 5000}

### DRUG_POSITIVE_PAST_MEAN_SUBGROUP

In [30]:
# F1 of baseline vs. HMM-feature model per bin of the past mean drug-positive rate.
group_col = "DRUG_POSITIVE_PAST_MEAN_SUBGROUP"
subset_metric_report(
    comparison_model_df,
    group_by=group_col,
    y_col="y_true",
    hmm_col=hmm_pred_col,
    base_col=baseline_pred_col,
    metric="f1",
)

  df.groupby(group_by, dropna=False, group_keys=False)


Unnamed: 0,DRUG_POSITIVE_PAST_MEAN_SUBGROUP,n,f1_baseline,f1_hmm,delta
0,"(0.00809, 0.308]",821.0,0.590334,0.608464,0.01813
1,"(0.308, 0.625]",831.0,0.65738,0.648787,-0.008593
2,"(0.625, 0.992]",806.0,0.699077,0.692895,-0.006182


### PROGRAM_TYPE

In [31]:
# F1 of baseline vs. HMM-feature model per program type.
# Check the `n` column before reading much into a delta: program types with only
# a handful of samples give unstable F1 values.
group_col = "PROGRAM_TYPE"
subset_metric_report(
    comparison_model_df,
    group_by=group_col,
    y_col="y_true",
    hmm_col=hmm_pred_col,
    base_col=baseline_pred_col,
    metric="f1",
)

Unnamed: 0,PROGRAM_TYPE,n,f1_baseline,f1_hmm,delta
0,Co-Occuring Court,1.0,1.0,1.0,0.0
1,DWI Court,92.0,0.5443,0.537628,-0.006672
2,Demo,1.0,0.0,0.0,0.0
3,Drug Court,363.0,0.662565,0.635921,-0.026645
4,Family Court,281.0,0.680718,0.675479,-0.005239
5,General,252.0,0.718374,0.694497,-0.023876
6,Juvenile Court,78.0,0.732259,0.630966,-0.101293
7,Mental Health,37.0,0.609582,0.657546,0.047963
8,Parole,3.0,0.666667,0.166667,-0.5
9,Pretrial,133.0,0.651675,0.694976,0.043301


In [32]:
# Restrict the comparison frame to a single program type.
group_selected = "Pretrial"
is_selected_program = comparison_model_df[group_col] == group_selected
subset_group_df = comparison_model_df.loc[is_selected_program]
subset_group_df.head()

Unnamed: 0,sample_id,y_true,DRUG_POSITIVE_PAST_MEAN_SUBGROUP,NUM_PREV_RELAPSES_SUBGROUP,LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP,RELAPSE_30_DAYS_PAST_SUM_SUBGROUP,PROGRAM_TYPE,hmm_y_pred_proba_rf,baseline_y_pred_proba_rf,hmm_y_pred_rf,baseline_y_pred_rf
22,2183838,0,"(0.625, 0.992]","(0.999, 3.0]","(0.692, 2.565]","(1.0, 2.0]",Pretrial,0.453832,0.567993,0,1
55,3154618,1,"(0.308, 0.625]","(3.0, 56.0]","(2.565, 3.584]","(2.0, 55.0]",Pretrial,0.516289,0.668274,1,1
65,1975895,1,"(0.308, 0.625]","(0.999, 3.0]","(3.584, 6.849]","(-0.001, 1.0]",Pretrial,0.567347,0.48284,1,0
82,2162697,1,"(0.625, 0.992]","(0.999, 3.0]","(0.692, 2.565]","(-0.001, 1.0]",Pretrial,0.551241,0.684309,1,1
102,2234132,1,"(0.308, 0.625]","(3.0, 56.0]","(0.692, 2.565]","(2.0, 55.0]",Pretrial,0.674835,0.673228,1,1


In [33]:
# Bootstrap the F1 difference for the selected program type. Probability columns
# are passed together with the 0.5 threshold.
stratified_bootstrap_delta(
    df=subset_group_df, y_col="y_true",
    hmm_col=hmm_pred_proba_col, base_col=baseline_pred_proba_col,
    metric="f1", threshold=0.5, n_boot=5000,
)

100%|██████████| 5000/5000 [00:12<00:00, 394.42it/s]


{'metric': 'f1',
 'obs_delta': 0.04330143540669851,
 'delta_mean': 0.04308196872519677,
 'ci_low': -0.013509216324960842,
 'ci_high': 0.10156228525846502,
 'p_value': 0.144,
 'n_boot': 5000}

### NUM_PREV_RELAPSES_SUBGROUP

In [34]:
# F1 of baseline vs. HMM-feature model per bin of the number of previous relapses.
group_col = "NUM_PREV_RELAPSES_SUBGROUP"
subset_metric_report(
    comparison_model_df,
    group_by=group_col,
    y_col="y_true",
    hmm_col=hmm_pred_col,
    base_col=baseline_pred_col,
    metric="f1",
)

  df.groupby(group_by, dropna=False, group_keys=False)


Unnamed: 0,NUM_PREV_RELAPSES_SUBGROUP,n,f1_baseline,f1_hmm,delta
0,"(0.999, 3.0]",1826.0,0.649321,0.64142,-0.007901
1,"(3.0, 56.0]",632.0,0.675343,0.680505,0.005161


In [35]:
# List the bins of the current sub-group column.
# NOTE(review): only two bins show up here although three were requested when
# binning; presumably tied quantile edges were merged (confirm in `bin_numeric_column`).
group_categories = comparison_model_df[group_col].cat.categories
print("Group categories:", group_categories)

Group categories: IntervalIndex([(0.999, 3.0], (3.0, 56.0]], dtype='interval[float64, right]')


In [36]:
# Pick one bin for the bootstrap test below; index -1 takes the last category listed above.
group_selected = group_categories[-1]  # Change this to the group you want to analyze
print("Selected group for analysis:", group_selected)

Selected group for analysis: (3.0, 56.0]


In [37]:
# Keep only the rows that fall into the selected bin.
in_selected_group = comparison_model_df[group_col] == group_selected
subset_group_df = comparison_model_df.loc[in_selected_group]
subset_group_df.head()

Unnamed: 0,sample_id,y_true,DRUG_POSITIVE_PAST_MEAN_SUBGROUP,NUM_PREV_RELAPSES_SUBGROUP,LOG_TIME_RELAPSE_PAST_MEDIAN_SUBGROUP,RELAPSE_30_DAYS_PAST_SUM_SUBGROUP,PROGRAM_TYPE,hmm_y_pred_proba_rf,baseline_y_pred_proba_rf,hmm_y_pred_rf,baseline_y_pred_rf
0,2546456,1,"(0.308, 0.625]","(3.0, 56.0]","(2.565, 3.584]","(2.0, 55.0]",DWI Court,0.729807,0.668494,1,1
2,3193619,0,"(0.00809, 0.308]","(3.0, 56.0]","(3.584, 6.849]","(1.0, 2.0]",Drug Court,0.424778,0.391781,0,0
3,2359036,0,"(0.00809, 0.308]","(3.0, 56.0]","(2.565, 3.584]","(2.0, 55.0]",Drug Court,0.405304,0.481232,0,0
5,1979846,1,"(0.308, 0.625]","(3.0, 56.0]","(2.565, 3.584]","(2.0, 55.0]",Drug Court,0.691829,0.709135,1,1
8,1527434,1,"(0.625, 0.992]","(3.0, 56.0]","(0.692, 2.565]","(2.0, 55.0]",Drug Court,0.907041,0.906481,1,1


In [38]:
# Bootstrap the F1 difference inside the selected bin. Probability columns are
# passed together with the 0.5 threshold.
stratified_bootstrap_delta(
    df=subset_group_df, y_col="y_true",
    hmm_col=hmm_pred_proba_col, base_col=baseline_pred_proba_col,
    metric="f1", threshold=0.5, n_boot=5000,
)

  0%|          | 0/5000 [00:00<?, ?it/s]

100%|██████████| 5000/5000 [00:14<00:00, 355.08it/s]


{'metric': 'f1',
 'obs_delta': 0.005161110722150863,
 'delta_mean': 0.005227898892727932,
 'ci_low': -0.023540836941293913,
 'ci_high': 0.034266979540137084,
 'p_value': 0.734,
 'n_boot': 5000}