**1. Dependencies and Setup**

In [1]:
!pip install catboost tslearn

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting tslearn
  Downloading tslearn-0.6.3-py3-none-any.whl.metadata (14 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tslearn-0.6.3-py3-none-any.whl (374 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tslearn, catboost
Successfully installed catboost-1.2.7 tslearn-0.6.3


**2. Importing Libraries**

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from scipy import stats
from sklearn.preprocessing import StandardScaler
import matplotlib.dates as mdates
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

**3. Data Loading**

In [3]:
df = pd.read_csv("final_data_clipped.csv")

print(df.head())

  df = pd.read_csv("final_data_clipped.csv")


   Unnamed: 0  patient_id    pr_display spo2_display resp_display  \
0           0        7001  71 beats/min          96%          17%   
1           1        7001  71 beats/min          96%          17%   
2           2        7001  71 beats/min          96%          17%   
3           3        7001  71 beats/min          96%          17%   
4           4        7001  71 beats/min          96%          17%   

   pulse_rate_obscount  pulse_rate_avg  pulse_rate_min  pulse_rate_max  \
0                 10.0           70.01           66.63           71.52   
1                 10.0           70.01           66.63           71.52   
2                 10.0           70.01           66.63           71.52   
3                 10.0           70.01           66.63           71.52   
4                 10.0           70.01           66.63           71.52   

   pulse_rate_iqr  ...  QC Deviation from median.1  \
0            2.88  ...                    0.047985   
1            2.88  ...          

**4. Exploratory Data Analysis and Cleaning**

In this section, relevant features are identified and subsets are created based on the type of agents (JNJ or BMS).

In [4]:
complete_set = ['PT_ID','CRS on date (0 No, 1 Yes)','Agent (JNJ/BMS/Caribou)','datetime', 'spo2_avg', 'pulse_rate_avg','respiratory_rate_avg', 'covered_skin_temperature_avg','covered_axil_temperature_avg','Highest Ferritin',
 'Highest CRP','IL8',
 'TNFRSF9',
 'TIE2',
 'MCP-3',
 'CD40-L',
 'IL-1 alpha',
 'CD244',
 'EGF',
 'ANGPT1',
 'IL7',
 'PGF',
 'IL6',
 'ADGRG1',
 'MCP-1',
 'CRTAM',
 'CXCL11',
 'MCP-4',
 'TRAIL',
 'FGF2',
 'CXCL9',
 'CD8A',
 'CAIX',
 'MUC-16',
 'ADA',
 'CD4',
 'NOS3',
 'IL2',
 'Gal-9',
 'VEGFR-2',
 'CD40',
 'IL18',
 'GZMH',
 'KIR3DL1',
 'LAP TGF-beta-1',
 'CXCL1',
 'TNFSF14',
 'IL33',
 'TWEAK',
 'PDGF subunit B',
 'PDCD1',
 'FASLG',
 'CD28',
 'CCL19',
 'MCP-2',
 'CCL4',
 'IL15',
 'Gal-1',
 'PD-L1',
 'CD27',
 'CXCL5',
 'IL5',
 'HGF',
 'GZMA',
 'HO-1',
 'CX3CL1',
 'CXCL10',
 'CD70',
 'IL10',
 'TNFRSF12A',
 'CCL23',
 'CD5',
 'CCL3',
 'MMP7',
 'ARG1',
 'NCR1',
 'DCN',
 'TNFRSF21',
 'TNFRSF4',
 'MIC-A/B',
 'CCL17',
 'ANGPT2',
 'PTN',
 'CXCL12',
 'IFN-gamma',
 'LAMP3',
 'CASP-8',
 'ICOSLG',
 'MMP12',
 'CXCL13',
 'PD-L2',
 'VEGFA',
 'IL4',
 'LAG3',
 'IL12RB1',
 'IL13',
 'CCL20',
 'TNF',
 'KLRD1',
 'GZMB',
 'CD83',
 'IL12',
 'CSF-1',]

Taking different feature sets for JNJ and BMS

In [5]:
columns_test_JNJ = ['Agent (JNJ/BMS/Caribou)', 'CAIX', 'CASP-8', 'CCL23', 'CD40-L', 'CD70',
'CRS on date (0 No, 1 Yes)', 'CXCL10', 'CXCL11', 'CXCL13', 'FASLG',
'FGF2', 'GZMB', 'GZMH', 'Highest CRP', 'Highest Ferritin', 'IFN-gamma',
'IL10', 'IL13', 'IL15', 'IL6', 'IL8', 'MCP-2', 'MMP12', 'PT_ID',
'TIE2', 'TNFRSF9', 'TNFSF14', 'covered_skin_temperature_avg', 'datetime',
'pulse_rate_avg', 'respiratory_rate_avg', 'spo2_avg']
columns_test_BMS = ['FASLG', 'MCP-1', 'CD8A', 'CD70', 'CCL19', 'Highest CRP', 'KLRD1', 'TNFRSF9', 'CXCL12', 'ADGRG1', 'IL2', 'CXCL11', 'GZMH', 'TRAIL', 'IL5', 'TNFSF14', 'HO-1', 'CXCL1', 'CXCL5', 'CD244',
 'PT_ID', 'CRS on date (0 No, 1 Yes)', 'Agent (JNJ/BMS/Caribou)', 'datetime', 'spo2_avg', 'pulse_rate_avg', 'respiratory_rate_avg', 'covered_skin_temperature_avg', 'IL8', 'IL6', 'CXCL10',
 'IFN-gamma', 'CCL23', 'CASP-8', 'CXCL13']
df_subset_JNJ = df[columns_test_JNJ]
df_subset_BMS = df[columns_test_BMS]
df_subset_JNJ.head(5)
df_subset_BMS.head(5)

Unnamed: 0,FASLG,MCP-1,CD8A,CD70,CCL19,Highest CRP,KLRD1,TNFRSF9,CXCL12,ADGRG1,...,pulse_rate_avg,respiratory_rate_avg,covered_skin_temperature_avg,IL8,IL6,CXCL10,IFN-gamma,CCL23,CASP-8,CXCL13
0,5.80632,11.55053,8.24133,3.73004,10.73964,22.6,4.53595,5.67664,2.73464,2.15725,...,70.01,24.35,27.65,5.57085,4.75454,9.13509,5.93862,10.8933,4.38744,6.72158
1,5.80632,11.55053,8.24133,3.73004,10.73964,22.6,4.53595,5.67664,2.73464,2.15725,...,70.01,24.35,27.65,5.57085,4.75454,9.13509,5.93862,10.8933,4.38744,6.72158
2,5.810654,11.572441,8.239465,3.734411,10.745863,22.574747,4.536448,5.677832,2.735678,2.15513,...,70.01,19.86,27.735,5.575702,4.773422,9.158341,6.003697,10.897528,4.40339,6.724164
3,5.814989,11.594352,8.237599,3.738781,10.752086,22.549495,4.536946,5.679023,2.736717,2.15301,...,70.01,15.37,27.82,5.580553,4.792303,9.181591,6.068774,10.901756,4.41934,6.726747
4,5.819323,11.616262,8.235734,3.743152,10.75831,22.524242,4.537444,5.680215,2.737755,2.15089,...,70.01,11.77,27.78,5.585405,4.811185,9.204842,6.133852,10.905983,4.435289,6.729331


In [6]:
df_JNJ = df_subset_JNJ[(df_subset_JNJ['Agent (JNJ/BMS/Caribou)']=='JNJ') | (df_subset_JNJ['Agent (JNJ/BMS/Caribou)']=='JNJ OOS')]
df_BMS = df_subset_BMS[(df_subset_BMS['Agent (JNJ/BMS/Caribou)']=='BMS')]

**5. Data Individualization and Baseline Adjustment**

Columns for JNJ are chosen here.

In [7]:
# Individualize the data
# Create a baseline dataframe
data = df_JNJ.copy()

baseline = data.groupby('PT_ID').first().reset_index()

# Subtract the baseline values for numeric columns
columns_to_individualize = [col for col in columns_test_JNJ if col not in ['PT_ID', 'CRS on date (0 No, 1 Yes)', 'Agent (JNJ/BMS/Caribou)','datetime']]

for col in columns_to_individualize:
    data[col] = pd.to_numeric(data[col], errors='coerce')
    data[col] = data[col] - data.groupby('PT_ID')[col].transform('first')

data.head()

Unnamed: 0,Agent (JNJ/BMS/Caribou),CAIX,CASP-8,CCL23,CD40-L,CD70,"CRS on date (0 No, 1 Yes)",CXCL10,CXCL11,CXCL13,...,MMP12,PT_ID,TIE2,TNFRSF9,TNFSF14,covered_skin_temperature_avg,datetime,pulse_rate_avg,respiratory_rate_avg,spo2_avg
42639,JNJ,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,...,0.0,7004,0.0,0.0,0.0,0.0,2022-12-05 15:44:00,0.0,0.0,0.0
42640,JNJ,1.8e-05,-0.005952,-0.000287,0.004429,-0.001577,0,-0.001205,0.004758,-5.1e-05,...,-0.000666,7004,-0.000192,-0.001087,-0.000739,0.0,2022-12-05 15:45:00,0.0,0.0,0.0
42641,JNJ,3.6e-05,-0.011904,-0.000574,0.008858,-0.003154,0,-0.00241,0.009515,-0.000102,...,-0.001331,7004,-0.000384,-0.002174,-0.001478,0.01,2022-12-05 15:46:00,0.0,7.21,0.0
42642,JNJ,5.5e-05,-0.017857,-0.000861,0.013288,-0.004731,0,-0.003616,0.014273,-0.000153,...,-0.001997,7004,-0.000576,-0.003261,-0.002216,0.08,2022-12-05 15:47:00,0.0,4.605,0.0
42643,JNJ,7.3e-05,-0.023809,-0.001148,0.017717,-0.006308,0,-0.004821,0.01903,-0.000203,...,-0.002663,7004,-0.000767,-0.004347,-0.002955,0.02,2022-12-05 15:48:00,0.0,2.0,0.0


**6. Feature Engineering: Rolling and Lagged Features**

Additional features are generated to capture short-term trends and variability in the measurements. Past values and rolling statistics over a 8-hour window are computed to provide temporal context for the prediction model.

In [8]:
# Define the window size for rolling statistics (e.g., past 6 hours)
window_size = 8
lag_size = 8
time_interval = 30

# Sort data by patient ID and datetime
data = data.sort_values(by=['PT_ID', 'datetime'])

# Function to create lagged features and rolling statistics
def add_past_features(data, columns,lag_size):
    for col in columns:
        # Add lagged values
        for lag_base in range(1, lag_size + 1):
            lag = lag_base * time_interval
            data[f'{col}_lag_{lag}'] = data.groupby('PT_ID')[col].shift(lag)

        rolling_size = lag_size * time_interval
        # Add rolling statistics
        data[f'{col}_rolling_mean_{rolling_size}'] = data.groupby('PT_ID')[col].rolling(rolling_size, min_periods=1).mean().reset_index(level=0, drop=True)
        data[f'{col}_rolling_std_{rolling_size}'] = data.groupby('PT_ID')[col].rolling(rolling_size, min_periods=1).std().reset_index(level=0, drop=True)
        data[f'{col}_rolling_min_{rolling_size}'] = data.groupby('PT_ID')[col].rolling(rolling_size, min_periods=1).min().reset_index(level=0, drop=True)
        data[f'{col}_rolling_max_{rolling_size}'] = data.groupby('PT_ID')[col].rolling(rolling_size, min_periods=1).max().reset_index(level=0, drop=True)


    return data

# Add past features for selected columns
columns_to_process = [col for col in columns_test_JNJ if col not in ['PT_ID','CRS on date (0 No, 1 Yes)','Agent (JNJ/BMS/Caribou)','datetime']]
data = add_past_features(data, columns_to_process, lag_size)

  data[f'{col}_rolling_mean_{rolling_size}'] = data.groupby('PT_ID')[col].rolling(rolling_size, min_periods=1).mean().reset_index(level=0, drop=True)
  data[f'{col}_rolling_std_{rolling_size}'] = data.groupby('PT_ID')[col].rolling(rolling_size, min_periods=1).std().reset_index(level=0, drop=True)
  data[f'{col}_rolling_min_{rolling_size}'] = data.groupby('PT_ID')[col].rolling(rolling_size, min_periods=1).min().reset_index(level=0, drop=True)
  data[f'{col}_rolling_max_{rolling_size}'] = data.groupby('PT_ID')[col].rolling(rolling_size, min_periods=1).max().reset_index(level=0, drop=True)
  data[f'{col}_lag_{lag}'] = data.groupby('PT_ID')[col].shift(lag)
  data[f'{col}_lag_{lag}'] = data.groupby('PT_ID')[col].shift(lag)
  data[f'{col}_lag_{lag}'] = data.groupby('PT_ID')[col].shift(lag)
  data[f'{col}_lag_{lag}'] = data.groupby('PT_ID')[col].shift(lag)
  data[f'{col}_lag_{lag}'] = data.groupby('PT_ID')[col].shift(lag)
  data[f'{col}_lag_{lag}'] = data.groupby('PT_ID')[col].shift(lag)
  da

**7. Creating column: CRS in 8 Hours**

In this section, a binary target variable CRS_in_8_hours is created. It indicates whether a patient will experience CRS within the next 8 hours from any given measurement time.

In [9]:
from datetime import timedelta

def assign_crs_in_8_hours(data):
    """
    Assign CRS_in_8_hours for each row based on whether `datetime + 8 hours` falls within a CRS occurrence time frame.

    Parameters:
        data (DataFrame): Input DataFrame with 'PT_ID', 'datetime', and 'CRS on date (0 No, 1 Yes)' columns.

    Returns:
        DataFrame: Updated DataFrame with a new column 'CRS_in_8_hours'.
    """
    # Ensure 'datetime' is a datetime object
    data['datetime'] = pd.to_datetime(data['datetime'])
    data = data.sort_values(by=['PT_ID', 'datetime'])

    # Initialize a new column
    data['CRS_in_8_hours'] = 0

    # Process each patient group separately
    for pt_id, group in data.groupby('PT_ID'):
        # Sort by datetime for the current patient
        group = group.sort_values('datetime')

        # Identify CRS occurrence start and end timeframes
        crs_start = group.index[(group['CRS on date (0 No, 1 Yes)'].shift(1) == 0) &
                                (group['CRS on date (0 No, 1 Yes)'] == 1)].tolist()
        crs_end = group.index[(group['CRS on date (0 No, 1 Yes)'].shift(1) == 1) &
                              (group['CRS on date (0 No, 1 Yes)'] == 0)].tolist()

        # If a CRS event starts but does not end, assume it continues until the last datetime
        if len(crs_start) > len(crs_end):
            crs_end.append(group.index[-1])

        # Assign CRS_in_8_hours for each row
        for start_idx, end_idx in zip(crs_start, crs_end):
            crs_start_time = group.loc[start_idx, 'datetime']
            crs_end_time = group.loc[end_idx, 'datetime']

            # Any datetime + 8 hours within the CRS occurrence timeframe is set to 1
            within_crs_timeframe = (group['datetime'] + timedelta(hours=6) >= crs_start_time) & \
                                   (group['datetime'] + timedelta(hours=6) <= crs_end_time)
            data.loc[group[within_crs_timeframe].index, 'CRS_in_8_hours'] = 1

    return data

# Apply the function to your dataset
data = assign_crs_in_8_hours(data)


In [10]:
# Handle lagged features: Fill missing values with 0
lagged_columns = [col for col in data.columns if 'lag' in col]
data[lagged_columns] = data[lagged_columns].fillna(0)

# Handle rolling statistics: Forward-fill within each patient group
rolling_columns = [col for col in data.columns if 'rolling' in col]
data[rolling_columns] = data.groupby('PT_ID')[rolling_columns].ffill()

# Fill any remaining missing values in rolling statistics with 0
data[rolling_columns] = data[rolling_columns].fillna(0)

# Separate numeric and non-numeric columns in other_columns
other_columns = [col for col in data.columns if col not in lagged_columns + rolling_columns + ['PT_ID', 'datetime']]
numeric_columns = [col for col in other_columns if data[col].dtype in ['int64', 'float64']]
non_numeric_columns = [col for col in other_columns if col not in numeric_columns]

# Handle numeric columns: Mean imputation
data[numeric_columns] = data[numeric_columns].fillna(data[numeric_columns].mean())

# Handle non-numeric columns: Fill missing values with mode
for col in non_numeric_columns:
    mode_value = data[col].mode().iloc[0]
    data[col] = data[col].fillna(mode_value)

# Check for remaining missing values
missing_values = data.isnull().sum()
missing_values = missing_values[missing_values > 0]

if missing_values.empty:
    print("No missing values remaining in the dataset.")
else:
    print("Columns with remaining missing values:")
    print(missing_values)

No missing values remaining in the dataset.


In [11]:
data[data['CRS_in_8_hours']==1]

Unnamed: 0,Agent (JNJ/BMS/Caribou),CAIX,CASP-8,CCL23,CD40-L,CD70,"CRS on date (0 No, 1 Yes)",CXCL10,CXCL11,CXCL13,...,spo2_avg_lag_120,spo2_avg_lag_150,spo2_avg_lag_180,spo2_avg_lag_210,spo2_avg_lag_240,spo2_avg_rolling_mean_240,spo2_avg_rolling_std_240,spo2_avg_rolling_min_240,spo2_avg_rolling_max_240,CRS_in_8_hours
63665,JNJ OOS,-1.396280,0.303270,0.435560,1.484800,0.071950,0,1.624240,1.996430,0.801890,...,-3.510,-3.630000,-3.270000,-3.160000,-3.8500,-2.892479,0.913514,-4.87,-0.68,1
63666,JNJ OOS,-1.396280,0.303270,0.435560,1.484800,0.071950,0,1.624240,1.996430,0.801890,...,-3.580,-3.550000,-3.320000,-3.260000,-3.7800,-2.884312,0.914307,-4.87,-0.68,1
63667,JNJ OOS,-1.396280,0.303270,0.435560,1.484800,0.071950,0,1.624240,1.996430,0.801890,...,-3.820,-3.620000,-3.170000,-3.560000,-3.6100,-2.876854,0.915663,-4.87,-0.68,1
63668,JNJ OOS,-1.396280,0.303270,0.435560,1.484800,0.071950,0,1.624240,1.996430,0.801890,...,-3.550,-3.630000,-3.070000,-3.290000,-3.8600,-2.872521,0.913449,-4.87,-0.68,1
63669,JNJ OOS,-1.396280,0.303270,0.435560,1.484800,0.071950,0,1.624240,1.996430,0.801890,...,-3.600,-3.630000,-2.940000,-3.140000,-4.0100,-2.863396,0.912977,-4.87,-0.68,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
313952,JNJ,-0.026034,0.619929,0.278777,-0.880114,0.880528,1,2.763175,1.728585,0.362329,...,7.225,4.342941,4.156250,9.312500,8.4700,5.524958,3.222942,-12.00,11.00,1
313953,JNJ,-0.025722,0.620340,0.278712,-0.880123,0.880709,1,2.763756,1.729170,0.362344,...,7.330,4.505000,3.984375,9.140625,9.0000,5.524125,3.222065,-12.00,11.00,1
313954,JNJ,-0.025411,0.620752,0.278647,-0.880132,0.880889,1,2.764337,1.729754,0.362359,...,7.435,4.667059,3.812500,8.968750,7.7425,5.526781,3.224164,-12.00,11.00,1
313955,JNJ,-0.025099,0.621163,0.278581,-0.880141,0.881070,1,2.764919,1.730338,0.362374,...,7.540,4.829118,3.640625,8.796875,6.4850,5.523635,3.223593,-12.00,11.00,1


**8. Model Training and Evaluation**

In this section, the dataset is split by patients into training and test sets using K-fold cross-validation. Several models (LightGBM, CatBoost, XGBoost) are trained to predict CRS_in_8_hours. Random oversampling is used to handle class imbalance. The performance is evaluated using accuracy, AUC-ROC, and classification reports.

**a. LightGBM Model with Oversampling**


A LightGBM classifier is trained and evaluated with cross-validation.

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score, confusion_matrix
from imblearn.over_sampling import RandomOverSampler
from lightgbm import LGBMClassifier

if 'datetime' in data.columns:
    data = data.drop(columns=['datetime'])
if 'cutoff_date' in data.columns:
    data = data.drop(columns=['cutoff_date'])

drop_cols = ['PT_ID', 'CRS_in_8_hours', 'CRS on date (0 No, 1 Yes)', 'Agent (JNJ/BMS/Caribou)']
feature_cols = [col for col in data.columns if col not in drop_cols]

unique_patients = data['PT_ID'].unique()
kf = KFold(n_splits=5, shuffle=False)

cv_accuracies = []
cv_auc_scores = []
aggregate_conf_matrix = np.array([[0, 0],
                                  [0, 0]])

for fold, (train_idx, test_idx) in enumerate(kf.split(unique_patients)):
    print(f"\nFold {fold + 1}/5")

    train_patients = unique_patients[train_idx]
    test_patients = unique_patients[test_idx]

    train_data = data[data['PT_ID'].isin(train_patients)]
    test_data = data[data['PT_ID'].isin(test_patients)]

    X_train = train_data[feature_cols]
    y_train = train_data['CRS_in_8_hours']

    X_test = test_data[feature_cols]
    y_test = test_data['CRS_in_8_hours']

    # Handle class imbalance with Random Oversampling
    oversampler = RandomOverSampler(random_state=42)
    X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)

    # Print class distribution after oversampling
    print("Class distribution after oversampling:")
    print(pd.Series(y_train_resampled).value_counts())

    lgbm_model = LGBMClassifier(
        objective='binary',
        max_depth=6,
        learning_rate=0.1,
        n_estimators=100,
        random_state=42
    )

    # Train the model
    lgbm_model.fit(X_train_resampled, y_train_resampled)

    # Predict on test set
    y_pred = lgbm_model.predict(X_test)
    y_prob = lgbm_model.predict_proba(X_test)[:, 1]

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    auc_score = roc_auc_score(y_test, y_prob)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print("Accuracy:", accuracy)
    print("AUC-ROC Score:", auc_score)
    print("Classification Report:\n", classification_report(y_test, y_pred))

    cv_accuracies.append(accuracy)
    cv_auc_scores.append(auc_score)
    aggregate_conf_matrix += conf_matrix

# Aggregate results
print("\nCross-Validation Results:")
print(f"Average Accuracy: {np.mean(cv_accuracies)}")
print(f"Average AUC-ROC: {np.mean(cv_auc_scores)}")
print("Aggregated Confusion Matrix:\n", aggregate_conf_matrix)

tn, fp, fn, tp = aggregate_conf_matrix.ravel()
class0_accuracy = tn / (tn + fp) if (tn + fp) > 0 else 0
class1_accuracy = tp / (fn + tp) if (fn + tp) > 0 else 0

print(f"Accuracy for class 0: {class0_accuracy}")
print(f"Accuracy for class 1: {class1_accuracy}")


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.




Fold 1/5
Class distribution after oversampling:
CRS_in_8_hours
0    108387
1    108387
Name: count, dtype: int64
[LightGBM] [Info] Number of positive: 108387, number of negative: 108387
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.088809 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 94022
[LightGBM] [Info] Number of data points in the train set: 216774, number of used features: 377
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Accuracy: 0.9269439848735523
AUC-ROC Score: 0.7214410343419587
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.95      0.96     40638
           1       0.19      0.25      0.22      1672

    accuracy                           0.93     42310
   macro avg       0.58      0.60      0.59     42310
weighted avg       

**b. CatBoost Model**


CatBoost classifier is also tested. Class weights are used to handle class imbalance, and a similar cross-validation approach is followed.

In [13]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score, recall_score, confusion_matrix
from sklearn.model_selection import KFold
from catboost import CatBoostClassifier

# Drop unnecessary columns if they exist
if 'cutoff_date' in data.columns:
    data = data.drop(columns=['cutoff_date'])

if 'datetime' in data.columns:
    data = data.drop(columns=['datetime'])

# Define columns to drop and feature columns
drop_cols = ['PT_ID', 'CRS on date (0 No, 1 Yes)', 'Agent (JNJ/BMS/Caribou)', 'CRS_in_8_hours']
feature_cols = [col for col in data.columns if col not in drop_cols]

# Unique patients and KFold
unique_patients = data['PT_ID'].unique()
kf = KFold(n_splits=5, shuffle=False)

# Metrics storage
cv_accuracies = []
cv_auc_scores = []
cv_classification_reports = []
aggregate_conf_matrix = np.array([[0, 0],
                                  [0, 0]])

# Initialize CatBoost model
catboost_model = CatBoostClassifier(
    loss_function='Logloss',
    max_depth=6,
    learning_rate=0.1,
    iterations=100,
    random_seed=42,
    verbose=False,
    class_weights=[1, 2.0]  # Adjust based on class imbalance
)

# Cross-validation loop
for fold, (train_idx, test_idx) in enumerate(kf.split(unique_patients)):
    print(f"\nFold {fold + 1}/5")

    # Train and test patients
    train_patients = unique_patients[train_idx]
    test_patients = unique_patients[test_idx]

    # Train and test data
    train_data = data[data['PT_ID'].isin(train_patients)]
    test_data = data[data['PT_ID'].isin(test_patients)]

    X_train = train_data[feature_cols]
    y_train = train_data['CRS_in_8_hours']
    X_test = test_data[feature_cols]
    y_test = test_data['CRS_in_8_hours']

    # Fit CatBoost model
    catboost_model.fit(X_train, y_train)

    # Predict probabilities
    y_prob = catboost_model.predict_proba(X_test)[:, 1]

    # Determine best threshold
    thresholds = np.linspace(0, 1, 101)
    best_bal_acc = 0.0
    best_threshold = 0.5

    for th in thresholds:
        y_pred_th = (y_prob >= th).astype(int)
        rec_class0 = recall_score(y_test, y_pred_th, pos_label=0)
        rec_class1 = recall_score(y_test, y_pred_th, pos_label=1)
        bal_acc = 0.5 * (rec_class0 + rec_class1)
        if bal_acc > best_bal_acc:
            best_bal_acc = bal_acc
            best_threshold = th

    # Use best threshold
    y_pred = (y_prob >= best_threshold).astype(int)

    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_prob)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred, digits=2, output_dict=True)

    # Log metrics
    print(f"Best Threshold: {best_threshold:.3f}")
    print(f"Accuracy: {acc:.4f}")
    print(f"AUC-ROC: {auc:.4f}")
    print(f"Balanced Accuracy: {best_bal_acc:.4f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred, digits=2))
    print("-" * 50)

    cv_accuracies.append(acc)
    cv_auc_scores.append(auc)
    cv_classification_reports.append(class_report)
    aggregate_conf_matrix += conf_matrix

# Cross-validation results
print("\nCross-Validation Results:")
print(f"Average Accuracy: {np.mean(cv_accuracies)}")
print(f"Average AUC-ROC: {np.mean(cv_auc_scores)}")
print("Aggregated Confusion Matrix:\n", aggregate_conf_matrix)

# Class-specific accuracy
tn, fp, fn, tp = aggregate_conf_matrix.ravel()
class0_accuracy = tn / (tn + fp) if (tn + fp) > 0 else 0
class1_accuracy = tp / (fn + tp) if (fn + tp) > 0 else 0

print(f"Accuracy for class 0: {class0_accuracy}")
print(f"Accuracy for class 1: {class1_accuracy}")



Fold 1/5
Best Threshold: 0.010
Accuracy: 0.8154
AUC-ROC: 0.8865
Balanced Accuracy: 0.8597
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.81      0.89     40638
           1       0.17      0.91      0.28      1672

    accuracy                           0.82     42310
   macro avg       0.58      0.86      0.59     42310
weighted avg       0.96      0.82      0.87     42310

--------------------------------------------------

Fold 2/5
Best Threshold: 0.040
Accuracy: 0.7716
AUC-ROC: 0.9044
Balanced Accuracy: 0.8710
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.76      0.87     38124
           1       0.13      0.98      0.23      1405

    accuracy                           0.77     39529
   macro avg       0.57      0.87      0.55     39529
weighted avg       0.97      0.77      0.84     39529

--------------------------------------------------

Fold 3/5
Best T

**c. XGBoost Model Training**


In this section, an XGBoost model is trained and evaluated using the same methodology. Random oversampling is applied and performance metrics are calculated to compare against other models.

In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score, confusion_matrix
from imblearn.over_sampling import RandomOverSampler
import xgboost as xgb

if 'cutoff_date' in data.columns:
    data = data.drop(columns=['cutoff_date'])

if 'datetime' in data.columns:
    data = data.drop(columns=['datetime'])

drop_cols = ['PT_ID', 'CRS on date (0 No, 1 Yes)', 'Agent (JNJ/BMS/Caribou)', 'CRS_in_8_hours']
feature_cols = [col for col in data.columns if col not in drop_cols]

unique_patients = data['PT_ID'].unique()
kf = KFold(n_splits=5, shuffle=False)

cv_accuracies = []
cv_auc_scores = []
cv_classification_reports = []
aggregate_conf_matrix = np.array([[0, 0],
                                  [0, 0]])

for fold, (train_idx, test_idx) in enumerate(kf.split(unique_patients)):
    print(f"\nFold {fold + 1}/5")

    train_patients = unique_patients[train_idx]
    test_patients = unique_patients[test_idx]

    train_data = data[data['PT_ID'].isin(train_patients)]
    test_data = data[data['PT_ID'].isin(test_patients)]

    X_train = train_data[feature_cols]
    y_train = train_data['CRS_in_8_hours']

    X_test = test_data[feature_cols]
    y_test = test_data['CRS_in_8_hours']


    # Random Oversampling
    oversampler = RandomOverSampler(random_state=42)
    X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)

    dtrain = xgb.DMatrix(X_train_resampled, label=y_train_resampled)
    dtest = xgb.DMatrix(X_test, label=y_test)

    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'max_depth': 6,
        'learning_rate': 0.1
    }

    model = xgb.train(params, dtrain, num_boost_round=100)

    y_prob = model.predict(dtest)
    threshold = 0.01
    y_pred = (y_prob > threshold).astype(int)

    accuracy = accuracy_score(y_test, y_pred)
    auc_score = roc_auc_score(y_test, y_prob)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred, output_dict=True)

    print("Accuracy:", accuracy)
    print("AUC-ROC Score:", auc_score)
    print("Classification Report:\n", classification_report(y_test, y_pred))

    cv_accuracies.append(accuracy)
    cv_auc_scores.append(auc_score)
    cv_classification_reports.append(class_report)
    aggregate_conf_matrix += conf_matrix

print("\nCross-Validation Results:")
print(f"Average Accuracy: {np.mean(cv_accuracies)}")
print(f"Average AUC-ROC: {np.mean(cv_auc_scores)}")
print("Aggregated Confusion Matrix:\n", aggregate_conf_matrix)

tn, fp, fn, tp = aggregate_conf_matrix.ravel()
class0_accuracy = tn / (tn + fp) if (tn + fp) > 0 else 0
class1_accuracy = tp / (fn + tp) if (fn + tp) > 0 else 0

print(f"Accuracy for class 0: {class0_accuracy}")
print(f"Accuracy for class 1: {class1_accuracy}")




Fold 1/5
Accuracy: 0.8945166627274876
AUC-ROC Score: 0.9220486544048268
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.91      0.94     40638
           1       0.16      0.41      0.23      1672

    accuracy                           0.89     42310
   macro avg       0.57      0.66      0.59     42310
weighted avg       0.94      0.89      0.92     42310


Fold 2/5
Accuracy: 0.7367249361228465
AUC-ROC Score: 0.9313649297982869
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.73      0.84     38124
           1       0.12      1.00      0.21      1405

    accuracy                           0.74     39529
   macro avg       0.56      0.86      0.53     39529
weighted avg       0.97      0.74      0.82     39529


Fold 3/5
Accuracy: 0.7738443876953354
AUC-ROC Score: 0.7686904168687482
Classification Report:
               precision    recall  f1-score   support



d. Logistic Regression

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score, recall_score, confusion_matrix

logistic_model = LogisticRegression(random_state=42, class_weight='balanced', solver='liblinear')

cv_accuracies = []
cv_auc_scores = []
cv_classification_reports = []
aggregate_conf_matrix = np.array([[0, 0],
                                  [0, 0]])

for fold, (train_idx, test_idx) in enumerate(kf.split(unique_patients)):
    print(f"\nFold {fold + 1}/5")

    train_patients = unique_patients[train_idx]
    test_patients = unique_patients[test_idx]

    train_data = data[data['PT_ID'].isin(train_patients)]
    test_data = data[data['PT_ID'].isin(test_patients)]

    X_train = train_data[feature_cols]
    y_train = train_data['CRS_in_8_hours']
    X_test = test_data[feature_cols]
    y_test = test_data['CRS_in_8_hours']

    logistic_model.fit(X_train, y_train)
    y_prob = logistic_model.predict_proba(X_test)[:, 1]

    # Determine best threshold
    thresholds = np.linspace(0, 1, 101)
    best_bal_acc = 0.0
    best_threshold = 0.5

    for th in thresholds:
        y_pred_th = (y_prob >= th).astype(int)
        rec_class0 = recall_score(y_test, y_pred_th, pos_label=0)
        rec_class1 = recall_score(y_test, y_pred_th, pos_label=1)
        bal_acc = 0.5 * (rec_class0 + rec_class1)
        if bal_acc > best_bal_acc:
            best_bal_acc = bal_acc
            best_threshold = th

    y_pred = (y_prob >= best_threshold).astype(int)
    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_prob)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred, digits=2, output_dict=True)

    print(f"Best Threshold: {best_threshold:.3f}")
    print(f"Accuracy: {acc:.4f}")
    print(f"AUC-ROC: {auc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred, digits=2))

    cv_accuracies.append(acc)
    cv_auc_scores.append(auc)
    cv_classification_reports.append(class_report)
    aggregate_conf_matrix += conf_matrix

# Cross-validation summary
print("\nCross-Validation Results:")
print(f"Average Accuracy: {np.mean(cv_accuracies)}")
print(f"Average AUC-ROC: {np.mean(cv_auc_scores)}")
print("Aggregated Confusion Matrix:\n", aggregate_conf_matrix)

tn, fp, fn, tp = aggregate_conf_matrix.ravel()
class0_accuracy = tn / (tn + fp) if (tn + fp) > 0 else 0
class1_accuracy = tp / (fn + tp) if (fn + tp) > 0 else 0

print(f"Accuracy for class 0: {class0_accuracy}")
print(f"Accuracy for class 1: {class1_accuracy}")


Fold 1/5




Best Threshold: 0.570
Accuracy: 0.7505
AUC-ROC: 0.8913
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.74      0.85     40638
           1       0.13      0.89      0.22      1672

    accuracy                           0.75     42310
   macro avg       0.56      0.82      0.54     42310
weighted avg       0.96      0.75      0.83     42310


Fold 2/5




Best Threshold: 0.740
Accuracy: 0.7432
AUC-ROC: 0.8825
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.73      0.85     38124
           1       0.12      1.00      0.22      1405

    accuracy                           0.74     39529
   macro avg       0.56      0.87      0.53     39529
weighted avg       0.97      0.74      0.82     39529


Fold 3/5




Best Threshold: 0.980
Accuracy: 0.8225
AUC-ROC: 0.8188
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.87      0.87     28259
           1       0.75      0.72      0.73     14424

    accuracy                           0.82     42683
   macro avg       0.80      0.80      0.80     42683
weighted avg       0.82      0.82      0.82     42683


Fold 4/5




Best Threshold: 0.690
Accuracy: 0.8746
AUC-ROC: 0.7844
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.92      0.93     15567
           1       0.54      0.61      0.57      2477

    accuracy                           0.87     18044
   macro avg       0.74      0.77      0.75     18044
weighted avg       0.88      0.87      0.88     18044


Fold 5/5




Best Threshold: 0.460
Accuracy: 0.8968
AUC-ROC: 0.7054
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.96      0.94     26437
           1       0.66      0.49      0.56      4153

    accuracy                           0.90     30590
   macro avg       0.79      0.72      0.75     30590
weighted avg       0.89      0.90      0.89     30590


Cross-Validation Results:
Average Accuracy: 0.8175012360885828
Average AUC-ROC: 0.8164777501120504
Aggregated Confusion Matrix:
 [[122606  26419]
 [  7289  16842]]
Accuracy for class 0: 0.8227210199630934
Accuracy for class 1: 0.6979404086030417


e. Random Forest

In [16]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=6,
    random_state=42,
    class_weight='balanced'
)

cv_accuracies = []
cv_auc_scores = []
cv_classification_reports = []
aggregate_conf_matrix = np.array([[0, 0],
                                  [0, 0]])

for fold, (train_idx, test_idx) in enumerate(kf.split(unique_patients)):
    print(f"\nFold {fold + 1}/5")

    train_patients = unique_patients[train_idx]
    test_patients = unique_patients[test_idx]

    train_data = data[data['PT_ID'].isin(train_patients)]
    test_data = data[data['PT_ID'].isin(test_patients)]

    X_train = train_data[feature_cols]
    y_train = train_data['CRS_in_8_hours']
    X_test = test_data[feature_cols]
    y_test = test_data['CRS_in_8_hours']

    rf_model.fit(X_train, y_train)
    y_prob = rf_model.predict_proba(X_test)[:, 1]

    thresholds = np.linspace(0, 1, 101)
    best_bal_acc = 0.0
    best_threshold = 0.5

    for th in thresholds:
        y_pred_th = (y_prob >= th).astype(int)
        rec_class0 = recall_score(y_test, y_pred_th, pos_label=0)
        rec_class1 = recall_score(y_test, y_pred_th, pos_label=1)
        bal_acc = 0.5 * (rec_class0 + rec_class1)
        if bal_acc > best_bal_acc:
            best_bal_acc = bal_acc
            best_threshold = th

    y_pred = (y_prob >= best_threshold).astype(int)
    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_prob)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred, digits=2, output_dict=True)

    print(f"Best Threshold: {best_threshold:.3f}")
    print(f"Accuracy: {acc:.4f}")
    print(f"AUC-ROC: {auc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred, digits=2))

    cv_accuracies.append(acc)
    cv_auc_scores.append(auc)
    cv_classification_reports.append(class_report)
    aggregate_conf_matrix += conf_matrix

print("\nCross-Validation Results:")
print(f"Average Accuracy: {np.mean(cv_accuracies)}")
print(f"Average AUC-ROC: {np.mean(cv_auc_scores)}")
print("Aggregated Confusion Matrix:\n", aggregate_conf_matrix)

tn, fp, fn, tp = aggregate_conf_matrix.ravel()
class0_accuracy = tn / (tn + fp) if (tn + fp) > 0 else 0
class1_accuracy = tp / (fn + tp) if (fn + tp) > 0 else 0

print(f"Accuracy for class 0: {class0_accuracy}")
print(f"Accuracy for class 1: {class1_accuracy}")



Fold 1/5
Best Threshold: 0.310
Accuracy: 0.8605
AUC-ROC: 0.9161
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.86      0.92     40638
           1       0.21      0.94      0.35      1672

    accuracy                           0.86     42310
   macro avg       0.61      0.90      0.63     42310
weighted avg       0.97      0.86      0.90     42310


Fold 2/5
Best Threshold: 0.650
Accuracy: 0.9006
AUC-ROC: 0.9223
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.90      0.95     38124
           1       0.26      1.00      0.42      1405

    accuracy                           0.90     39529
   macro avg       0.63      0.95      0.68     39529
weighted avg       0.97      0.90      0.93     39529


Fold 3/5
Best Threshold: 0.050
Accuracy: 0.7917
AUC-ROC: 0.7984
Classification Report:
               precision    recall  f1-score   support

           0       0.86 

f. KNN

In [17]:
from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier(n_neighbors=5)

cv_accuracies = []
cv_auc_scores = []
cv_classification_reports = []
aggregate_conf_matrix = np.array([[0, 0],
                                  [0, 0]])

for fold, (train_idx, test_idx) in enumerate(kf.split(unique_patients)):
    print(f"\nFold {fold + 1}/5")

    train_patients = unique_patients[train_idx]
    test_patients = unique_patients[test_idx]

    train_data = data[data['PT_ID'].isin(train_patients)]
    test_data = data[data['PT_ID'].isin(test_patients)]

    X_train = train_data[feature_cols]
    y_train = train_data['CRS_in_8_hours']
    X_test = test_data[feature_cols]
    y_test = test_data['CRS_in_8_hours']

    knn_model.fit(X_train, y_train)
    y_prob = knn_model.predict_proba(X_test)[:, 1]

    thresholds = np.linspace(0, 1, 101)
    best_bal_acc = 0.0
    best_threshold = 0.5

    for th in thresholds:
        y_pred_th = (y_prob >= th).astype(int)
        rec_class0 = recall_score(y_test, y_pred_th, pos_label=0)
        rec_class1 = recall_score(y_test, y_pred_th, pos_label=1)
        bal_acc = 0.5 * (rec_class0 + rec_class1)
        if bal_acc > best_bal_acc:
            best_bal_acc = bal_acc
            best_threshold = th

    y_pred = (y_prob >= best_threshold).astype(int)
    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_prob)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred, digits=2, output_dict=True)

    print(f"Best Threshold: {best_threshold:.3f}")
    print(f"Accuracy: {acc:.4f}")
    print(f"AUC-ROC: {auc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred, digits=2))

    cv_accuracies.append(acc)
    cv_auc_scores.append(auc)
    cv_classification_reports.append(class_report)
    aggregate_conf_matrix += conf_matrix

print("\nCross-Validation Results:")
print(f"Average Accuracy: {np.mean(cv_accuracies)}")
print(f"Average AUC-ROC: {np.mean(cv_auc_scores)}")
print("Aggregated Confusion Matrix:\n", aggregate_conf_matrix)

tn, fp, fn, tp = aggregate_conf_matrix.ravel()
class0_accuracy = tn / (tn + fp) if (tn + fp) > 0 else 0
class1_accuracy = tp / (fn + tp) if (fn + tp) > 0 else 0

print(f"Accuracy for class 0: {class0_accuracy}")
print(f"Accuracy for class 1: {class1_accuracy}")



Fold 1/5
Best Threshold: 0.010
Accuracy: 0.8463
AUC-ROC: 0.5822
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.87      0.92     40638
           1       0.09      0.30      0.13      1672

    accuracy                           0.85     42310
   macro avg       0.53      0.58      0.52     42310
weighted avg       0.93      0.85      0.88     42310


Fold 2/5
Best Threshold: 0.210
Accuracy: 0.8549
AUC-ROC: 0.6704
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.87      0.92     38124
           1       0.12      0.49      0.19      1405

    accuracy                           0.85     39529
   macro avg       0.55      0.68      0.56     39529
weighted avg       0.95      0.85      0.89     39529


Fold 3/5
Best Threshold: 0.810
Accuracy: 0.6628
AUC-ROC: 0.5460
Classification Report:
               precision    recall  f1-score   support

           0       0.69 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best Threshold: 0.000
Accuracy: 0.1373
AUC-ROC: 0.4345
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00     15567
           1       0.14      1.00      0.24      2477

    accuracy                           0.14     18044
   macro avg       0.07      0.50      0.12     18044
weighted avg       0.02      0.14      0.03     18044


Fold 5/5
Best Threshold: 0.010
Accuracy: 0.8677
AUC-ROC: 0.5361
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.99      0.93     26437
           1       0.59      0.08      0.14      4153

    accuracy                           0.87     30590
   macro avg       0.73      0.54      0.54     30590
weighted avg       0.83      0.87      0.82     30590


Cross-Validation Results:
Average Accuracy: 0.6737907512125523
Average AUC-ROC: 0.5538451302725076
Aggregated Confusion Matrix:
 [[120095  28930]
 [ 17316   6815]]
Accuracy for 

Results Evaluation

Random Forest was chosen here due to its superior Class 1 performance. While CatBoost showed a slightly higher AUC-ROC (0.8938 vs. Random Forest’s 0.8932) and overall accuracy (0.8552 vs. 0.8296), Random Forest achieved a much higher Class 1 accuracy (~0.8265 vs. 0.7081).

This improved sensitivity is critical for ensuring that CRS patients are correctly identified at the 8-hour interval, which is likely more beneficial for patient outcomes than small differences in overall metrics.