In [None]:
import pandas as pd
import numpy as np

# Reloading the dataset to work with the variables
file_path = "CKD_EHR.csv"
data = pd.read_csv(file_path)

In [None]:
!pip install scikit-survival
!pip install lifelines
!pip install torchtuples

!pip install scikit-learn==1.2.2
!pip install imbalanced-learn==0.9.1

Collecting scikit-survival
  Downloading scikit_survival-0.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/49.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.0/49.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Downloading scikit_survival-0.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.7/3.7 MB[0m [31m58.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-survival
Successfully installed scikit-survival-0.23.1
Collecting lifelines
  Downloading lifelines-0.30.0-py3-none-any.whl.metadata (3.2 kB)
Collecting autograd-gamma>=0.3 (from lifelines)
  Downloading autograd-gamma-0.5.0.tar.gz (4.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting formulaic>=0.2.2 (from lifelines)
  Downloading formulaic

In [None]:
# Basic libraries
import pandas as pd
import numpy as np
import random
import os

# Survival analysis
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from lifelines.plotting import add_at_risk_counts
from lifelines.statistics import proportional_hazard_test

# Machine learning and data processing
from sklearn.model_selection import RepeatedStratifiedKFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn_pandas import DataFrameMapper
from sklearn.metrics import roc_auc_score

# XGBoost library
import xgboost as xgb

# PyTorch libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Plotting
import matplotlib.pyplot as plt

def seed_everything(seed=42):
    """
    Seed everything to make all operations in PyTorch deterministic.
    Args:
        seed (int): Seed value to set. Default is 42.
    """
    random.seed(seed)         # Python random module.
    np.random.seed(seed)      # Numpy module.
    os.environ['PYTHONHASHSEED'] = str(seed)  # Set PYTHONHASHSEED env variable at a fixed value

    torch.manual_seed(seed)   # Sets the seed for generating random numbers for all devices (both CPU and CUDA).
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # For multi-GPU setups.

    torch.backends.cudnn.deterministic = True  # Forces to use the deterministic algorithm
    torch.backends.cudnn.benchmark = False     # If True, causes cuDNN to benchmark multiple convolution algorithms and select the fastest.

seed_everything()

In [None]:
# Importing pandas to reload the data
file_path = "CKD_EHR.csv"
data = pd.read_csv(file_path)

# Assuming `data` is already loaded with original column names
# Dictionary for renaming columns with the updated meta-type prefixes
rename_dict = {
    'AgeBaseline': 'DEM_Age',
    'Sex': 'DEM_Sex',
    'HistorySmoking': 'DEM_Smoking_History',
    'HistoryObesity': 'DEM_Obesity_History',

    'CholesterolBaseline': 'CBM_Cholesterol',
    'CreatinineBaseline': 'CBM_Creatinine',
    'eGFRBaseline': 'CBM_eGFR',
    'sBPBaseline': 'CBM_Systolic_BP',
    'dBPBaseline': 'CBM_Diastolic_BP',
    'BMIBaseline': 'CBM_BMI',

    'HistoryDiabetes': 'MH_Diabetes_History',
    'HistoryCHD': 'MH_CHD_History',
    'HistoryVascular': 'MH_Vascular_History',
    'HistoryHTN ': 'MH_Hypertension_History',
    'HistoryDLD': 'MH_Dyslipidemia_History',

    'DLDmeds': 'MED_DLD_Meds',
    'DMmeds': 'MED_Diabetes_Meds',
    'HTNmeds': 'MED_HTN_Meds',
    'ACEIARB': 'MED_ACEI_ARB_Use',

    'EventCKD35': 'OUT_EventCKD35',
    'TimeToEventMonths': 'OUT_TimeToEventMonths'
}

# Renaming columns in the dataset
data.rename(columns=rename_dict, inplace=True)

# Reorganizing columns by meta-class
# Defining the desired column order based on meta-types
ordered_columns = [
    # Demographic and Lifestyle Variables
    'DEM_Age', 'DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',

    # Clinical Baseline Measurements
    'CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR', 'CBM_Systolic_BP', 'CBM_Diastolic_BP', 'CBM_BMI',

    # Medical History Variables
    'MH_Diabetes_History', 'MH_CHD_History', 'MH_Vascular_History', 'MH_Hypertension_History', 'MH_Dyslipidemia_History',

    # Medication Use Variables
    'MED_DLD_Meds', 'MED_Diabetes_Meds', 'MED_HTN_Meds', 'MED_ACEI_ARB_Use',

    # Outcome Variables
    'OUT_EventCKD35', 'OUT_TimeToEventMonths'
]

# Reordering the columns in the DataFrame
data = data[ordered_columns]

import copy
my_df = copy.copy(data)

my_df.head()

Unnamed: 0,DEM_Age,DEM_Sex,DEM_Smoking_History,DEM_Obesity_History,CBM_Cholesterol,CBM_Creatinine,CBM_eGFR,CBM_Systolic_BP,CBM_Diastolic_BP,CBM_BMI,...,MH_CHD_History,MH_Vascular_History,MH_Hypertension_History,MH_Dyslipidemia_History,MED_DLD_Meds,MED_Diabetes_Meds,MED_HTN_Meds,MED_ACEI_ARB_Use,OUT_EventCKD35,OUT_TimeToEventMonths
0,64,0,0,1,4.8,59.0,93.3,144,87,40,...,0,0,1,1,1,0,1,0,0,98
1,52,0,0,1,6.4,52.0,105.8,148,91,45,...,0,0,1,1,0,0,1,0,0,106
2,56,0,0,1,6.4,57.0,99.8,149,86,41,...,0,0,1,1,1,0,1,0,0,88
3,58,0,0,1,5.1,65.0,90.3,116,68,32,...,0,0,0,1,1,0,0,0,0,103
4,63,0,0,1,5.0,70.0,79.7,132,63,31,...,0,0,1,1,1,1,1,1,0,105


In [None]:
# Binary conversion function: set 1 for any condition present, 0 for none
def convert_to_binary(series, positive_conditions):
    return series.apply(lambda x: 1 if x in positive_conditions else 0)

# Creating stratified columns
my_df2 = copy.copy(my_df)

#---
my_df2['Strat_Age']         = my_df2['DEM_Age'].apply(lambda x: 'Older' if x >= 65 else 'Younger')
#---
my_df2['Strat_eGFR']        = my_df2['CBM_eGFR'].apply(lambda x: 'Normal' if x >= 90 else 'Non-Ideal')
#---
my_df2['Strat_Diabetes']    = my_df2.apply(
                                lambda  row:  'Diabetes With Medication' if \
                                                  row['MH_Diabetes_History'] == 1 and \
                                                  row['MED_Diabetes_Meds'] == 1 else \
                                              'Diabetes Without Medication' if \
                                                  row['MH_Diabetes_History'] == 1 else \
                                              'No Diabetes',
                                axis=1)
#---
my_df2['Strat_Hypertension'] = my_df2.apply(
                                lambda row:   'Hypertension With Medication' if \
                                                  row['MH_Hypertension_History'] == 1 and \
                                                  (row['MED_HTN_Meds'] == 1 or row['MED_ACEI_ARB_Use'] == 1) else \
                                              'Hypertension Without Medication' if \
                                                  row['MH_Hypertension_History'] == 1 else
                                              'No Hypertension',
                                axis=1)
#---
my_df2['Strat_BP_Level']     = my_df2.apply(
                                lambda row:   'Elevated BP' if \
                                                  row['CBM_Systolic_BP'] >= 140 or \
                                                  row['CBM_Diastolic_BP'] >= 90 else \
                                              'Normal BP',
                                axis=1)
#---
my_df2['Strat_Obesity']      = my_df2.apply(
                                lambda row:   'Obese' if \
                                                  row['CBM_BMI'] >= 30 or row['DEM_Obesity_History'] == 1 else \
                                              'Not Obese',
                                axis=1)
#---
my_df2['Strat_CVD']          = my_df2.apply(
                                lambda row:   'CVD' if \
                                                  row['MH_CHD_History'] == 1 or \
                                                  row['MH_Vascular_History'] == 1 else \
                                              'No CVD',
                                axis=1)
#---
my_df2['Strat_Lipid']        = my_df2.apply(
                                lambda row:   'Dyslipidemia With Medication' if \
                                                  row['MH_Dyslipidemia_History'] == 1 and \
                                                  row['MED_DLD_Meds'] == 1 else \
                                              'Dyslipidemia Without Medication' if \
                                                  row['MH_Dyslipidemia_History'] == 1 else \
                                              'No Dyslipidemia',
                                axis=1)
#---
my_df2['Strat_Dia_Fin']      = my_df2['Strat_Diabetes'].apply(
                                lambda x:     1 if  x != 'No Diabetes' else 0)
#---
my_df2['Strat_Hyper_Fin']    = my_df2.apply(
                                lambda row:   1 if  row['Strat_Hypertension'] != 'No Hypertension' or \
                                                    row['Strat_BP_Level'] == 'Elevated BP' else \
                                              0,
                                axis=1)
#---
my_df2['Strat_Lipid_Fin']    = my_df2['Strat_Lipid'].apply(
                                lambda x:     1 if  x != 'No Dyslipidemia' else 0)

###===###
my_df2['Strat_Age']           = convert_to_binary(my_df2['Strat_Age'], ['Older'])
my_df2['Strat_eGFR']          = convert_to_binary(my_df2['Strat_eGFR'], ['Non-Ideal'])
my_df2['Strat_Diabetes']      = convert_to_binary(my_df2['Strat_Diabetes'], ['Diabetes With Medication', 'Diabetes Without Medication'])
my_df2['Strat_Hypertension']  = convert_to_binary(my_df2['Strat_Hypertension'], ['Hypertension With Medication', 'Hypertension Without Medication'])
my_df2['Strat_BP_Level']      = convert_to_binary(my_df2['Strat_BP_Level'], ['Elevated BP'])
my_df2['Strat_Obesity']       = convert_to_binary(my_df2['Strat_Obesity'], ['Obese'])
my_df2['Strat_CVD']           = convert_to_binary(my_df2['Strat_CVD'], ['CVD'])
my_df2['Strat_Lipid']         = convert_to_binary(my_df2['Strat_Lipid'], ['Dyslipidemia With Medication', 'Dyslipidemia Without Medication'])

In [None]:
# DEM_ variables
dem_sex_ratio             = my_df2['DEM_Sex'].value_counts(normalize=True)
strat_age_ratio           = my_df2['Strat_Age'].value_counts(normalize=True)
dem_smoking_history_ratio = my_df2['DEM_Smoking_History'].value_counts(normalize=True)

# non-DEM_ stratified variables, including the new flags
strat_egfr_ratio          = my_df2['Strat_eGFR'].value_counts(normalize=True)
strat_diabetes_ratio      = my_df2['Strat_Diabetes'].value_counts(normalize=True)
strat_hypertension_ratio  = my_df2['Strat_Hypertension'].value_counts(normalize=True)
strat_bp_level_ratio      = my_df2['Strat_BP_Level'].value_counts(normalize=True)
strat_obesity_ratio       = my_df2['Strat_Obesity'].value_counts(normalize=True)
strat_cvd_ratio           = my_df2['Strat_CVD'].value_counts(normalize=True)
strat_lipid_ratio         = my_df2['Strat_Lipid'].value_counts(normalize=True)
strat_dia_fin_ratio       = my_df2['Strat_Dia_Fin'].value_counts(normalize=True)
strat_hyper_fin_ratio     = my_df2['Strat_Hyper_Fin'].value_counts(normalize=True)
strat_lipid_fin_ratio     = my_df2['Strat_Lipid_Fin'].value_counts(normalize=True)

# Storing results in a structured dictionary
ratios = {
    "DEM_Sex": dem_sex_ratio,
    "Strat_Age": strat_age_ratio,
    "DEM_Smoking_History": dem_smoking_history_ratio,
    "Strat_eGFR": strat_egfr_ratio,
    "Strat_BP_Level": strat_bp_level_ratio,
    "Strat_Obesity": strat_obesity_ratio,
    "Strat_CVD": strat_cvd_ratio,
    "Strat_Dia_Fin": strat_dia_fin_ratio,
    "Strat_Hyper_Fin": strat_hyper_fin_ratio,
    "Strat_Lipid_Fin": strat_lipid_fin_ratio
}

np.round(pd.DataFrame(ratios), 4).T

Unnamed: 0,0,1
DEM_Sex,0.4908,0.5092
Strat_Age,0.7719,0.2281
DEM_Smoking_History,0.8473,0.1527
Strat_eGFR,0.6721,0.3279
Strat_BP_Level,0.6904,0.3096
Strat_Obesity,0.4949,0.5051
Strat_CVD,0.8615,0.1385
Strat_Dia_Fin,0.5621,0.4379
Strat_Hyper_Fin,0.3177,0.6823
Strat_Lipid_Fin,0.3544,0.6456


In [None]:
seed_everything()

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import lifelines
import math

class EvalSurv:
    #---
    def __init__(self, df):
        self.df = df
        self.df["PARTIAL_HAZARD"] = np.exp(self.df["LPH"])

        self.base_surv = None

    #---
    def get_risk_perc(self, at_time):
        return 100 * self.get_risk(at_time)

    def get_risk(self, at_time):
        return 1 - self.get_surv(at_time)

    def get_surv(self, at_time):
        # Handle time zero explicitly
        if at_time == 0:
            return np.ones(len(self.df))

        return np.power(
                  self.get_base_surv(at_time),
                  self.df["PARTIAL_HAZARD"]
              )

    def get_base_surv(self, at_time):
        if self.base_surv is None:
            self.compute_baseline_survival()

        return self.base_surv.loc[self.base_surv.index <= at_time].min()

    #---
    def compute_baseline_survival(self):
        df = self.df[["TIME", "EVENT", "PARTIAL_HAZARD"]]
        df = df.groupby(["TIME"]).sum().\
                sort_index(ascending = False)

        df["CUM_PARTIAL_HAZARD"] = df["PARTIAL_HAZARD"].cumsum()

        df = df[df["EVENT"] > 0]
        df["ALPHA"] = np.exp(
                        -df["EVENT"] / df["CUM_PARTIAL_HAZARD"])
        df.sort_index(inplace = True)
        df["S0"] = df["ALPHA"].cumprod()
        self.base_surv = df["S0"]


In [None]:
seed_everything()

from sklearn.model_selection import train_test_split
import copy

data_copy = copy.copy(my_df2)
covariate_cols = [
       'DEM_Age', 'DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',
       'CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR', 'CBM_Systolic_BP',
       'CBM_Diastolic_BP', 'CBM_BMI', 'MH_Diabetes_History', 'MH_CHD_History',
       'MH_Vascular_History', 'MH_Hypertension_History',
       'MH_Dyslipidemia_History', 'MED_DLD_Meds', 'MED_Diabetes_Meds',
       'MED_HTN_Meds', 'MED_ACEI_ARB_Use'
]
time_col = 'OUT_TimeToEventMonths'
event_col = 'OUT_EventCKD35'

Strat_col = ['Strat_Age', 'Strat_eGFR', 'Strat_BP_Level', 'Strat_Obesity',
             'Strat_CVD', 'Strat_Dia_Fin', 'Strat_Hyper_Fin', 'Strat_Lipid_Fin']

my_Selected = covariate_cols + [time_col, event_col] + Strat_col
data_copy = data_copy[my_Selected]

Calibration_df = copy.copy(data_copy)

num_folds = 5
for i in range(num_folds):
  df_trn, df_tst = \
    train_test_split(
        Calibration_df,
        test_size=0.5,
        train_size = 0.5,
        shuffle = True,
        stratify = Calibration_df[event_col])

  Calibration_df.loc[df_trn.index, "FOLD_"+str(i)] = 0
  Calibration_df.loc[df_tst.index, "FOLD_"+str(i)] = 1

Calibration_df.to_csv("CKD_FoldSwap.csv")

In [None]:
def process_data_base(Calibration_df):
    file_suffix = "original"
    PredictorCols = covariate_cols

    for fold in range(num_folds):
        for swap in range(2):
            idx = (Calibration_df[f"FOLD_{fold}"] == (1 - swap))
            x = Calibration_df.loc[idx, PredictorCols]
            time  = Calibration_df.loc[idx, time_col]
            event = Calibration_df.loc[idx, event_col]

            tim = pd.DataFrame(x)
            tim["TIME"] = time
            tim["EVENT"] = event

            cph = CoxPHFitter(penalizer=0.01)

            cph.fit(tim, duration_col='TIME', event_col='EVENT')

            # Generate predictions for the swap set
            idx = (Calibration_df[f"FOLD_{fold}"] == swap)
            x = Calibration_df.loc[idx, PredictorCols]

            df_cox = pd.DataFrame(
                {"LPH": np.dot(x - cph._norm_mean.values, cph.params_)}
            )

            df_cox.to_csv(f"CKD_FoldSwap_{fold}_{swap}_{file_suffix}.csv")


In [None]:
seed_everything()

process_data_base(Calibration_df)

In [None]:
seed_everything()

import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import RandomOverSampler
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

import copy

Selected_Bin_Col = ['DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',
                    'MH_Diabetes_History', 'MH_CHD_History',
                    'MH_Vascular_History', 'MH_Hypertension_History',
                    'MH_Dyslipidemia_History',
                    'MED_DLD_Meds', 'MED_Diabetes_Meds',
                    'MED_HTN_Meds', 'MED_ACEI_ARB_Use', 'OUT_EventCKD35']

Selected_Num_Col = ['DEM_Age', 'OUT_TimeToEventMonths']

Selected_Num_Col_r = ['CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR',
                      'CBM_Systolic_BP',
                      'CBM_Diastolic_BP', 'CBM_BMI']

###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===######===######===###
All_Synth_Data_50PC_RandomOverSampler = {}

for Cur_Strat in list(ratios.keys()):
    print("###===######===######===###")
    print(Cur_Strat)

    for now01 in [0, 1]:
        # Execute until we have 5 successful iterations
        print("#---")
        print(f"now01: {now01}")
        print("#---")

        now_cur_itr = 0
        successful_iterations = 0

        while successful_iterations < 5:
            try:
                print(f"Attempting iteration {now_cur_itr} (Successful so far: {successful_iterations})")

                # Sample and prepare data
                my_df2_sampled = my_df2.sample(frac=0.5, random_state=now_cur_itr)
                my_df2_sampled = my_df2_sampled.reset_index(drop=True)
                cur_Loc = list(np.where(my_df2_sampled[Cur_Strat].values == now01)[0])
                cur_strated_df = copy.copy(my_df2_sampled.iloc[cur_Loc, :])
                cur_strated_df = cur_strated_df.reset_index(drop=True)

                # Set up X, y, duration variables
                y = cur_strated_df['OUT_EventCKD35']

                # SMOTE resampling with a safer k_neighbors parameter
                SampySamp = RandomOverSampler(sampling_strategy='auto', random_state=now_cur_itr)
                X_resampled, y_resampled = SampySamp.fit_resample(cur_strated_df, y)

                # Continue with the rest of the process as usual
                synth_data_df = pd.DataFrame(X_resampled, columns=cur_strated_df.columns)

                # Adjust data types
                synth_data_df[Selected_Bin_Col] = \
                    (synth_data_df[Selected_Bin_Col] > 0.5).astype(int)
                synth_data_df[Selected_Num_Col] = np.round(synth_data_df[Selected_Num_Col])
                synth_data_df[Selected_Num_Col_r] = np.round(synth_data_df[Selected_Num_Col_r], 2)

                # Store in the final dictionary
                All_Synth_Data_50PC_RandomOverSampler[f"{Cur_Strat}_{now01}_{successful_iterations}"] = synth_data_df

                # Increment successful iteration count and move to next iteration
                successful_iterations += 1

            except ValueError as e:
                print(f"Iteration {now_cur_itr} failed due to error: {e}. Retrying...")

            # Increment iteration index for the next try
            now_cur_itr += 1

###===######===######===###
DEM_Sex
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Age
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3

In [None]:
seed_everything()

import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import SMOTE
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

import copy

Selected_Bin_Col = ['DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',
                    'MH_Diabetes_History', 'MH_CHD_History',
                    'MH_Vascular_History', 'MH_Hypertension_History',
                    'MH_Dyslipidemia_History',
                    'MED_DLD_Meds', 'MED_Diabetes_Meds',
                    'MED_HTN_Meds', 'MED_ACEI_ARB_Use', 'OUT_EventCKD35']

Selected_Num_Col = ['DEM_Age', 'OUT_TimeToEventMonths']

Selected_Num_Col_r = ['CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR',
                      'CBM_Systolic_BP',
                      'CBM_Diastolic_BP', 'CBM_BMI']

###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===######===######===###
All_Synth_Data_50PC_SMOTE = {}

for Cur_Strat in list(ratios.keys()):
    print("###===######===######===###")
    print(Cur_Strat)

    for now01 in [0, 1]:
        # Execute until we have 5 successful iterations
        print("#---")
        print(f"now01: {now01}")
        print("#---")

        now_cur_itr = 0
        successful_iterations = 0

        while successful_iterations < 5:
            try:
                print(f"Attempting iteration {now_cur_itr} (Successful so far: {successful_iterations})")

                # Sample and prepare data
                my_df2_sampled = my_df2.sample(frac=0.5, random_state=now_cur_itr)
                my_df2_sampled = my_df2_sampled.reset_index(drop=True)
                cur_Loc = list(np.where(my_df2_sampled[Cur_Strat].values == now01)[0])
                cur_strated_df = copy.copy(my_df2_sampled.iloc[cur_Loc, :])
                cur_strated_df = cur_strated_df.reset_index(drop=True)

                # Set up X, y, duration variables
                y = cur_strated_df['OUT_EventCKD35']

                # SMOTE resampling with a safer k_neighbors parameter
                SampySamp = SMOTE(sampling_strategy='auto', random_state=now_cur_itr,
                                  k_neighbors=min(3, len(cur_Loc) - 1))
                X_resampled, y_resampled = SampySamp.fit_resample(cur_strated_df, y)

                # Continue with the rest of the process as usual
                synth_data_df = pd.DataFrame(X_resampled, columns=cur_strated_df.columns)

                # Adjust data types
                synth_data_df[Selected_Bin_Col] = \
                    (synth_data_df[Selected_Bin_Col] > 0.5).astype(int)
                synth_data_df[Selected_Num_Col] = np.round(synth_data_df[Selected_Num_Col])
                synth_data_df[Selected_Num_Col_r] = np.round(synth_data_df[Selected_Num_Col_r], 2)

                # Store in the final dictionary
                All_Synth_Data_50PC_SMOTE[f"{Cur_Strat}_{now01}_{successful_iterations}"] = synth_data_df

                # Increment successful iteration count and move to next iteration
                successful_iterations += 1

            except ValueError as e:
                print(f"Iteration {now_cur_itr} failed due to error: {e}. Retrying...")

            # Increment iteration index for the next try
            now_cur_itr += 1

###===######===######===###
DEM_Sex
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Age
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3

In [None]:
seed_everything()

import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import SMOTENC
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

import copy

Selected_Bin_Col = ['DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',
                    'MH_Diabetes_History', 'MH_CHD_History',
                    'MH_Vascular_History', 'MH_Hypertension_History',
                    'MH_Dyslipidemia_History',
                    'MED_DLD_Meds', 'MED_Diabetes_Meds',
                    'MED_HTN_Meds', 'MED_ACEI_ARB_Use', 'OUT_EventCKD35']

Selected_Num_Col = ['DEM_Age', 'OUT_TimeToEventMonths']

Selected_Num_Col_r = ['CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR',
                      'CBM_Systolic_BP',
                      'CBM_Diastolic_BP', 'CBM_BMI']

###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===######===######===###
All_Synth_Data_50PC_SMOTENC = {}

for Cur_Strat in list(ratios.keys()):
    print("###===######===######===###")
    print(Cur_Strat)

    for now01 in [0, 1]:
        # Execute until we have 5 successful iterations
        print("#---")
        print(f"now01: {now01}")
        print("#---")

        now_cur_itr = 0
        successful_iterations = 0

        while successful_iterations < 5:
            try:
                print(f"Attempting iteration {now_cur_itr} (Successful so far: {successful_iterations})")

                # Sample and prepare data
                my_df2_sampled = my_df2.sample(frac=0.5, random_state=now_cur_itr)
                my_df2_sampled = my_df2_sampled.reset_index(drop=True)
                cur_Loc = list(np.where(my_df2_sampled[Cur_Strat].values == now01)[0])
                cur_strated_df = copy.copy(my_df2_sampled.iloc[cur_Loc, :])
                cur_strated_df = cur_strated_df.reset_index(drop=True)

                # Set up X, y, duration variables
                y = cur_strated_df['OUT_EventCKD35']

                # SMOTE resampling with a safer k_neighbors parameter
                SampySamp = SMOTENC(sampling_strategy='auto', random_state=now_cur_itr,
                                    k_neighbors=min(3, len(cur_Loc) - 1),
                                    categorical_features=[1, 2, 3, 10, 11, 12, 13, 14, 15, 16, 17, 18])
                X_resampled, y_resampled = SampySamp.fit_resample(cur_strated_df, y)

                # Continue with the rest of the process as usual
                synth_data_df = pd.DataFrame(X_resampled, columns=cur_strated_df.columns)

                # Adjust data types
                synth_data_df[Selected_Bin_Col] = \
                    (synth_data_df[Selected_Bin_Col] > 0.5).astype(int)
                synth_data_df[Selected_Num_Col] = np.round(synth_data_df[Selected_Num_Col])
                synth_data_df[Selected_Num_Col_r] = np.round(synth_data_df[Selected_Num_Col_r], 2)

                # Store in the final dictionary
                All_Synth_Data_50PC_SMOTENC[f"{Cur_Strat}_{now01}_{successful_iterations}"] = synth_data_df

                # Increment successful iteration count and move to next iteration
                successful_iterations += 1

            except ValueError as e:
                print(f"Iteration {now_cur_itr} failed due to error: {e}. Retrying...")

            # Increment iteration index for the next try
            now_cur_itr += 1

###===######===######===###
DEM_Sex
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)




Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)




Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Age
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)




Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)




#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)




###===######===######===###
DEM_Smoking_History
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)




Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)




Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_eGFR
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)




Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)




#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)




###===######===######===###
Strat_BP_Level
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)




Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)




Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Obesity
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)




Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)




Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)




###===######===######===###
Strat_CVD
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)




Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)




Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Dia_Fin
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Iteration 1 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 4. Retrying...
Attempting iteration 2 (Successful so far: 1)




Attempting iteration 3 (Successful so far: 2)
Attempting iteration 4 (Successful so far: 3)
Iteration 4 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 5 (Successful so far: 3)
Attempting iteration 6 (Successful so far: 4)
Iteration 6 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 4. Retrying...
Attempting iteration 7 (Successful so far: 4)




#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)




Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Hyper_Fin
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Iteration 0 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 1 (Successful so far: 0)
Iteration 1 failed due to error: The target 'y' needs to have more than 1 class. Got 1 class instead. Retrying...
Attempting iteration 2 (Successful so far: 0)
Iteration 2 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 4. Retrying...
Attempting iteration 3 (Successful so far: 0)
Iteration 3 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 1, n_neighbors = 4. Retrying...
Attempting iteration 4 (Successful so far: 0)
Iteration 4 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 4. Retrying...
Attempting iteration 5 (Successful so far: 0)
Iteration 5 failed due to error: 



Iteration 11 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 1, n_neighbors = 4. Retrying...
Attempting iteration 12 (Successful so far: 1)
Attempting iteration 13 (Successful so far: 2)
Iteration 13 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 14 (Successful so far: 2)
Attempting iteration 15 (Successful so far: 3)
Iteration 15 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 16 (Successful so far: 3)
Iteration 16 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 17 (Successful so far: 3)
Attempting iteration 18 (Successful so far: 4)
Iteration 18 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 19 (Successful so far: 4)
Iteration 19 failed due to error: 



Iteration 20 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 4. Retrying...
Attempting iteration 21 (Successful so far: 4)
Iteration 21 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 22 (Successful so far: 4)
Iteration 22 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 4. Retrying...
Attempting iteration 23 (Successful so far: 4)
Iteration 23 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 4. Retrying...
Attempting iteration 24 (Successful so far: 4)
Iteration 24 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 1, n_neighbors = 4. Retrying...
Attempting iteration 25 (Successful so far: 4)
Iteration 25 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 26 (Successful so far: 4)
Iteration 



Iteration 39 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 4. Retrying...
Attempting iteration 40 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)




Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Lipid_Fin
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Iteration 1 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 2 (Successful so far: 1)




Iteration 2 failed due to error: Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4. Retrying...
Attempting iteration 3 (Successful so far: 1)
Attempting iteration 4 (Successful so far: 2)
Attempting iteration 5 (Successful so far: 3)
Attempting iteration 6 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)




Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)




In [None]:
seed_everything()

import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import ADASYN
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

import copy

Selected_Bin_Col = ['DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',
                    'MH_Diabetes_History', 'MH_CHD_History',
                    'MH_Vascular_History', 'MH_Hypertension_History',
                    'MH_Dyslipidemia_History',
                    'MED_DLD_Meds', 'MED_Diabetes_Meds',
                    'MED_HTN_Meds', 'MED_ACEI_ARB_Use', 'OUT_EventCKD35']

Selected_Num_Col = ['DEM_Age', 'OUT_TimeToEventMonths']

Selected_Num_Col_r = ['CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR',
                      'CBM_Systolic_BP',
                      'CBM_Diastolic_BP', 'CBM_BMI']

###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===######===######===###
All_Synth_Data_50PC_ADASYN = {}

for Cur_Strat in list(ratios.keys()):
    print("###===######===######===###")
    print(Cur_Strat)

    for now01 in [0, 1]:
        # Execute until we have 5 successful iterations
        print("#---")
        print(f"now01: {now01}")
        print("#---")

        now_cur_itr = 0
        successful_iterations = 0

        while successful_iterations < 5:
            try:
                print(f"Attempting iteration {now_cur_itr} (Successful so far: {successful_iterations})")

                # Sample and prepare data
                my_df2_sampled = my_df2.sample(frac=0.5, random_state=now_cur_itr)
                my_df2_sampled = my_df2_sampled.reset_index(drop=True)
                cur_Loc = list(np.where(my_df2_sampled[Cur_Strat].values == now01)[0])
                cur_strated_df = copy.copy(my_df2_sampled.iloc[cur_Loc, :])
                cur_strated_df = cur_strated_df.reset_index(drop=True)

                # Set up X, y, duration variables
                y = cur_strated_df['OUT_EventCKD35']

                # SMOTE resampling with a safer k_neighbors parameter
                SampySamp = ADASYN(sampling_strategy='auto', random_state=now_cur_itr,
                                   n_neighbors=min(3, len(cur_Loc) - 1))
                X_resampled, y_resampled = SampySamp.fit_resample(cur_strated_df, y)

                # Continue with the rest of the process as usual
                synth_data_df = pd.DataFrame(X_resampled, columns=cur_strated_df.columns)

                # Adjust data types
                synth_data_df[Selected_Bin_Col] = \
                    (synth_data_df[Selected_Bin_Col] > 0.5).astype(int)
                synth_data_df[Selected_Num_Col] = np.round(synth_data_df[Selected_Num_Col])
                synth_data_df[Selected_Num_Col_r] = np.round(synth_data_df[Selected_Num_Col_r], 2)

                # Store in the final dictionary
                All_Synth_Data_50PC_ADASYN[f"{Cur_Strat}_{now01}_{successful_iterations}"] = synth_data_df

                # Increment successful iteration count and move to next iteration
                successful_iterations += 1

            except ValueError as e:
                print(f"Iteration {now_cur_itr} failed due to error: {e}. Retrying...")

            # Increment iteration index for the next try
            now_cur_itr += 1

###===######===######===###
DEM_Sex
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Age
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3

In [None]:
seed_everything()

import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import BorderlineSMOTE
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

import copy

Selected_Bin_Col = ['DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',
                    'MH_Diabetes_History', 'MH_CHD_History',
                    'MH_Vascular_History', 'MH_Hypertension_History',
                    'MH_Dyslipidemia_History',
                    'MED_DLD_Meds', 'MED_Diabetes_Meds',
                    'MED_HTN_Meds', 'MED_ACEI_ARB_Use', 'OUT_EventCKD35']

Selected_Num_Col = ['DEM_Age', 'OUT_TimeToEventMonths']

Selected_Num_Col_r = ['CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR',
                      'CBM_Systolic_BP',
                      'CBM_Diastolic_BP', 'CBM_BMI']

###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===######===######===###
All_Synth_Data_50PC_BorderlineSMOTE = {}

for Cur_Strat in list(ratios.keys()):
    print("###===######===######===###")
    print(Cur_Strat)

    for now01 in [0, 1]:
        # Execute until we have 5 successful iterations
        print("#---")
        print(f"now01: {now01}")
        print("#---")

        now_cur_itr = 0
        successful_iterations = 0

        while successful_iterations < 5:
            try:
                print(f"Attempting iteration {now_cur_itr} (Successful so far: {successful_iterations})")

                # Sample and prepare data
                my_df2_sampled = my_df2.sample(frac=0.5, random_state=now_cur_itr)
                my_df2_sampled = my_df2_sampled.reset_index(drop=True)
                cur_Loc = list(np.where(my_df2_sampled[Cur_Strat].values == now01)[0])
                cur_strated_df = copy.copy(my_df2_sampled.iloc[cur_Loc, :])
                cur_strated_df = cur_strated_df.reset_index(drop=True)

                # Set up X, y, duration variables
                y = cur_strated_df['OUT_EventCKD35']

                # SMOTE resampling with a safer k_neighbors parameter
                SampySamp = BorderlineSMOTE(sampling_strategy='auto', random_state=now_cur_itr,
                                   k_neighbors=min(3, len(cur_Loc) - 1))
                X_resampled, y_resampled = SampySamp.fit_resample(cur_strated_df, y)

                # Continue with the rest of the process as usual
                synth_data_df = pd.DataFrame(X_resampled, columns=cur_strated_df.columns)

                # Adjust data types
                synth_data_df[Selected_Bin_Col] = \
                    (synth_data_df[Selected_Bin_Col] > 0.5).astype(int)
                synth_data_df[Selected_Num_Col] = np.round(synth_data_df[Selected_Num_Col])
                synth_data_df[Selected_Num_Col_r] = np.round(synth_data_df[Selected_Num_Col_r], 2)

                # Store in the final dictionary
                All_Synth_Data_50PC_BorderlineSMOTE[f"{Cur_Strat}_{now01}_{successful_iterations}"] = synth_data_df

                # Increment successful iteration count and move to next iteration
                successful_iterations += 1

            except ValueError as e:
                print(f"Iteration {now_cur_itr} failed due to error: {e}. Retrying...")

            # Increment iteration index for the next try
            now_cur_itr += 1

###===######===######===###
DEM_Sex
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Age
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3

In [None]:
seed_everything()

import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import SVMSMOTE
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

import copy

Selected_Bin_Col = ['DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',
                    'MH_Diabetes_History', 'MH_CHD_History',
                    'MH_Vascular_History', 'MH_Hypertension_History',
                    'MH_Dyslipidemia_History',
                    'MED_DLD_Meds', 'MED_Diabetes_Meds',
                    'MED_HTN_Meds', 'MED_ACEI_ARB_Use', 'OUT_EventCKD35']

Selected_Num_Col = ['DEM_Age', 'OUT_TimeToEventMonths']

Selected_Num_Col_r = ['CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR',
                      'CBM_Systolic_BP',
                      'CBM_Diastolic_BP', 'CBM_BMI']

###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===######===######===###
All_Synth_Data_50PC_SVMSMOTE = {}

for Cur_Strat in list(ratios.keys()):
    print("###===######===######===###")
    print(Cur_Strat)

    for now01 in [0, 1]:
        # Execute until we have 5 successful iterations
        print("#---")
        print(f"now01: {now01}")
        print("#---")

        now_cur_itr = 0
        successful_iterations = 0

        while successful_iterations < 5:
            try:
                print(f"Attempting iteration {now_cur_itr} (Successful so far: {successful_iterations})")

                # Sample and prepare data
                my_df2_sampled = my_df2.sample(frac=0.5, random_state=now_cur_itr)
                my_df2_sampled = my_df2_sampled.reset_index(drop=True)
                cur_Loc = list(np.where(my_df2_sampled[Cur_Strat].values == now01)[0])
                cur_strated_df = copy.copy(my_df2_sampled.iloc[cur_Loc, :])
                cur_strated_df = cur_strated_df.reset_index(drop=True)

                # Set up X, y, duration variables
                y = cur_strated_df['OUT_EventCKD35']

                # SMOTE resampling with a safer k_neighbors parameter
                SampySamp = SVMSMOTE(sampling_strategy='auto', random_state=now_cur_itr,
                                   k_neighbors=min(3, len(cur_Loc) - 1))
                X_resampled, y_resampled = SampySamp.fit_resample(cur_strated_df, y)

                # Continue with the rest of the process as usual
                synth_data_df = pd.DataFrame(X_resampled, columns=cur_strated_df.columns)

                # Adjust data types
                synth_data_df[Selected_Bin_Col] = \
                    (synth_data_df[Selected_Bin_Col] > 0.5).astype(int)
                synth_data_df[Selected_Num_Col] = np.round(synth_data_df[Selected_Num_Col])
                synth_data_df[Selected_Num_Col_r] = np.round(synth_data_df[Selected_Num_Col_r], 2)

                # Store in the final dictionary
                All_Synth_Data_50PC_SVMSMOTE[f"{Cur_Strat}_{now01}_{successful_iterations}"] = synth_data_df

                # Increment successful iteration count and move to next iteration
                successful_iterations += 1

            except ValueError as e:
                print(f"Iteration {now_cur_itr} failed due to error: {e}. Retrying...")

            # Increment iteration index for the next try
            now_cur_itr += 1

###===######===######===###
DEM_Sex
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
###===######===######===###
Strat_Age
#---
now01: 0
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3 (Successful so far: 3)
Attempting iteration 4 (Successful so far: 4)
#---
now01: 1
#---
Attempting iteration 0 (Successful so far: 0)
Attempting iteration 1 (Successful so far: 1)
Attempting iteration 2 (Successful so far: 2)
Attempting iteration 3

In [None]:
print("###===###")
print(f"synth_df_ROS: {All_Synth_Data_50PC_RandomOverSampler[list(All_Synth_Data_50PC_RandomOverSampler.keys())[0]].shape}")
print("#---")
print(f"synth_df_SMOTE: {All_Synth_Data_50PC_SMOTE[list(All_Synth_Data_50PC_SMOTE.keys())[0]].shape}")
print("#---")
print(f"synth_df_SMOTENC: {All_Synth_Data_50PC_SMOTENC[list(All_Synth_Data_50PC_SMOTENC.keys())[0]].shape}")
print("#---")
print(f"synth_df_ADASYN: {All_Synth_Data_50PC_ADASYN[list(All_Synth_Data_50PC_ADASYN.keys())[0]].shape}")
print("#---")
print(f"synth_df_BorderlineSMOTE: {All_Synth_Data_50PC_BorderlineSMOTE[list(All_Synth_Data_50PC_BorderlineSMOTE.keys())[0]].shape}")
print("#---")
print(f"synth_df_SVMSMOTE: {All_Synth_Data_50PC_SVMSMOTE[list(All_Synth_Data_50PC_SVMSMOTE.keys())[0]].shape}")

###===###
synth_df_ROS: (214, 32)
#---
synth_df_SMOTE: (214, 32)
#---
synth_df_SMOTENC: (214, 32)
#---
synth_df_ADASYN: (217, 32)
#---
synth_df_BorderlineSMOTE: (214, 32)
#---
synth_df_SVMSMOTE: (175, 32)


In [None]:
list(All_Synth_Data_50PC_SVMSMOTE.keys())[0]

'DEM_Sex_0_0'

In [None]:
def process_data_stratified(Calibration_df, Current_AUG, NAME):

    PredictorCols = covariate_cols

    ###===######===######===######===######===######===###
    for Cur_Strat in list(ratios.keys()):
        print("###===######===######===###")
        print(Cur_Strat)

        ###===######===######===######===######===######===###
        for now01 in [0, 1]:
            for now_cur_itr in range(5):
                ###===######===######===######===######===######===###
                Cur_keys = f"{Cur_Strat}_{now01}_{now_cur_itr}"

                file_suffix = "augmented_" + Cur_keys

                ###===######===######===######===######===######===###
                for fold in range(num_folds):
                    for swap in range(2):
                        idx = (Calibration_df[f"FOLD_{fold}"] == (1 - swap))
                        x = Calibration_df.loc[idx, PredictorCols]
                        time  = Calibration_df.loc[idx, time_col]
                        event = Calibration_df.loc[idx, event_col]

                        tim = pd.DataFrame(x)
                        tim["TIME"] = time
                        tim["EVENT"] = event

                        cph = CoxPHFitter(penalizer=0.01)

                        ###===######===######===######===######===######===###
                        synth_data_df = Current_AUG[Cur_keys]
                        tim_synth           = synth_data_df[PredictorCols]
                        tim_synth["TIME"]   = synth_data_df[time_col]
                        tim_synth["EVENT"]  = synth_data_df[event_col]

                        cph.fit(pd.concat([tim, tim_synth]), duration_col='TIME', event_col='EVENT')

                        ###===######===######===######===######===######===###
                        # Generate predictions for the swap set
                        idx = (Calibration_df[f"FOLD_{fold}"] == swap)
                        x = Calibration_df.loc[idx, PredictorCols]
                        df_cox = pd.DataFrame(
                            {"LPH": np.dot(x - cph._norm_mean.values, cph.params_)}
                        )

                        df_cox.to_csv(f"CKD_FoldSwap_{fold}_{swap}_{file_suffix}_{NAME}.csv")


In [None]:
seed_everything()

process_data_stratified(Calibration_df,
                        Current_AUG = All_Synth_Data_50PC_RandomOverSampler,
                        NAME = "RandomOverSampler")

###===######===######===###
DEM_Sex


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Age


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
DEM_Smoking_History


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_eGFR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_BP_Level


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Obesity


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_CVD


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Dia_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Hyper_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Lipid_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

In [None]:
seed_everything()

process_data_stratified(Calibration_df,
                        Current_AUG = All_Synth_Data_50PC_SMOTE,
                        NAME = "SMOTE")

###===######===######===###
DEM_Sex


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Age


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
DEM_Smoking_History


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_eGFR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_BP_Level


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Obesity


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_CVD


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Dia_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Hyper_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Lipid_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

In [None]:
seed_everything()

process_data_stratified(Calibration_df,
                        Current_AUG = All_Synth_Data_50PC_SMOTENC,
                        NAME = "SMOTENC")

###===######===######===###
DEM_Sex


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Age


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
DEM_Smoking_History


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_eGFR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_BP_Level


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Obesity


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_CVD


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Dia_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Hyper_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Lipid_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

In [None]:
seed_everything()

process_data_stratified(Calibration_df,
                        Current_AUG = All_Synth_Data_50PC_ADASYN,
                        NAME = "ADASYN")

###===######===######===###
DEM_Sex


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Age


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
DEM_Smoking_History


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_eGFR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_BP_Level


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Obesity


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_CVD


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Dia_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Hyper_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Lipid_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

In [None]:
seed_everything()

process_data_stratified(Calibration_df,
                        Current_AUG = All_Synth_Data_50PC_BorderlineSMOTE,
                        NAME = "BorderlineSMOTE")

###===######===######===###
DEM_Sex


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Age


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
DEM_Smoking_History


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame

###===######===######===###
Strat_eGFR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_BP_Level


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Obesity


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_CVD


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Dia_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Hyper_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Lipid_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

In [None]:
seed_everything()

process_data_stratified(Calibration_df,
                        Current_AUG = All_Synth_Data_50PC_SVMSMOTE,
                        NAME = "SVMSMOTE")

###===######===######===###
DEM_Sex


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Age


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
DEM_Smoking_History


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_eGFR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_BP_Level


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Obesity


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_CVD


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Dia_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Hyper_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

###===######===######===###
Strat_Lipid_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["EVENT"]  = synth_data_df[event_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

###===######===######===######===######===###
###===######===######===######===######===###
def compute_calibration_slope(calibration_data):
    # Fit a linear regression model
    reg = LinearRegression(fit_intercept = False)
    reg.fit(calibration_data["RISK_PERC"].values.reshape(-1, 1),
            calibration_data["EVENT_PERC"].values)

    # Get the calibration slope
    calibration_slope = reg.coef_[0]
    return calibration_slope

###===######===######===######===######===###
###===######===######===######===######===###
def analyze_now(Calibration_df, file_suffix, cur_var, cur_value):
    PredictorCols = covariate_cols

    x     = Calibration_df[PredictorCols]
    time  = Calibration_df[time_col]
    event = Calibration_df[event_col]

    df_cox = pd.DataFrame(x)
    df_cox["TIME"] = time
    df_cox["EVENT"] = event

    lph_matrix_cox = np.zeros((df_cox.shape[0], num_folds))

    for fold in range(num_folds):
        for swap in range(2):
            idx = (Calibration_df[f"FOLD_{fold}"] == swap)
            temp = pd.read_csv(f"CKD_FoldSwap_{fold}_{swap}_{file_suffix}.csv")
            lph_matrix_cox[idx, fold] = temp["LPH"]

    df_cox["LPH"] = lph_matrix_cox.mean(axis=1)

    es_cox = EvalSurv(df_cox.copy())

    # Calculate time points at 25th, 50th, and 75th percentiles
    time_points = [
        np.percentile(Calibration_df[time_col], 25),
        np.percentile(Calibration_df[time_col], 50),
        np.percentile(Calibration_df[time_col], 75)
    ]

    results = {}

    for t in time_points:
        # Compute RISK_PERC for the specific time point
        df_cox["RISK_PERC"] = es_cox.get_risk_perc(at_time=t)

        # Apply condition to filter data
        CONDITION = my_df2[cur_var] == cur_value
        CONDITION = CONDITION.astype(bool)

        subset = df_cox.loc[CONDITION].copy()
        subset["QUANTILE"] = pd.qcut(subset["RISK_PERC"], q=20, labels=range(20))

        # Aggregate calibration data for the quantiles
        calibration_data = subset.groupby("QUANTILE", observed=True).agg({"RISK_PERC": "mean", "EVENT": "sum"}).copy()
        calibration_data["EVENT_PERC"] = calibration_data["EVENT"] / (len(df_cox.index) / 20) * 100
        calibration_data.reset_index(inplace=True)

        # Compute calibration slope and calibration error
        calibration_slope = compute_calibration_slope(calibration_data)
        calibration_error = np.abs(1 - calibration_slope)

        # Store results for this time point
        results[t] = {"calibration_slope": calibration_slope, "calibration_error": calibration_error}

    return results


In [None]:
def analyze_now_aug(Calibration_df, file_suffix, cur_var, cur_value, CUR_AUG):

    # Define percentiles for different time points
    time_points = [
        np.percentile(Calibration_df[time_col], 25),
        np.percentile(Calibration_df[time_col], 50),
        np.percentile(Calibration_df[time_col], 75)
    ]

    # Initialize dictionaries to store results for each time point
    Cali_results = {t: [] for t in time_points}
    Dto1_results = {t: [] for t in time_points}

    for itr in range(5):
        PredictorCols = covariate_cols

        x     = Calibration_df[PredictorCols]
        time  = Calibration_df[time_col]
        event = Calibration_df[event_col]

        df_cox = pd.DataFrame(x)
        df_cox["TIME"] = time
        df_cox["EVENT"] = event

        lph_matrix_cox = np.zeros((df_cox.shape[0], num_folds))

        for fold in range(num_folds):
            for swap in range(2):
                idx = (Calibration_df[f"FOLD_{fold}"] == swap)
                temp = pd.read_csv(f"CKD_FoldSwap_{fold}_{swap}_{file_suffix}_{itr}_{CUR_AUG}.csv")
                lph_matrix_cox[idx, fold] = temp["LPH"]

        df_cox["LPH"] = lph_matrix_cox.mean(axis=1)

        es_cox = EvalSurv(df_cox.copy())

        # Loop through each time point and calculate calibration metrics
        for t in time_points:
            df_cox["RISK_PERC"] = es_cox.get_risk_perc(at_time=t)

            # Apply condition to filter data
            CONDITION = my_df2[cur_var] == cur_value
            CONDITION = CONDITION.astype(bool)

            subset = df_cox.loc[CONDITION].copy()
            subset["QUANTILE"] = pd.qcut(subset["RISK_PERC"], q=20, labels=range(20))

            calibration_data = subset.groupby("QUANTILE", observed=True).agg({"RISK_PERC": "mean", "EVENT": "sum"}).copy()
            calibration_data["EVENT_PERC"] = calibration_data["EVENT"] / (len(df_cox.index) / 20) * 100
            calibration_data.reset_index(inplace=True)

            # Compute calibration slope and calibration error (D21 Score)
            calibration_slope = compute_calibration_slope(calibration_data)
            d21 = np.abs(1 - calibration_slope)

            # Append results for each time point
            Cali_results[t].append(calibration_slope)
            Dto1_results[t].append(d21)

    # Calculate mean and standard deviation for each time point
    final_results = {}
    for t in time_points:
        Cali_mean = np.mean(Cali_results[t])
        Cali_std  = np.std(Cali_results[t])
        Dto1_mean = np.mean(Dto1_results[t])
        Dto1_std  = np.std(Dto1_results[t])

        final_results[t] = {
            "Cali_mean": Cali_mean,
            "Cali_std": Cali_std,
            "Dto1_mean": Dto1_mean,
            "Dto1_std": Dto1_std
        }

    return final_results

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: DEM_Sex_0")
seed_everything()

cur_var, cur_value = "DEM_Sex", 0
org = "original"
aug = "augmented_DEM_Sex_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: DEM_Sex_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6817
D21 Score (Calibration Error): 0.3183

At time 93.00:
Calibration Slope: 0.5397
D21 Score (Calibration Error): 0.4603

At time 100.00:
Calibration Slope: 0.4914
D21 Score (Calibration Error): 0.5086

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.4521 (0.0349)
D21 Score:         0.5479 (0.0349)

At time 93.00:
Calibration Slope: 0.3722 (0.0275)
D21 Score:         0.6278 (0.0275)

At time 100.00:
Calibration Slope: 0.3443 (0.0239)
D21 Score:         0.6557 (0.0239)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.4630 (0.0656)
D21 Score:         0.5370 (0.0656)

At time 93.00:
Calibration Slope: 0.3692 (0.0425)
D21 Score:         0.6308 (0.0425)

At time 100.00:
Calibration Slope: 0.3369 (0.0347)
D21 Score:         0.6631 (0.0347)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.4904 (0.0638)
D21 Score:       

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: DEM_Sex_1")
seed_everything()

cur_var, cur_value = "DEM_Sex", 1
org = "original"
aug = "augmented_DEM_Sex_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: DEM_Sex_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.4709
D21 Score (Calibration Error): 0.5291

At time 93.00:
Calibration Slope: 0.4034
D21 Score (Calibration Error): 0.5966

At time 100.00:
Calibration Slope: 0.3799
D21 Score (Calibration Error): 0.6201

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.4636 (0.0158)
D21 Score:         0.5364 (0.0158)

At time 93.00:
Calibration Slope: 0.3984 (0.0135)
D21 Score:         0.6016 (0.0135)

At time 100.00:
Calibration Slope: 0.3739 (0.0131)
D21 Score:         0.6261 (0.0131)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.4408 (0.0265)
D21 Score:         0.5592 (0.0265)

At time 93.00:
Calibration Slope: 0.3775 (0.0199)
D21 Score:         0.6225 (0.0199)

At time 100.00:
Calibration Slope: 0.3528 (0.0167)
D21 Score:         0.6472 (0.0167)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.4436 (0.0206)
D21 Score:       

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Age_0")
seed_everything()

cur_var, cur_value = "Strat_Age", 0
org = "original"
aug = "augmented_Strat_Age_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Age_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.9115
D21 Score (Calibration Error): 0.0885

At time 93.00:
Calibration Slope: 0.7595
D21 Score (Calibration Error): 0.2405

At time 100.00:
Calibration Slope: 0.7068
D21 Score (Calibration Error): 0.2932

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.9037 (0.1083)
D21 Score:         0.1363 (0.0494)

At time 93.00:
Calibration Slope: 0.7551 (0.0843)
D21 Score:         0.2449 (0.0843)

At time 100.00:
Calibration Slope: 0.6956 (0.0758)
D21 Score:         0.3044 (0.0758)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.9069 (0.1275)
D21 Score:         0.1555 (0.0274)

At time 93.00:
Calibration Slope: 0.7252 (0.0942)
D21 Score:         0.2748 (0.0942)

At time 100.00:
Calibration Slope: 0.6590 (0.0823)
D21 Score:         0.3410 (0.0823)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.7956 (0.0534)
D21 Score:     

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Age_1")
seed_everything()

cur_var, cur_value = "Strat_Age", 1
org = "original"
aug = "augmented_Strat_Age_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Age_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.1885
D21 Score (Calibration Error): 0.8115

At time 93.00:
Calibration Slope: 0.1613
D21 Score (Calibration Error): 0.8387

At time 100.00:
Calibration Slope: 0.1517
D21 Score (Calibration Error): 0.8483

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.1862 (0.0100)
D21 Score:         0.8138 (0.0100)

At time 93.00:
Calibration Slope: 0.1619 (0.0045)
D21 Score:         0.8381 (0.0045)

At time 100.00:
Calibration Slope: 0.1532 (0.0033)
D21 Score:         0.8468 (0.0033)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.2103 (0.0136)
D21 Score:         0.7897 (0.0136)

At time 93.00:
Calibration Slope: 0.1778 (0.0090)
D21 Score:         0.8222 (0.0090)

At time 100.00:
Calibration Slope: 0.1661 (0.0075)
D21 Score:         0.8339 (0.0075)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.1961 (0.0092)
D21 Score:     

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: DEM_Smoking_History_0")
seed_everything()

cur_var, cur_value = "DEM_Smoking_History", 0
org = "original"
aug = "augmented_DEM_Smoking_History_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: DEM_Smoking_History_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.9119
D21 Score (Calibration Error): 0.0881

At time 93.00:
Calibration Slope: 0.7499
D21 Score (Calibration Error): 0.2501

At time 100.00:
Calibration Slope: 0.6942
D21 Score (Calibration Error): 0.3058

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.8553 (0.0144)
D21 Score:         0.1447 (0.0144)

At time 93.00:
Calibration Slope: 0.7090 (0.0168)
D21 Score:         0.2910 (0.0168)

At time 100.00:
Calibration Slope: 0.6596 (0.0198)
D21 Score:         0.3404 (0.0198)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.8724 (0.0338)
D21 Score:         0.1276 (0.0338)

At time 93.00:
Calibration Slope: 0.7071 (0.0215)
D21 Score:         0.2929 (0.0215)

At time 100.00:
Calibration Slope: 0.6488 (0.0182)
D21 Score:         0.3512 (0.0182)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.8044 (0.0520)
D21 S

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: DEM_Smoking_History_1")
seed_everything()

cur_var, cur_value = "DEM_Smoking_History", 1
org = "original"
aug = "augmented_DEM_Smoking_History_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: DEM_Smoking_History_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.1501
D21 Score (Calibration Error): 0.8499

At time 93.00:
Calibration Slope: 0.1314
D21 Score (Calibration Error): 0.8686

At time 100.00:
Calibration Slope: 0.1250
D21 Score (Calibration Error): 0.8750

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.1523 (0.0099)
D21 Score:         0.8477 (0.0099)

At time 93.00:
Calibration Slope: 0.1321 (0.0078)
D21 Score:         0.8679 (0.0078)

At time 100.00:
Calibration Slope: 0.1230 (0.0065)
D21 Score:         0.8770 (0.0065)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.1397 (0.0034)
D21 Score:         0.8603 (0.0034)

At time 93.00:
Calibration Slope: 0.1207 (0.0026)
D21 Score:         0.8793 (0.0026)

At time 100.00:
Calibration Slope: 0.1125 (0.0025)
D21 Score:         0.8875 (0.0025)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.1521 (0.0081)
D21 S

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_eGFR_0")
seed_everything()

cur_var, cur_value = "Strat_eGFR", 0
org = "original"
aug = "augmented_" + "Strat_eGFR_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_eGFR_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.7893
D21 Score (Calibration Error): 0.2107

At time 93.00:
Calibration Slope: 0.5760
D21 Score (Calibration Error): 0.4240

At time 100.00:
Calibration Slope: 0.5047
D21 Score (Calibration Error): 0.4953

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.4682 (0.0582)
D21 Score:         0.5318 (0.0582)

At time 93.00:
Calibration Slope: 0.3685 (0.0403)
D21 Score:         0.6315 (0.0403)

At time 100.00:
Calibration Slope: 0.3332 (0.0337)
D21 Score:         0.6668 (0.0337)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.4804 (0.0629)
D21 Score:         0.5196 (0.0629)

At time 93.00:
Calibration Slope: 0.3673 (0.0457)
D21 Score:         0.6327 (0.0457)

At time 100.00:
Calibration Slope: 0.3256 (0.0424)
D21 Score:         0.6744 (0.0424)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.4269 (0.0630)
D21 Score:    

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_eGFR_1")
seed_everything()

cur_var, cur_value = "Strat_eGFR", 1
org = "original"
aug = "augmented_" + "Strat_eGFR_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_eGFR_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.3041
D21 Score (Calibration Error): 0.6959

At time 93.00:
Calibration Slope: 0.2625
D21 Score (Calibration Error): 0.7375

At time 100.00:
Calibration Slope: 0.2480
D21 Score (Calibration Error): 0.7520

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.3292 (0.0108)
D21 Score:         0.6708 (0.0108)

At time 93.00:
Calibration Slope: 0.2789 (0.0069)
D21 Score:         0.7211 (0.0069)

At time 100.00:
Calibration Slope: 0.2609 (0.0057)
D21 Score:         0.7391 (0.0057)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.3321 (0.0138)
D21 Score:         0.6679 (0.0138)

At time 93.00:
Calibration Slope: 0.2802 (0.0115)
D21 Score:         0.7198 (0.0115)

At time 100.00:
Calibration Slope: 0.2607 (0.0097)
D21 Score:         0.7393 (0.0097)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.3151 (0.0112)
D21 Score:    

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_BP_Level_0")
seed_everything()

cur_var, cur_value = "Strat_BP_Level", 0
org = "original"
aug = "augmented_" + "Strat_BP_Level_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_BP_Level_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6511
D21 Score (Calibration Error): 0.3489

At time 93.00:
Calibration Slope: 0.5421
D21 Score (Calibration Error): 0.4579

At time 100.00:
Calibration Slope: 0.5044
D21 Score (Calibration Error): 0.4956

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.6521 (0.0587)
D21 Score:         0.3479 (0.0587)

At time 93.00:
Calibration Slope: 0.5308 (0.0286)
D21 Score:         0.4692 (0.0286)

At time 100.00:
Calibration Slope: 0.4893 (0.0193)
D21 Score:         0.5107 (0.0193)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.6649 (0.0490)
D21 Score:         0.3351 (0.0490)

At time 93.00:
Calibration Slope: 0.5396 (0.0425)
D21 Score:         0.4604 (0.0425)

At time 100.00:
Calibration Slope: 0.4923 (0.0401)
D21 Score:         0.5077 (0.0401)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.5658 (0.0378)
D21 Score:

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_BP_Level_1")
seed_everything()

cur_var, cur_value = "Strat_BP_Level", 1
org = "original"
aug = "augmented_" + "Strat_BP_Level_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_BP_Level_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.3358
D21 Score (Calibration Error): 0.6642

At time 93.00:
Calibration Slope: 0.2891
D21 Score (Calibration Error): 0.7109

At time 100.00:
Calibration Slope: 0.2728
D21 Score (Calibration Error): 0.7272

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.3443 (0.0152)
D21 Score:         0.6557 (0.0152)

At time 93.00:
Calibration Slope: 0.2911 (0.0117)
D21 Score:         0.7089 (0.0117)

At time 100.00:
Calibration Slope: 0.2712 (0.0106)
D21 Score:         0.7288 (0.0106)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.3322 (0.0168)
D21 Score:         0.6678 (0.0168)

At time 93.00:
Calibration Slope: 0.2786 (0.0128)
D21 Score:         0.7214 (0.0128)

At time 100.00:
Calibration Slope: 0.2588 (0.0115)
D21 Score:         0.7412 (0.0115)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.3296 (0.0126)
D21 Score:

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_Obesity_0"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Obesity_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.5387
D21 Score (Calibration Error): 0.4613

At time 93.00:
Calibration Slope: 0.4413
D21 Score (Calibration Error): 0.5587

At time 100.00:
Calibration Slope: 0.4081
D21 Score (Calibration Error): 0.5919

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.4354 (0.0269)
D21 Score:         0.5646 (0.0269)

At time 93.00:
Calibration Slope: 0.3743 (0.0205)
D21 Score:         0.6257 (0.0205)

At time 100.00:
Calibration Slope: 0.3504 (0.0194)
D21 Score:         0.6496 (0.0194)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.3955 (0.0342)
D21 Score:         0.6045 (0.0342)

At time 93.00:
Calibration Slope: 0.3385 (0.0232)
D21 Score:         0.6615 (0.0232)

At time 100.00:
Calibration Slope: 0.3171 (0.0195)
D21 Score:         0.6829 (0.0195)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.4199 (0.0372)
D21 Score: 

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_Obesity_1"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Obesity_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.5133
D21 Score (Calibration Error): 0.4867

At time 93.00:
Calibration Slope: 0.4384
D21 Score (Calibration Error): 0.5616

At time 100.00:
Calibration Slope: 0.4121
D21 Score (Calibration Error): 0.5879

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.5056 (0.0147)
D21 Score:         0.4944 (0.0147)

At time 93.00:
Calibration Slope: 0.4259 (0.0121)
D21 Score:         0.5741 (0.0121)

At time 100.00:
Calibration Slope: 0.3993 (0.0127)
D21 Score:         0.6007 (0.0127)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.5200 (0.0308)
D21 Score:         0.4800 (0.0308)

At time 93.00:
Calibration Slope: 0.4290 (0.0164)
D21 Score:         0.5710 (0.0164)

At time 100.00:
Calibration Slope: 0.3982 (0.0131)
D21 Score:         0.6018 (0.0131)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.4916 (0.0239)
D21 Score: 

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_CVD_0"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_CVD_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 1.0966
D21 Score (Calibration Error): 0.0966

At time 93.00:
Calibration Slope: 0.8741
D21 Score (Calibration Error): 0.1259

At time 100.00:
Calibration Slope: 0.7979
D21 Score (Calibration Error): 0.2021

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.8469 (0.0848)
D21 Score:         0.1531 (0.0848)

At time 93.00:
Calibration Slope: 0.6895 (0.0499)
D21 Score:         0.3105 (0.0499)

At time 100.00:
Calibration Slope: 0.6342 (0.0376)
D21 Score:         0.3658 (0.0376)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.8922 (0.1017)
D21 Score:         0.1402 (0.0479)

At time 93.00:
Calibration Slope: 0.7187 (0.0620)
D21 Score:         0.2813 (0.0620)

At time 100.00:
Calibration Slope: 0.6562 (0.0466)
D21 Score:         0.3438 (0.0466)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.8147 (0.0746)
D21 Score:     

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_CVD_1"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_CVD_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.1314
D21 Score (Calibration Error): 0.8686

At time 93.00:
Calibration Slope: 0.1140
D21 Score (Calibration Error): 0.8860

At time 100.00:
Calibration Slope: 0.1079
D21 Score (Calibration Error): 0.8921

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.1381 (0.0098)
D21 Score:         0.8619 (0.0098)

At time 93.00:
Calibration Slope: 0.1183 (0.0061)
D21 Score:         0.8817 (0.0061)

At time 100.00:
Calibration Slope: 0.1109 (0.0048)
D21 Score:         0.8891 (0.0048)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.1454 (0.0140)
D21 Score:         0.8546 (0.0140)

At time 93.00:
Calibration Slope: 0.1240 (0.0090)
D21 Score:         0.8760 (0.0090)

At time 100.00:
Calibration Slope: 0.1162 (0.0072)
D21 Score:         0.8838 (0.0072)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.1377 (0.0111)
D21 Score:     

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_Dia_Fin_0"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Dia_Fin_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 1.0268
D21 Score (Calibration Error): 0.0268

At time 93.00:
Calibration Slope: 0.7417
D21 Score (Calibration Error): 0.2583

At time 100.00:
Calibration Slope: 0.6464
D21 Score (Calibration Error): 0.3536

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.4058 (0.0538)
D21 Score:         0.5942 (0.0538)

At time 93.00:
Calibration Slope: 0.3311 (0.0406)
D21 Score:         0.6689 (0.0406)

At time 100.00:
Calibration Slope: 0.3027 (0.0332)
D21 Score:         0.6973 (0.0332)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.4250 (0.0499)
D21 Score:         0.5750 (0.0499)

At time 93.00:
Calibration Slope: 0.3395 (0.0408)
D21 Score:         0.6605 (0.0408)

At time 100.00:
Calibration Slope: 0.3049 (0.0401)
D21 Score:         0.6951 (0.0401)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.3749 (0.0515)
D21 Score: 

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_Dia_Fin_1"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Dia_Fin_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.4102
D21 Score (Calibration Error): 0.5898

At time 93.00:
Calibration Slope: 0.3542
D21 Score (Calibration Error): 0.6458

At time 100.00:
Calibration Slope: 0.3345
D21 Score (Calibration Error): 0.6655

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.4461 (0.0110)
D21 Score:         0.5539 (0.0110)

At time 93.00:
Calibration Slope: 0.3743 (0.0042)
D21 Score:         0.6257 (0.0042)

At time 100.00:
Calibration Slope: 0.3485 (0.0038)
D21 Score:         0.6515 (0.0038)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.4584 (0.0162)
D21 Score:         0.5416 (0.0162)

At time 93.00:
Calibration Slope: 0.3823 (0.0144)
D21 Score:         0.6177 (0.0144)

At time 100.00:
Calibration Slope: 0.3536 (0.0133)
D21 Score:         0.6464 (0.0133)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.4184 (0.0032)
D21 Score: 

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_Hyper_Fin_0"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Hyper_Fin_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6410
D21 Score (Calibration Error): 0.3590

At time 93.00:
Calibration Slope: 0.4776
D21 Score (Calibration Error): 0.5224

At time 100.00:
Calibration Slope: 0.4222
D21 Score (Calibration Error): 0.5778

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.3213 (0.1683)
D21 Score:         0.6787 (0.1683)

At time 93.00:
Calibration Slope: 0.2561 (0.1257)
D21 Score:         0.7439 (0.1257)

At time 100.00:
Calibration Slope: 0.2323 (0.1106)
D21 Score:         0.7677 (0.1106)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.1649 (0.0469)
D21 Score:         0.8351 (0.0469)

At time 93.00:
Calibration Slope: 0.1399 (0.0375)
D21 Score:         0.8601 (0.0375)

At time 100.00:
Calibration Slope: 0.1280 (0.0313)
D21 Score:         0.8720 (0.0313)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.2252 (0.0362)
D21 Score

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_Hyper_Fin_1"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Hyper_Fin_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6497
D21 Score (Calibration Error): 0.3503

At time 93.00:
Calibration Slope: 0.5566
D21 Score (Calibration Error): 0.4434

At time 100.00:
Calibration Slope: 0.5241
D21 Score (Calibration Error): 0.4759

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.7163 (0.0273)
D21 Score:         0.2837 (0.0273)

At time 93.00:
Calibration Slope: 0.5944 (0.0190)
D21 Score:         0.4056 (0.0190)

At time 100.00:
Calibration Slope: 0.5516 (0.0182)
D21 Score:         0.4484 (0.0182)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.7170 (0.0296)
D21 Score:         0.2830 (0.0296)

At time 93.00:
Calibration Slope: 0.5899 (0.0246)
D21 Score:         0.4101 (0.0246)

At time 100.00:
Calibration Slope: 0.5442 (0.0220)
D21 Score:         0.4558 (0.0220)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.6750 (0.0195)
D21 Score

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_Lipid_Fin_0"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Lipid_Fin_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.5065
D21 Score (Calibration Error): 0.4935

At time 93.00:
Calibration Slope: 0.3860
D21 Score (Calibration Error): 0.6140

At time 100.00:
Calibration Slope: 0.3454
D21 Score (Calibration Error): 0.6546

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.2450 (0.0299)
D21 Score:         0.7550 (0.0299)

At time 93.00:
Calibration Slope: 0.2045 (0.0240)
D21 Score:         0.7955 (0.0240)

At time 100.00:
Calibration Slope: 0.1894 (0.0210)
D21 Score:         0.8106 (0.0210)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.3012 (0.0433)
D21 Score:         0.6988 (0.0433)

At time 93.00:
Calibration Slope: 0.2483 (0.0333)
D21 Score:         0.7517 (0.0333)

At time 100.00:
Calibration Slope: 0.2293 (0.0312)
D21 Score:         0.7707 (0.0312)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.3125 (0.0429)
D21 Score

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

curding = "Strat_Lipid_Fin_1"
print(f"Calibration Condition: {curding}")
seed_everything()

cur_var, cur_value = curding[:-2], int(curding[-1])
org = "original"
aug = "augmented_" + curding

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data

AUG = ["RandomOverSampler", "SMOTE", "SMOTENC", "ADASYN", "BorderlineSMOTE", "SVMSMOTE"]

for CUR_AUG in AUG:
    results_augmented = analyze_now_aug(Calibration_df,
                                        file_suffix = aug,
                                        cur_var     = cur_var,
                                        cur_value   = cur_value,
                                        CUR_AUG     = CUR_AUG)

    print("")
    print("#---")
    print(f"Augmented Data with {CUR_AUG}")
    for time_point, metrics in results_augmented.items():
        Cali_mean = metrics["Cali_mean"]
        Cali_std = metrics["Cali_std"]
        Dto1_mean = metrics["Dto1_mean"]
        Dto1_std = metrics["Dto1_std"]

        print(f"\nAt time {time_point:.2f}:")
        print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
        print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Lipid_Fin_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6145
D21 Score (Calibration Error): 0.3855

At time 93.00:
Calibration Slope: 0.5277
D21 Score (Calibration Error): 0.4723

At time 100.00:
Calibration Slope: 0.4973
D21 Score (Calibration Error): 0.5027

#---
Augmented Data with RandomOverSampler

At time 77.00:
Calibration Slope: 0.6477 (0.0296)
D21 Score:         0.3523 (0.0296)

At time 93.00:
Calibration Slope: 0.5467 (0.0155)
D21 Score:         0.4533 (0.0155)

At time 100.00:
Calibration Slope: 0.5107 (0.0152)
D21 Score:         0.4893 (0.0152)

#---
Augmented Data with SMOTE

At time 77.00:
Calibration Slope: 0.6475 (0.0304)
D21 Score:         0.3525 (0.0304)

At time 93.00:
Calibration Slope: 0.5384 (0.0266)
D21 Score:         0.4616 (0.0266)

At time 100.00:
Calibration Slope: 0.4990 (0.0250)
D21 Score:         0.5010 (0.0250)

#---
Augmented Data with SMOTENC

At time 77.00:
Calibration Slope: 0.6100 (0.0247)
D21 Score