In [None]:
import pandas as pd
import numpy as np

# Reloading the dataset to work with the variables
file_path = "CKD_EHR.csv"
data = pd.read_csv(file_path)

In [None]:
!pip install scikit-survival
!pip install lifelines
!pip install torchtuples

!pip install scikit-learn==1.2.2
!pip install imbalanced-learn==0.9.1

Collecting scikit-survival
  Downloading scikit_survival-0.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/49.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.0/49.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Downloading scikit_survival-0.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.7/3.7 MB[0m [31m40.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-survival
Successfully installed scikit-survival-0.23.1
Collecting lifelines
  Downloading lifelines-0.30.0-py3-none-any.whl.metadata (3.2 kB)
Collecting autograd-gamma>=0.3 (from lifelines)
  Downloading autograd-gamma-0.5.0.tar.gz (4.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting formulaic>=0.2.2 (from lifelines)
  Downloading formulaic

In [None]:
# Basic libraries
import pandas as pd
import numpy as np
import random
import os

# Survival analysis
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
from lifelines.plotting import add_at_risk_counts
from lifelines.statistics import proportional_hazard_test

# Machine learning and data processing
from sklearn.model_selection import RepeatedStratifiedKFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn_pandas import DataFrameMapper
from sklearn.metrics import roc_auc_score

# XGBoost library
import xgboost as xgb

# PyTorch libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Plotting
import matplotlib.pyplot as plt

def seed_everything(seed=42):
    """
    Seed everything to make all operations in PyTorch deterministic.
    Args:
        seed (int): Seed value to set. Default is 42.
    """
    random.seed(seed)         # Python random module.
    np.random.seed(seed)      # Numpy module.
    os.environ['PYTHONHASHSEED'] = str(seed)  # Set PYTHONHASHSEED env variable at a fixed value

    torch.manual_seed(seed)   # Sets the seed for generating random numbers for all devices (both CPU and CUDA).
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # For multi-GPU setups.

    torch.backends.cudnn.deterministic = True  # Forces to use the deterministic algorithm
    torch.backends.cudnn.benchmark = False     # If True, causes cuDNN to benchmark multiple convolution algorithms and select the fastest.

seed_everything()

In [None]:
# Importing pandas to reload the data
file_path = "CKD_EHR.csv"
data = pd.read_csv(file_path)

# Assuming `data` is already loaded with original column names
# Dictionary for renaming columns with the updated meta-type prefixes
rename_dict = {
    'AgeBaseline': 'DEM_Age',
    'Sex': 'DEM_Sex',
    'HistorySmoking': 'DEM_Smoking_History',
    'HistoryObesity': 'DEM_Obesity_History',

    'CholesterolBaseline': 'CBM_Cholesterol',
    'CreatinineBaseline': 'CBM_Creatinine',
    'eGFRBaseline': 'CBM_eGFR',
    'sBPBaseline': 'CBM_Systolic_BP',
    'dBPBaseline': 'CBM_Diastolic_BP',
    'BMIBaseline': 'CBM_BMI',

    'HistoryDiabetes': 'MH_Diabetes_History',
    'HistoryCHD': 'MH_CHD_History',
    'HistoryVascular': 'MH_Vascular_History',
    'HistoryHTN ': 'MH_Hypertension_History',
    'HistoryDLD': 'MH_Dyslipidemia_History',

    'DLDmeds': 'MED_DLD_Meds',
    'DMmeds': 'MED_Diabetes_Meds',
    'HTNmeds': 'MED_HTN_Meds',
    'ACEIARB': 'MED_ACEI_ARB_Use',

    'EventCKD35': 'OUT_EventCKD35',
    'TimeToEventMonths': 'OUT_TimeToEventMonths'
}

# Renaming columns in the dataset
data.rename(columns=rename_dict, inplace=True)

# Reorganizing columns by meta-class
# Defining the desired column order based on meta-types
ordered_columns = [
    # Demographic and Lifestyle Variables
    'DEM_Age', 'DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',

    # Clinical Baseline Measurements
    'CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR', 'CBM_Systolic_BP', 'CBM_Diastolic_BP', 'CBM_BMI',

    # Medical History Variables
    'MH_Diabetes_History', 'MH_CHD_History', 'MH_Vascular_History', 'MH_Hypertension_History', 'MH_Dyslipidemia_History',

    # Medication Use Variables
    'MED_DLD_Meds', 'MED_Diabetes_Meds', 'MED_HTN_Meds', 'MED_ACEI_ARB_Use',

    # Outcome Variables
    'OUT_EventCKD35', 'OUT_TimeToEventMonths'
]

# Reordering the columns in the DataFrame
data = data[ordered_columns]

import copy
my_df = copy.copy(data)

my_df.head()

Unnamed: 0,DEM_Age,DEM_Sex,DEM_Smoking_History,DEM_Obesity_History,CBM_Cholesterol,CBM_Creatinine,CBM_eGFR,CBM_Systolic_BP,CBM_Diastolic_BP,CBM_BMI,...,MH_CHD_History,MH_Vascular_History,MH_Hypertension_History,MH_Dyslipidemia_History,MED_DLD_Meds,MED_Diabetes_Meds,MED_HTN_Meds,MED_ACEI_ARB_Use,OUT_EventCKD35,OUT_TimeToEventMonths
0,64,0,0,1,4.8,59.0,93.3,144,87,40,...,0,0,1,1,1,0,1,0,0,98
1,52,0,0,1,6.4,52.0,105.8,148,91,45,...,0,0,1,1,0,0,1,0,0,106
2,56,0,0,1,6.4,57.0,99.8,149,86,41,...,0,0,1,1,1,0,1,0,0,88
3,58,0,0,1,5.1,65.0,90.3,116,68,32,...,0,0,0,1,1,0,0,0,0,103
4,63,0,0,1,5.0,70.0,79.7,132,63,31,...,0,0,1,1,1,1,1,1,0,105


In [None]:
seed_everything()

from scipy import stats

# Define the columns to transform with the new naming convention
columns_to_transform = [
    "DEM_Age", "CBM_Cholesterol", "CBM_Creatinine",
    "CBM_eGFR", "CBM_Systolic_BP", "CBM_Diastolic_BP", "CBM_BMI", "OUT_TimeToEventMonths"
]

# Initialize a dictionary to store lambda, min, and max values for each variable
transformation_params = {}

# Calculate lambda, min, and max values after Box-Cox transformation for each column
for column in columns_to_transform:
    transformed_col, fitted_lambda = stats.boxcox(my_df[column] + 1)

    # Store results
    transformation_params[column] = {
        'lambda': fitted_lambda,
        'min': transformed_col.min(),
        'max': transformed_col.max() - transformed_col.min()
    }

transformation_params

{'DEM_Age': {'lambda': 1.2662992056388673,
  'min': 43.39012967842432,
  'max': 191.3898534575847},
 'CBM_Cholesterol': {'lambda': 0.6248263691363789,
  'min': 1.7292454725150124,
  'max': 3.5423875279830037},
 'CBM_Creatinine': {'lambda': 0.720326985559311,
  'min': 4.25093822196494,
  'max': 39.07222171574685},
 'CBM_eGFR': {'lambda': 0.1850831973195447,
  'min': 6.159945456661608,
  'max': 3.3776387109546517},
 'CBM_Systolic_BP': {'lambda': -0.1480752913441735,
  'min': 3.3016158023805056,
  'max': 0.32410690297348443},
 'CBM_Diastolic_BP': {'lambda': 0.747046329801293,
  'min': 20.503519942479617,
  'max': 23.90851560861832},
 'CBM_BMI': {'lambda': -0.253506208010147,
  'min': 1.9241673900633185,
  'max': 0.611313003691752},
 'OUT_TimeToEventMonths': {'lambda': 2.244567602827024,
  'min': 0.0,
  'max': 17720.0431725757}}

In [None]:
seed_everything()

import copy
MY_DF = copy.copy(my_df)

# Applying transformations using the values from transformation_params
for column, params in transformation_params.items():
    MY_DF[column] = MY_DF[column].astype(float)
    MY_DF[column] = stats.boxcox(MY_DF[column] + 1, lmbda=params['lambda'])
    MY_DF[column] = (MY_DF[column] - params['min']) / params['max']

# Convert the DataFrame to a PyTorch tensor
MY_DF_TENSOR = torch.tensor(MY_DF.values, dtype=torch.float32)

In [None]:
# Define Attention Layer
class AttentionLayer(nn.Module):
    def __init__(self, input_dim):
        super(AttentionLayer, self).__init__()
        self.attention_weights = nn.Linear(input_dim, input_dim, bias=False)  # Shape: (input_dim, input_dim)

    def forward(self, x, mask):
        # x: (batch_size, num_features)
        attention_scores = self.attention_weights(x)  # Shape: (batch_size, num_features)

        # Apply mask to the attention scores
        attention_scores = attention_scores.masked_fill(mask == 0, float('-inf'))  # Mask invalid positions

        # Compute softmax to get attention weights along feature dimension
        attention_weights = torch.softmax(attention_scores, dim=1)  # Shape: (batch_size, num_features)

        # Apply attention weights to the features (element-wise multiplication)
        weighted_sum = attention_weights * x  # Shape: (batch_size, num_features)

        return weighted_sum  # Return the attention-weighted features


# Define Attention + MLP Model
class AttentionMLPModel(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(AttentionMLPModel, self).__init__()
        self.attention1 = AttentionLayer(input_dim)  # Pass input_dim to AttentionLayer
        self.mlp1 = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),  # Linear layer after attention
            nn.ReLU(),
            nn.LayerNorm(hidden_dim),  # Layer normalization
            nn.Linear(hidden_dim, hidden_dim),  # Output size should match input_dim
            nn.ReLU(),  # Sigmoid activation for output in range [0, 1]
            nn.LayerNorm(hidden_dim),  # Layer normalization
        )

        self.attention2 = AttentionLayer(hidden_dim)  # Pass input_dim to AttentionLayer
        self.mlp2 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),  # Linear layer after attention
            nn.ReLU(),
            nn.LayerNorm(hidden_dim),  # Layer normalization
            nn.Linear(hidden_dim, input_dim),  # Output size should match input_dim
            nn.Sigmoid()  # Sigmoid activation for output in range [0, 1]
        )

        self.I2H = nn.Linear(input_dim, hidden_dim)

    def forward(self, x, mask):
        # Apply the attention mechanism
        attended_features = self.attention1(x, mask)  # Shape: (batch_size, num_features)
        # Pass the attended features through the MLP
        predicted_values = self.mlp1(attended_features)  # Shape: (batch_size, num_features)

        # Apply the attention mechanism
        attended_features = self.attention2(predicted_values + torch.relu(self.I2H(x)),
                                            torch.ones_like(predicted_values)
                                            )  # Shape: (batch_size, num_features)
        # Pass the attended features through the MLP
        predicted_values = self.mlp2(attended_features)  # Shape: (batch_size, num_features)

        return predicted_values


In [None]:
seed_everything()

data = copy.copy(MY_DF_TENSOR)

from torch.utils.data import DataLoader, TensorDataset

# Check if GPU is available and move tensors to the GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Convert dataset to tensor and move to the appropriate device
data = data.to(device)

# Create DataLoader for batches
batch_size = 16
dataset = TensorDataset(data)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize model, loss function, and optimizer
input_dim = data.shape[1]
hidden_dim = 64
model = AttentionMLPModel(input_dim=input_dim, hidden_dim=hidden_dim).to(device)  # Move model to GPU

# Define loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error for reconstruction
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [None]:
seed_everything()

import random

# Training parameters
epochs = 500
min_mask_ratio = 0.10  # Masking between 10% to 95% of data
max_mask_ratio = 0.95

# Training loop
for epoch in range(epochs):
    running_loss = 0.0
    model.train()  # Set model to training mode

    for batch in data_loader:
        inputs = batch[0].to(device)  # Move inputs to GPU
        mask = torch.ones_like(inputs).to(device)  # Initialize mask on GPU

        # Randomly mask 10-95% of the batch data, row-wise
        for i in range(inputs.size(0)):  # Iterate over batch rows
            mask_ratio = random.uniform(min_mask_ratio, max_mask_ratio)
            num_to_mask = int(inputs.size(1) * mask_ratio)
            mask_indices = random.sample(range(inputs.size(1)), num_to_mask)
            mask[i, mask_indices] = 0  # Apply mask (0 for missing data)

        # Forward pass through the model
        outputs = model(inputs, mask)

        # Calculate loss only on masked values
        loss = criterion(outputs * (1 - mask), inputs * (1 - mask))

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print out loss for the epoch
    if (epoch+1) % 50 == 0:
      print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(data_loader):.4f}")


Epoch [50/500], Loss: 0.0039
Epoch [100/500], Loss: 0.0009
Epoch [150/500], Loss: 0.0004
Epoch [200/500], Loss: 0.0001
Epoch [250/500], Loss: 0.0001
Epoch [300/500], Loss: 0.0001
Epoch [350/500], Loss: 0.0001
Epoch [400/500], Loss: 0.0001
Epoch [450/500], Loss: 0.0001
Epoch [500/500], Loss: 0.0001


In [None]:
# Binary conversion function: set 1 for any condition present, 0 for none
def convert_to_binary(series, positive_conditions):
    return series.apply(lambda x: 1 if x in positive_conditions else 0)

# Creating stratified columns
my_df2 = copy.copy(my_df)

#---
my_df2['Strat_Age']         = my_df2['DEM_Age'].apply(lambda x: 'Older' if x >= 65 else 'Younger')
#---
my_df2['Strat_eGFR']        = my_df2['CBM_eGFR'].apply(lambda x: 'Normal' if x >= 90 else 'Non-Ideal')
#---
my_df2['Strat_Diabetes']    = my_df2.apply(
                                lambda  row:  'Diabetes With Medication' if \
                                                  row['MH_Diabetes_History'] == 1 and \
                                                  row['MED_Diabetes_Meds'] == 1 else \
                                              'Diabetes Without Medication' if \
                                                  row['MH_Diabetes_History'] == 1 else \
                                              'No Diabetes',
                                axis=1)
#---
my_df2['Strat_Hypertension'] = my_df2.apply(
                                lambda row:   'Hypertension With Medication' if \
                                                  row['MH_Hypertension_History'] == 1 and \
                                                  (row['MED_HTN_Meds'] == 1 or row['MED_ACEI_ARB_Use'] == 1) else \
                                              'Hypertension Without Medication' if \
                                                  row['MH_Hypertension_History'] == 1 else
                                              'No Hypertension',
                                axis=1)
#---
my_df2['Strat_BP_Level']     = my_df2.apply(
                                lambda row:   'Elevated BP' if \
                                                  row['CBM_Systolic_BP'] >= 140 or \
                                                  row['CBM_Diastolic_BP'] >= 90 else \
                                              'Normal BP',
                                axis=1)
#---
my_df2['Strat_Obesity']      = my_df2.apply(
                                lambda row:   'Obese' if \
                                                  row['CBM_BMI'] >= 30 or row['DEM_Obesity_History'] == 1 else \
                                              'Not Obese',
                                axis=1)
#---
my_df2['Strat_CVD']          = my_df2.apply(
                                lambda row:   'CVD' if \
                                                  row['MH_CHD_History'] == 1 or \
                                                  row['MH_Vascular_History'] == 1 else \
                                              'No CVD',
                                axis=1)
#---
my_df2['Strat_Lipid']        = my_df2.apply(
                                lambda row:   'Dyslipidemia With Medication' if \
                                                  row['MH_Dyslipidemia_History'] == 1 and \
                                                  row['MED_DLD_Meds'] == 1 else \
                                              'Dyslipidemia Without Medication' if \
                                                  row['MH_Dyslipidemia_History'] == 1 else \
                                              'No Dyslipidemia',
                                axis=1)
#---
my_df2['Strat_Dia_Fin']      = my_df2['Strat_Diabetes'].apply(
                                lambda x:     1 if  x != 'No Diabetes' else 0)
#---
my_df2['Strat_Hyper_Fin']    = my_df2.apply(
                                lambda row:   1 if  row['Strat_Hypertension'] != 'No Hypertension' or \
                                                    row['Strat_BP_Level'] == 'Elevated BP' else \
                                              0,
                                axis=1)
#---
my_df2['Strat_Lipid_Fin']    = my_df2['Strat_Lipid'].apply(
                                lambda x:     1 if  x != 'No Dyslipidemia' else 0)

###===###
my_df2['Strat_Age']           = convert_to_binary(my_df2['Strat_Age'], ['Older'])
my_df2['Strat_eGFR']          = convert_to_binary(my_df2['Strat_eGFR'], ['Non-Ideal'])
my_df2['Strat_Diabetes']      = convert_to_binary(my_df2['Strat_Diabetes'], ['Diabetes With Medication', 'Diabetes Without Medication'])
my_df2['Strat_Hypertension']  = convert_to_binary(my_df2['Strat_Hypertension'], ['Hypertension With Medication', 'Hypertension Without Medication'])
my_df2['Strat_BP_Level']      = convert_to_binary(my_df2['Strat_BP_Level'], ['Elevated BP'])
my_df2['Strat_Obesity']       = convert_to_binary(my_df2['Strat_Obesity'], ['Obese'])
my_df2['Strat_CVD']           = convert_to_binary(my_df2['Strat_CVD'], ['CVD'])
my_df2['Strat_Lipid']         = convert_to_binary(my_df2['Strat_Lipid'], ['Dyslipidemia With Medication', 'Dyslipidemia Without Medication'])

In [None]:
# DEM_ variables
dem_sex_ratio             = my_df2['DEM_Sex'].value_counts(normalize=True)
strat_age_ratio           = my_df2['Strat_Age'].value_counts(normalize=True)
dem_smoking_history_ratio = my_df2['DEM_Smoking_History'].value_counts(normalize=True)

# non-DEM_ stratified variables, including the new flags
strat_egfr_ratio          = my_df2['Strat_eGFR'].value_counts(normalize=True)
strat_diabetes_ratio      = my_df2['Strat_Diabetes'].value_counts(normalize=True)
strat_hypertension_ratio  = my_df2['Strat_Hypertension'].value_counts(normalize=True)
strat_bp_level_ratio      = my_df2['Strat_BP_Level'].value_counts(normalize=True)
strat_obesity_ratio       = my_df2['Strat_Obesity'].value_counts(normalize=True)
strat_cvd_ratio           = my_df2['Strat_CVD'].value_counts(normalize=True)
strat_lipid_ratio         = my_df2['Strat_Lipid'].value_counts(normalize=True)
strat_dia_fin_ratio       = my_df2['Strat_Dia_Fin'].value_counts(normalize=True)
strat_hyper_fin_ratio     = my_df2['Strat_Hyper_Fin'].value_counts(normalize=True)
strat_lipid_fin_ratio     = my_df2['Strat_Lipid_Fin'].value_counts(normalize=True)

# Storing results in a structured dictionary
ratios = {
    "DEM_Sex": dem_sex_ratio,
    "Strat_Age": strat_age_ratio,
    "DEM_Smoking_History": dem_smoking_history_ratio,
    "Strat_eGFR": strat_egfr_ratio,
    "Strat_BP_Level": strat_bp_level_ratio,
    "Strat_Obesity": strat_obesity_ratio,
    "Strat_CVD": strat_cvd_ratio,
    "Strat_Dia_Fin": strat_dia_fin_ratio,
    "Strat_Hyper_Fin": strat_hyper_fin_ratio,
    "Strat_Lipid_Fin": strat_lipid_fin_ratio
}

np.round(pd.DataFrame(ratios), 4).T

Unnamed: 0,0,1
DEM_Sex,0.4908,0.5092
Strat_Age,0.7719,0.2281
DEM_Smoking_History,0.8473,0.1527
Strat_eGFR,0.6721,0.3279
Strat_BP_Level,0.6904,0.3096
Strat_Obesity,0.4949,0.5051
Strat_CVD,0.8615,0.1385
Strat_Dia_Fin,0.5621,0.4379
Strat_Hyper_Fin,0.3177,0.6823
Strat_Lipid_Fin,0.3544,0.6456


In [None]:
seed_everything()

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import lifelines
import math

class EvalSurv:
    #---
    def __init__(self, df):
        self.df = df
        self.df["PARTIAL_HAZARD"] = np.exp(self.df["LPH"])

        self.base_surv = None

    #---
    def get_risk_perc(self, at_time):
        return 100 * self.get_risk(at_time)

    def get_risk(self, at_time):
        return 1 - self.get_surv(at_time)

    def get_surv(self, at_time):
        # Handle time zero explicitly
        if at_time == 0:
            return np.ones(len(self.df))

        return np.power(
                  self.get_base_surv(at_time),
                  self.df["PARTIAL_HAZARD"]
              )

    def get_base_surv(self, at_time):
        if self.base_surv is None:
            self.compute_baseline_survival()

        return self.base_surv.loc[self.base_surv.index <= at_time].min()

    #---
    def compute_baseline_survival(self):
        df = self.df[["TIME", "EVENT", "PARTIAL_HAZARD"]]
        df = df.groupby(["TIME"]).sum().\
                sort_index(ascending = False)

        df["CUM_PARTIAL_HAZARD"] = df["PARTIAL_HAZARD"].cumsum()

        df = df[df["EVENT"] > 0]
        df["ALPHA"] = np.exp(
                        -df["EVENT"] / df["CUM_PARTIAL_HAZARD"])
        df.sort_index(inplace = True)
        df["S0"] = df["ALPHA"].cumprod()
        self.base_surv = df["S0"]


In [None]:
seed_everything()

from sklearn.model_selection import train_test_split
import copy

data_copy = copy.copy(my_df2)
covariate_cols = [
       'DEM_Age', 'DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',
       'CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR', 'CBM_Systolic_BP',
       'CBM_Diastolic_BP', 'CBM_BMI', 'MH_Diabetes_History', 'MH_CHD_History',
       'MH_Vascular_History', 'MH_Hypertension_History',
       'MH_Dyslipidemia_History', 'MED_DLD_Meds', 'MED_Diabetes_Meds',
       'MED_HTN_Meds', 'MED_ACEI_ARB_Use'
]
time_col = 'OUT_TimeToEventMonths'
event_col = 'OUT_EventCKD35'

Strat_col = ['Strat_Age', 'Strat_eGFR', 'Strat_BP_Level', 'Strat_Obesity',
             'Strat_CVD', 'Strat_Dia_Fin', 'Strat_Hyper_Fin', 'Strat_Lipid_Fin']

my_Selected = covariate_cols + [time_col, event_col] + Strat_col
data_copy = data_copy[my_Selected]

Calibration_df = copy.copy(data_copy)

num_folds = 5
for i in range(num_folds):
  df_trn, df_tst = \
    train_test_split(
        Calibration_df,
        test_size=0.5,
        train_size = 0.5,
        shuffle = True,
        stratify = Calibration_df[event_col])

  Calibration_df.loc[df_trn.index, "FOLD_"+str(i)] = 0
  Calibration_df.loc[df_tst.index, "FOLD_"+str(i)] = 1

Calibration_df.to_csv("CKD_FoldSwap.csv")

In [None]:
Calibration_df.head()

Unnamed: 0,DEM_Age,DEM_Sex,DEM_Smoking_History,DEM_Obesity_History,CBM_Cholesterol,CBM_Creatinine,CBM_eGFR,CBM_Systolic_BP,CBM_Diastolic_BP,CBM_BMI,...,Strat_Obesity,Strat_CVD,Strat_Dia_Fin,Strat_Hyper_Fin,Strat_Lipid_Fin,FOLD_0,FOLD_1,FOLD_2,FOLD_3,FOLD_4
0,64,0,0,1,4.8,59.0,93.3,144,87,40,...,1,0,0,1,1,0.0,1.0,1.0,1.0,0.0
1,52,0,0,1,6.4,52.0,105.8,148,91,45,...,1,0,0,1,1,1.0,0.0,1.0,1.0,0.0
2,56,0,0,1,6.4,57.0,99.8,149,86,41,...,1,0,0,1,1,0.0,0.0,1.0,0.0,1.0
3,58,0,0,1,5.1,65.0,90.3,116,68,32,...,1,0,0,0,1,0.0,0.0,0.0,1.0,1.0
4,63,0,0,1,5.0,70.0,79.7,132,63,31,...,1,0,1,1,1,1.0,1.0,1.0,0.0,0.0


In [None]:
def process_data_base(Calibration_df):
    file_suffix = "original"
    PredictorCols = covariate_cols

    for fold in range(num_folds):
        for swap in range(2):
            idx = (Calibration_df[f"FOLD_{fold}"] == (1 - swap))
            x = Calibration_df.loc[idx, PredictorCols]
            time  = Calibration_df.loc[idx, time_col]
            event = Calibration_df.loc[idx, event_col]

            tim = pd.DataFrame(x)
            tim["TIME"] = time
            tim["EVENT"] = event

            cph = CoxPHFitter(penalizer=0.01)

            cph.fit(tim, duration_col='TIME', event_col='EVENT')

            # Generate predictions for the swap set
            idx = (Calibration_df[f"FOLD_{fold}"] == swap)
            x = Calibration_df.loc[idx, PredictorCols]

            df_cox = pd.DataFrame(
                {"LPH": np.dot(x - cph._norm_mean.values, cph.params_)}
            )

            df_cox.to_csv(f"CKD_FoldSwap_{fold}_{swap}_{file_suffix}.csv")


In [None]:
Strat_col

['Strat_Age',
 'Strat_eGFR',
 'Strat_BP_Level',
 'Strat_Obesity',
 'Strat_CVD',
 'Strat_Dia_Fin',
 'Strat_Hyper_Fin',
 'Strat_Lipid_Fin']

In [None]:
list(ratios.keys())

['DEM_Sex',
 'Strat_Age',
 'DEM_Smoking_History',
 'Strat_eGFR',
 'Strat_BP_Level',
 'Strat_Obesity',
 'Strat_CVD',
 'Strat_Dia_Fin',
 'Strat_Hyper_Fin',
 'Strat_Lipid_Fin']

In [None]:
seed_everything()

def inverse_boxcox_pytorch(data, lmbda):
    """Apply the inverse Box-Cox transformation using PyTorch."""
    if lmbda != 0:
        return torch.exp(torch.log(lmbda * data + 1) / lmbda) - 1
    else:
        return torch.exp(data) - 1

mask_ratio = 0.5
model.eval()

###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
###===######===######===###
All_Synth_Data_50PC = {}

for Cur_Strat in list(ratios.keys()):
    print("###===######===######===###")
    print(Cur_Strat)

    for now01 in [0, 1]:
        for now_cur_itr in range(5):
            ###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
            ###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
            ###===######===######===###
            cur_Loc = list(np.where(my_df2[Cur_Strat].values == now01)[0])
            synth_df_MLM = copy.copy(MY_DF_TENSOR)
            synth_df_MLM = synth_df_MLM[cur_Loc, :]

            synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)

            ###===######===######===###
            mask = torch.ones_like(synth_data_tensor).to(device)
            for i in range(synth_data_tensor.size(0)):
                num_to_mask = int(synth_data_tensor.size(1) * mask_ratio)
                mask_indices = random.sample(range(synth_data_tensor.size(1)), num_to_mask)
                mask[i, mask_indices] = 0

            with torch.no_grad():  # No need to compute gradients during inference
                synthetic_output = model(synth_data_tensor, mask)

            ###===######===######===###
            synth_data_tensor = mask * synth_data_tensor + (1 - mask) * synthetic_output

            ###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
            ###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
            ###===######===######===###
            synth_data_mod = synth_data_tensor.clone()

            for idx, (column, params) in enumerate(transformation_params.items()):
                C_loc = np.where(np.array(my_df.columns == column))[0][0]

                synth_data_mod[:, C_loc] = synth_data_mod[:, C_loc] * params['max'] + params['min']
                synth_data_mod[:, C_loc] = inverse_boxcox_pytorch(synth_data_mod[:, C_loc], params['lambda'])

            synth_data_df = pd.DataFrame(synth_data_mod.detach().cpu().numpy(), columns=MY_DF.columns)

            ###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
            ###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
            ###===######===######===###
            synth_df_MLM = copy.copy(synth_data_df)

            ###===######===######===###
            Selected_Bin_Col = ['DEM_Sex', 'DEM_Smoking_History', 'DEM_Obesity_History',
                                'MH_Diabetes_History', 'MH_CHD_History',
                                'MH_Vascular_History', 'MH_Hypertension_History',
                                'MH_Dyslipidemia_History',
                                'MED_DLD_Meds', 'MED_Diabetes_Meds',
                                'MED_HTN_Meds', 'MED_ACEI_ARB_Use', 'OUT_EventCKD35']

            synth_df_MLM[Selected_Bin_Col] = \
              (synth_df_MLM[Selected_Bin_Col] > 0.5).astype(int)

            ###===######===######===###
            Selected_Num_Col = ['DEM_Age', 'OUT_TimeToEventMonths']

            synth_df_MLM[Selected_Num_Col] = \
              np.round(synth_df_MLM[Selected_Num_Col])

            ###===######===######===###
            Selected_Num_Col_r = ['CBM_Cholesterol', 'CBM_Creatinine', 'CBM_eGFR',
                                  'CBM_Systolic_BP',
                                  'CBM_Diastolic_BP', 'CBM_BMI']

            synth_df_MLM[Selected_Num_Col_r] = \
              np.round(synth_df_MLM[Selected_Num_Col_r], 2)

            ###===######===######===###
            synth_data_df = copy.copy(synth_df_MLM)

            ###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
            ###===>>>>>>>>>>>>>>>>>>>>>###===>>>>>>>>>>>>>>>>>>>>>
            ###===######===######===###
            All_Synth_Data_50PC[f"{Cur_Strat}_{now01}_{now_cur_itr}"] = synth_data_df


###===######===######===###
DEM_Sex


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)


###===######===######===###
Strat_Age


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.

###===######===######===###
DEM_Smoking_History


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)


###===######===######===###
Strat_eGFR


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)


###===######===######===###
Strat_BP_Level


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)


###===######===######===###
Strat_Obesity


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)


###===######===######===###
Strat_CVD


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.

###===######===######===###
Strat_Dia_Fin


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)


###===######===######===###
Strat_Hyper_Fin


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)


###===######===######===###
Strat_Lipid_Fin


  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)
  synth_data_tensor = torch.tensor(synth_df_MLM, dtype=torch.float32).to(device)


In [None]:
All_Synth_Data_50PC.keys()

dict_keys(['DEM_Sex_0_0', 'DEM_Sex_0_1', 'DEM_Sex_0_2', 'DEM_Sex_0_3', 'DEM_Sex_0_4', 'DEM_Sex_1_0', 'DEM_Sex_1_1', 'DEM_Sex_1_2', 'DEM_Sex_1_3', 'DEM_Sex_1_4', 'Strat_Age_0_0', 'Strat_Age_0_1', 'Strat_Age_0_2', 'Strat_Age_0_3', 'Strat_Age_0_4', 'Strat_Age_1_0', 'Strat_Age_1_1', 'Strat_Age_1_2', 'Strat_Age_1_3', 'Strat_Age_1_4', 'DEM_Smoking_History_0_0', 'DEM_Smoking_History_0_1', 'DEM_Smoking_History_0_2', 'DEM_Smoking_History_0_3', 'DEM_Smoking_History_0_4', 'DEM_Smoking_History_1_0', 'DEM_Smoking_History_1_1', 'DEM_Smoking_History_1_2', 'DEM_Smoking_History_1_3', 'DEM_Smoking_History_1_4', 'Strat_eGFR_0_0', 'Strat_eGFR_0_1', 'Strat_eGFR_0_2', 'Strat_eGFR_0_3', 'Strat_eGFR_0_4', 'Strat_eGFR_1_0', 'Strat_eGFR_1_1', 'Strat_eGFR_1_2', 'Strat_eGFR_1_3', 'Strat_eGFR_1_4', 'Strat_BP_Level_0_0', 'Strat_BP_Level_0_1', 'Strat_BP_Level_0_2', 'Strat_BP_Level_0_3', 'Strat_BP_Level_0_4', 'Strat_BP_Level_1_0', 'Strat_BP_Level_1_1', 'Strat_BP_Level_1_2', 'Strat_BP_Level_1_3', 'Strat_BP_Level_1_4',

In [None]:
All_Synth_Data_50PC['Strat_Age_0_4']

Unnamed: 0,DEM_Age,DEM_Sex,DEM_Smoking_History,DEM_Obesity_History,CBM_Cholesterol,CBM_Creatinine,CBM_eGFR,CBM_Systolic_BP,CBM_Diastolic_BP,CBM_BMI,...,MH_CHD_History,MH_Vascular_History,MH_Hypertension_History,MH_Dyslipidemia_History,MED_DLD_Meds,MED_Diabetes_Meds,MED_HTN_Meds,MED_ACEI_ARB_Use,OUT_EventCKD35,OUT_TimeToEventMonths
0,64.0,0,0,1,4.80,59.000000,93.750000,146.160004,87.000000,41.040001,...,0,0,1,1,1,0,1,0,0,97.0
1,51.0,0,0,1,6.40,50.930000,108.349998,151.300003,91.989998,45.000000,...,0,0,1,1,0,0,1,0,0,106.0
2,56.0,0,0,1,6.65,57.000000,99.800003,149.000000,87.129997,41.000000,...,0,0,1,1,1,0,1,0,0,85.0
3,58.0,0,0,1,5.30,64.589996,90.050003,116.000000,69.070000,32.000000,...,0,0,0,1,1,0,0,0,0,102.0
4,63.0,0,0,1,5.26,70.070000,82.139999,132.000000,62.759998,31.000000,...,0,0,1,1,1,1,1,1,0,103.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374,25.0,1,0,0,5.30,73.000000,122.599998,127.000000,74.779999,23.910000,...,0,0,0,0,0,0,0,0,0,102.0
375,45.0,1,0,1,5.98,67.000000,110.300003,150.000000,112.000000,33.139999,...,0,0,1,0,0,0,0,0,0,90.0
376,43.0,1,0,1,6.41,66.300003,111.120003,126.000000,83.000000,34.259998,...,0,0,0,1,1,1,0,0,0,0.0
377,40.0,1,1,0,4.00,93.000000,86.510002,118.849998,64.949997,22.000000,...,0,1,1,1,1,0,1,1,1,35.0


In [None]:
def process_data_stratified(Calibration_df):

    PredictorCols = covariate_cols

    ###===######===######===######===######===######===###
    for Cur_Strat in list(ratios.keys()):
        print("###===######===######===###")
        print(Cur_Strat)

        ###===######===######===######===######===######===###
        for now01 in [0, 1]:
            for now_cur_itr in range(5):
                ###===######===######===######===######===######===###
                Cur_keys = f"{Cur_Strat}_{now01}_{now_cur_itr}"

                file_suffix = "augmented_" + Cur_keys

                ###===######===######===######===######===######===###
                for fold in range(num_folds):
                    for swap in range(2):
                        idx = (Calibration_df[f"FOLD_{fold}"] == (1 - swap))
                        x = Calibration_df.loc[idx, PredictorCols]
                        time  = Calibration_df.loc[idx, time_col]
                        event = Calibration_df.loc[idx, event_col]

                        tim = pd.DataFrame(x)
                        tim["TIME"] = time
                        tim["EVENT"] = event

                        cph = CoxPHFitter(penalizer=0.01)

                        ###===######===######===######===######===######===###
                        synth_data_df = All_Synth_Data_50PC[Cur_keys]
                        tim_synth           = synth_data_df[PredictorCols]
                        tim_synth["TIME"]   = synth_data_df[time_col]
                        tim_synth["EVENT"]  = synth_data_df[event_col]

                        cph.fit(pd.concat([tim, tim_synth]), duration_col='TIME', event_col='EVENT')

                        ###===######===######===######===######===######===###
                        # Generate predictions for the swap set
                        idx = (Calibration_df[f"FOLD_{fold}"] == swap)
                        x = Calibration_df.loc[idx, PredictorCols]
                        df_cox = pd.DataFrame(
                            {"LPH": np.dot(x - cph._norm_mean.values, cph.params_)}
                        )

                        df_cox.to_csv(f"CKD_FoldSwap_{fold}_{swap}_{file_suffix}.csv")


In [None]:
seed_everything()

process_data_base(Calibration_df)

In [None]:
seed_everything()

process_data_stratified(Calibration_df)

###===######===######===###
DEM_Sex


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


###===######===######===###
Strat_Age


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


###===######===######===###
DEM_Smoking_History


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


###===######===######===###
Strat_eGFR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


###===######===######===###
Strat_BP_Level


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


###===######===######===###
Strat_Obesity


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


###===######===######===###
Strat_CVD


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


###===######===######===###
Strat_Dia_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


###===######===######===###
Strat_Hyper_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


###===######===######===###
Strat_Lipid_Fin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tim_synth["TIME"]   = synth_data_df[time_col]
A value is trying to be set on a copy of a slice from a DataFrame.


In [None]:
###===######===######===######===######===###
###===######===######===######===######===###
# check out:
# 2024-08-07_WHAS_Calibration_All

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

###===######===######===######===######===###
###===######===######===######===######===###
def compute_calibration_slope(calibration_data):
    # Fit a linear regression model
    reg = LinearRegression(fit_intercept = False)
    reg.fit(calibration_data["RISK_PERC"].values.reshape(-1, 1),
            calibration_data["EVENT_PERC"].values)

    # Get the calibration slope
    calibration_slope = reg.coef_[0]
    return calibration_slope

###===######===######===######===######===###
###===######===######===######===######===###
def analyze_now(Calibration_df, file_suffix, cur_var, cur_value):
    PredictorCols = covariate_cols

    x     = Calibration_df[PredictorCols]
    time  = Calibration_df[time_col]
    event = Calibration_df[event_col]

    df_cox = pd.DataFrame(x)
    df_cox["TIME"] = time
    df_cox["EVENT"] = event

    lph_matrix_cox = np.zeros((df_cox.shape[0], num_folds))

    for fold in range(num_folds):
        for swap in range(2):
            idx = (Calibration_df[f"FOLD_{fold}"] == swap)
            temp = pd.read_csv(f"CKD_FoldSwap_{fold}_{swap}_{file_suffix}.csv")
            lph_matrix_cox[idx, fold] = temp["LPH"]

    df_cox["LPH"] = lph_matrix_cox.mean(axis=1)

    es_cox = EvalSurv(df_cox.copy())

    # Calculate time points at 25th, 50th, and 75th percentiles
    time_points = [
        np.percentile(Calibration_df[time_col], 25),
        np.percentile(Calibration_df[time_col], 50),
        np.percentile(Calibration_df[time_col], 75)
    ]

    results = {}

    for t in time_points:
        # Compute RISK_PERC for the specific time point
        df_cox["RISK_PERC"] = es_cox.get_risk_perc(at_time=t)

        # Apply condition to filter data
        CONDITION = my_df2[cur_var] == cur_value
        CONDITION = CONDITION.astype(bool)

        subset = df_cox.loc[CONDITION].copy()
        subset["QUANTILE"] = pd.qcut(subset["RISK_PERC"], q=20, labels=range(20))

        # Aggregate calibration data for the quantiles
        calibration_data = subset.groupby("QUANTILE", observed=True).agg({"RISK_PERC": "mean", "EVENT": "sum"}).copy()
        calibration_data["EVENT_PERC"] = calibration_data["EVENT"] / (len(df_cox.index) / 20) * 100
        calibration_data.reset_index(inplace=True)

        # Compute calibration slope and calibration error
        calibration_slope = compute_calibration_slope(calibration_data)
        calibration_error = np.abs(1 - calibration_slope)

        # Store results for this time point
        results[t] = {"calibration_slope": calibration_slope, "calibration_error": calibration_error}

    return results


In [None]:
def analyze_now_aug(Calibration_df, file_suffix, cur_var, cur_value):

    # Define percentiles for different time points
    time_points = [
        np.percentile(Calibration_df[time_col], 25),
        np.percentile(Calibration_df[time_col], 50),
        np.percentile(Calibration_df[time_col], 75)
    ]

    # Initialize dictionaries to store results for each time point
    Cali_results = {t: [] for t in time_points}
    Dto1_results = {t: [] for t in time_points}

    for itr in range(5):
        PredictorCols = covariate_cols

        x     = Calibration_df[PredictorCols]
        time  = Calibration_df[time_col]
        event = Calibration_df[event_col]

        df_cox = pd.DataFrame(x)
        df_cox["TIME"] = time
        df_cox["EVENT"] = event

        lph_matrix_cox = np.zeros((df_cox.shape[0], num_folds))

        for fold in range(num_folds):
            for swap in range(2):
                idx = (Calibration_df[f"FOLD_{fold}"] == swap)
                temp = pd.read_csv(f"CKD_FoldSwap_{fold}_{swap}_{file_suffix}_{itr}.csv")
                lph_matrix_cox[idx, fold] = temp["LPH"]

        df_cox["LPH"] = lph_matrix_cox.mean(axis=1)

        es_cox = EvalSurv(df_cox.copy())

        # Loop through each time point and calculate calibration metrics
        for t in time_points:
            df_cox["RISK_PERC"] = es_cox.get_risk_perc(at_time=t)

            # Apply condition to filter data
            CONDITION = my_df2[cur_var] == cur_value
            CONDITION = CONDITION.astype(bool)

            subset = df_cox.loc[CONDITION].copy()
            subset["QUANTILE"] = pd.qcut(subset["RISK_PERC"], q=20, labels=range(20))

            calibration_data = subset.groupby("QUANTILE", observed=True).agg({"RISK_PERC": "mean", "EVENT": "sum"}).copy()
            calibration_data["EVENT_PERC"] = calibration_data["EVENT"] / (len(df_cox.index) / 20) * 100
            calibration_data.reset_index(inplace=True)

            # Compute calibration slope and calibration error (D21 Score)
            calibration_slope = compute_calibration_slope(calibration_data)
            d21 = np.abs(1 - calibration_slope)

            # Append results for each time point
            Cali_results[t].append(calibration_slope)
            Dto1_results[t].append(d21)

    # Calculate mean and standard deviation for each time point
    final_results = {}
    for t in time_points:
        Cali_mean = np.mean(Cali_results[t])
        Cali_std  = np.std(Cali_results[t])
        Dto1_mean = np.mean(Dto1_results[t])
        Dto1_std  = np.std(Dto1_results[t])

        final_results[t] = {
            "Cali_mean": Cali_mean,
            "Cali_std": Cali_std,
            "Dto1_mean": Dto1_mean,
            "Dto1_std": Dto1_std
        }

    return final_results

In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: DEM_Sex_0")
seed_everything()

cur_var, cur_value = "DEM_Sex", 0
org = "original"
aug = "augmented_DEM_Sex_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: DEM_Sex_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6817
D21 Score (Calibration Error): 0.3183

At time 93.00:
Calibration Slope: 0.5397
D21 Score (Calibration Error): 0.4603

At time 100.00:
Calibration Slope: 0.4914
D21 Score (Calibration Error): 0.5086

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.7639 (0.0056)
D21 Score:         0.2361 (0.0056)

At time 93.00:
Calibration Slope: 0.5833 (0.0033)
D21 Score:         0.4167 (0.0033)

At time 100.00:
Calibration Slope: 0.5274 (0.0027)
D21 Score:         0.4726 (0.0027)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: DEM_Sex_1")
seed_everything()

cur_var, cur_value = "DEM_Sex", 1
org = "original"
aug = "augmented_DEM_Sex_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: DEM_Sex_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.4709
D21 Score (Calibration Error): 0.5291

At time 93.00:
Calibration Slope: 0.4034
D21 Score (Calibration Error): 0.5966

At time 100.00:
Calibration Slope: 0.3799
D21 Score (Calibration Error): 0.6201

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.5120 (0.0070)
D21 Score:         0.4880 (0.0070)

At time 93.00:
Calibration Slope: 0.4313 (0.0043)
D21 Score:         0.5687 (0.0043)

At time 100.00:
Calibration Slope: 0.4009 (0.0034)
D21 Score:         0.5991 (0.0034)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Age_0")
seed_everything()

cur_var, cur_value = "Strat_Age", 0
org = "original"
aug = "augmented_Strat_Age_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Age_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.9115
D21 Score (Calibration Error): 0.0885

At time 93.00:
Calibration Slope: 0.7595
D21 Score (Calibration Error): 0.2405

At time 100.00:
Calibration Slope: 0.7068
D21 Score (Calibration Error): 0.2932

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 1.0101 (0.0094)
D21 Score:         0.0123 (0.0062)

At time 93.00:
Calibration Slope: 0.8336 (0.0090)
D21 Score:         0.1664 (0.0090)

At time 100.00:
Calibration Slope: 0.7702 (0.0090)
D21 Score:         0.2298 (0.0090)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Age_1")
seed_everything()

cur_var, cur_value = "Strat_Age", 1
org = "original"
aug = "augmented_Strat_Age_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Age_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.1885
D21 Score (Calibration Error): 0.8115

At time 93.00:
Calibration Slope: 0.1613
D21 Score (Calibration Error): 0.8387

At time 100.00:
Calibration Slope: 0.1517
D21 Score (Calibration Error): 0.8483

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.2341 (0.0029)
D21 Score:         0.7659 (0.0029)

At time 93.00:
Calibration Slope: 0.1924 (0.0020)
D21 Score:         0.8076 (0.0020)

At time 100.00:
Calibration Slope: 0.1779 (0.0018)
D21 Score:         0.8221 (0.0018)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: DEM_Smoking_History_0")
seed_everything()

cur_var, cur_value = "DEM_Smoking_History", 0
org = "original"
aug = "augmented_DEM_Smoking_History_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: DEM_Smoking_History_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.9119
D21 Score (Calibration Error): 0.0881

At time 93.00:
Calibration Slope: 0.7499
D21 Score (Calibration Error): 0.2501

At time 100.00:
Calibration Slope: 0.6942
D21 Score (Calibration Error): 0.3058

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.9810 (0.0087)
D21 Score:         0.0190 (0.0087)

At time 93.00:
Calibration Slope: 0.7754 (0.0049)
D21 Score:         0.2246 (0.0049)

At time 100.00:
Calibration Slope: 0.7083 (0.0040)
D21 Score:         0.2917 (0.0040)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: DEM_Smoking_History_1")
seed_everything()

cur_var, cur_value = "DEM_Smoking_History", 1
org = "original"
aug = "augmented_DEM_Smoking_History_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: DEM_Smoking_History_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.1501
D21 Score (Calibration Error): 0.8499

At time 93.00:
Calibration Slope: 0.1314
D21 Score (Calibration Error): 0.8686

At time 100.00:
Calibration Slope: 0.1250
D21 Score (Calibration Error): 0.8750

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.1687 (0.0011)
D21 Score:         0.8313 (0.0011)

At time 93.00:
Calibration Slope: 0.1477 (0.0008)
D21 Score:         0.8523 (0.0008)

At time 100.00:
Calibration Slope: 0.1389 (0.0007)
D21 Score:         0.8611 (0.0007)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_eGFR_0")
seed_everything()

cur_var, cur_value = "Strat_eGFR", 0
org = "original"
aug = "augmented_Strat_eGFR_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_eGFR_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.7893
D21 Score (Calibration Error): 0.2107

At time 93.00:
Calibration Slope: 0.5760
D21 Score (Calibration Error): 0.4240

At time 100.00:
Calibration Slope: 0.5047
D21 Score (Calibration Error): 0.4953

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 1.0645 (0.0103)
D21 Score:         0.0645 (0.0103)

At time 93.00:
Calibration Slope: 0.7828 (0.0073)
D21 Score:         0.2172 (0.0073)

At time 100.00:
Calibration Slope: 0.6886 (0.0069)
D21 Score:         0.3114 (0.0069)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_eGFR_1")
seed_everything()

cur_var, cur_value = "Strat_eGFR", 1
org = "original"
aug = "augmented_Strat_eGFR_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_eGFR_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.3041
D21 Score (Calibration Error): 0.6959

At time 93.00:
Calibration Slope: 0.2625
D21 Score (Calibration Error): 0.7375

At time 100.00:
Calibration Slope: 0.2480
D21 Score (Calibration Error): 0.7520

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.3439 (0.0029)
D21 Score:         0.6561 (0.0029)

At time 93.00:
Calibration Slope: 0.2889 (0.0018)
D21 Score:         0.7111 (0.0018)

At time 100.00:
Calibration Slope: 0.2692 (0.0015)
D21 Score:         0.7308 (0.0015)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_BP_Level_0")
seed_everything()

cur_var, cur_value = "Strat_BP_Level", 0
org = "original"
aug = "augmented_Strat_BP_Level_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_BP_Level_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6511
D21 Score (Calibration Error): 0.3489

At time 93.00:
Calibration Slope: 0.5421
D21 Score (Calibration Error): 0.4579

At time 100.00:
Calibration Slope: 0.5044
D21 Score (Calibration Error): 0.4956

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.7321 (0.0153)
D21 Score:         0.2679 (0.0153)

At time 93.00:
Calibration Slope: 0.5820 (0.0111)
D21 Score:         0.4180 (0.0111)

At time 100.00:
Calibration Slope: 0.5311 (0.0096)
D21 Score:         0.4689 (0.0096)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_BP_Level_1")
seed_everything()

cur_var, cur_value = "Strat_BP_Level", 1
org = "original"
aug = "augmented_Strat_BP_Level_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_BP_Level_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.3358
D21 Score (Calibration Error): 0.6642

At time 93.00:
Calibration Slope: 0.2891
D21 Score (Calibration Error): 0.7109

At time 100.00:
Calibration Slope: 0.2728
D21 Score (Calibration Error): 0.7272

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.3814 (0.0019)
D21 Score:         0.6186 (0.0019)

At time 93.00:
Calibration Slope: 0.3202 (0.0016)
D21 Score:         0.6798 (0.0016)

At time 100.00:
Calibration Slope: 0.2983 (0.0015)
D21 Score:         0.7017 (0.0015)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Obesity_0")
seed_everything()

cur_var, cur_value = "Strat_Obesity", 0
org = "original"
aug = "augmented_Strat_Obesity_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Obesity_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.5387
D21 Score (Calibration Error): 0.4613

At time 93.00:
Calibration Slope: 0.4413
D21 Score (Calibration Error): 0.5587

At time 100.00:
Calibration Slope: 0.4081
D21 Score (Calibration Error): 0.5919

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.6326 (0.0069)
D21 Score:         0.3674 (0.0069)

At time 93.00:
Calibration Slope: 0.4988 (0.0039)
D21 Score:         0.5012 (0.0039)

At time 100.00:
Calibration Slope: 0.4546 (0.0031)
D21 Score:         0.5454 (0.0031)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Obesity_1")
seed_everything()

cur_var, cur_value = "Strat_Obesity", 1
org = "original"
aug = "augmented_Strat_Obesity_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Obesity_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.5133
D21 Score (Calibration Error): 0.4867

At time 93.00:
Calibration Slope: 0.4384
D21 Score (Calibration Error): 0.5616

At time 100.00:
Calibration Slope: 0.4121
D21 Score (Calibration Error): 0.5879

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.5673 (0.0027)
D21 Score:         0.4327 (0.0027)

At time 93.00:
Calibration Slope: 0.4737 (0.0024)
D21 Score:         0.5263 (0.0024)

At time 100.00:
Calibration Slope: 0.4406 (0.0024)
D21 Score:         0.5594 (0.0024)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_CVD_0")
seed_everything()

cur_var, cur_value = "Strat_CVD", 0
org = "original"
aug = "augmented_Strat_CVD_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_CVD_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 1.0966
D21 Score (Calibration Error): 0.0966

At time 93.00:
Calibration Slope: 0.8741
D21 Score (Calibration Error): 0.1259

At time 100.00:
Calibration Slope: 0.7979
D21 Score (Calibration Error): 0.2021

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 1.0052 (0.0626)
D21 Score:         0.0607 (0.0160)

At time 93.00:
Calibration Slope: 0.8092 (0.0496)
D21 Score:         0.1908 (0.0496)

At time 100.00:
Calibration Slope: 0.7424 (0.0470)
D21 Score:         0.2576 (0.0470)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_CVD_1")
seed_everything()

cur_var, cur_value = "Strat_CVD", 1
org = "original"
aug = "augmented_Strat_CVD_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_CVD_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.1314
D21 Score (Calibration Error): 0.8686

At time 93.00:
Calibration Slope: 0.1140
D21 Score (Calibration Error): 0.8860

At time 100.00:
Calibration Slope: 0.1079
D21 Score (Calibration Error): 0.8921

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.1672 (0.0015)
D21 Score:         0.8328 (0.0015)

At time 93.00:
Calibration Slope: 0.1381 (0.0010)
D21 Score:         0.8619 (0.0010)

At time 100.00:
Calibration Slope: 0.1272 (0.0009)
D21 Score:         0.8728 (0.0009)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Dia_Fin_0")
seed_everything()

cur_var, cur_value = "Strat_Dia_Fin", 0
org = "original"
aug = "augmented_Strat_Dia_Fin_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Dia_Fin_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 1.0268
D21 Score (Calibration Error): 0.0268

At time 93.00:
Calibration Slope: 0.7417
D21 Score (Calibration Error): 0.2583

At time 100.00:
Calibration Slope: 0.6464
D21 Score (Calibration Error): 0.3536

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 1.0263 (0.0940)
D21 Score:         0.0907 (0.0361)

At time 93.00:
Calibration Slope: 0.7476 (0.0693)
D21 Score:         0.2524 (0.0693)

At time 100.00:
Calibration Slope: 0.6534 (0.0608)
D21 Score:         0.3466 (0.0608)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Dia_Fin_1")
seed_everything()

cur_var, cur_value = "Strat_Dia_Fin", 1
org = "original"
aug = "augmented_Strat_Dia_Fin_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Dia_Fin_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.4102
D21 Score (Calibration Error): 0.5898

At time 93.00:
Calibration Slope: 0.3542
D21 Score (Calibration Error): 0.6458

At time 100.00:
Calibration Slope: 0.3345
D21 Score (Calibration Error): 0.6655

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.4724 (0.0024)
D21 Score:         0.5276 (0.0024)

At time 93.00:
Calibration Slope: 0.3931 (0.0014)
D21 Score:         0.6069 (0.0014)

At time 100.00:
Calibration Slope: 0.3646 (0.0011)
D21 Score:         0.6354 (0.0011)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Hyper_Fin_0")
seed_everything()

cur_var, cur_value = "Strat_Hyper_Fin", 0
org = "original"
aug = "augmented_Strat_Hyper_Fin_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Hyper_Fin_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6410
D21 Score (Calibration Error): 0.3590

At time 93.00:
Calibration Slope: 0.4776
D21 Score (Calibration Error): 0.5224

At time 100.00:
Calibration Slope: 0.4222
D21 Score (Calibration Error): 0.5778

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.6319 (0.0114)
D21 Score:         0.3681 (0.0114)

At time 93.00:
Calibration Slope: 0.4703 (0.0078)
D21 Score:         0.5297 (0.0078)

At time 100.00:
Calibration Slope: 0.4169 (0.0068)
D21 Score:         0.5831 (0.0068)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Hyper_Fin_1")
seed_everything()

cur_var, cur_value = "Strat_Hyper_Fin", 1
org = "original"
aug = "augmented_Strat_Hyper_Fin_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Hyper_Fin_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6497
D21 Score (Calibration Error): 0.3503

At time 93.00:
Calibration Slope: 0.5566
D21 Score (Calibration Error): 0.4434

At time 100.00:
Calibration Slope: 0.5241
D21 Score (Calibration Error): 0.4759

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.7621 (0.0054)
D21 Score:         0.2379 (0.0054)

At time 93.00:
Calibration Slope: 0.6272 (0.0033)
D21 Score:         0.3728 (0.0033)

At time 100.00:
Calibration Slope: 0.5788 (0.0026)
D21 Score:         0.4212 (0.0026)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Lipid_Fin_0")
seed_everything()

cur_var, cur_value = "Strat_Lipid_Fin", 0
org = "original"
aug = "augmented_Strat_Lipid_Fin_0"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Lipid_Fin_0

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.5065
D21 Score (Calibration Error): 0.4935

At time 93.00:
Calibration Slope: 0.3860
D21 Score (Calibration Error): 0.6140

At time 100.00:
Calibration Slope: 0.3454
D21 Score (Calibration Error): 0.6546

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.5326 (0.0033)
D21 Score:         0.4674 (0.0033)

At time 93.00:
Calibration Slope: 0.4047 (0.0023)
D21 Score:         0.5953 (0.0023)

At time 100.00:
Calibration Slope: 0.3644 (0.0020)
D21 Score:         0.6356 (0.0020)


In [None]:
#dict_keys(['DEM_Sex_0', 'DEM_Sex_1',
#           'Strat_Age_0', 'Strat_Age_1',
#           'DEM_Smoking_History_0', 'DEM_Smoking_History_1',
#           'Strat_eGFR_0', 'Strat_eGFR_1',
#           'Strat_BP_Level_0', 'Strat_BP_Level_1',
#           'Strat_Obesity_0', 'Strat_Obesity_1',
#           'Strat_CVD_0', 'Strat_CVD_1',
#           'Strat_Dia_Fin_0', 'Strat_Dia_Fin_1',
#           'Strat_Hyper_Fin_0', 'Strat_Hyper_Fin_1',
#           'Strat_Lipid_Fin_0', 'Strat_Lipid_Fin_1'])

print("Calibration Condition: Strat_Lipid_Fin_1")
seed_everything()

cur_var, cur_value = "Strat_Lipid_Fin", 1
org = "original"
aug = "augmented_Strat_Lipid_Fin_1"

###===######===######===######===######===######===######===######===###
# Run the analyze_now function for the original data
results_original = analyze_now(Calibration_df, org, cur_var, cur_value)

print("")
print("#---")
print("Original Data, Baseline")
for time_point, metrics in results_original.items():
    calibration_slope = metrics["calibration_slope"]
    calibration_error = metrics["calibration_error"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {calibration_slope:.4f}")
    print(f"D21 Score (Calibration Error): {calibration_error:.4f}")

###===######===######===######===######===######===######===######===###
# Run the analyze_now_aug function for the augmented data
results_augmented = analyze_now_aug(Calibration_df, aug, cur_var, cur_value)

print("")
print("#---")
print("Augmented Data with Synthetic")
for time_point, metrics in results_augmented.items():
    Cali_mean = metrics["Cali_mean"]
    Cali_std = metrics["Cali_std"]
    Dto1_mean = metrics["Dto1_mean"]
    Dto1_std = metrics["Dto1_std"]

    print(f"\nAt time {time_point:.2f}:")
    print(f"Calibration Slope: {Cali_mean:.4f} ({Cali_std:.4f})")
    print(f"D21 Score:         {Dto1_mean:.4f} ({Dto1_std:.4f})")

Calibration Condition: Strat_Lipid_Fin_1

#---
Original Data, Baseline

At time 77.00:
Calibration Slope: 0.6145
D21 Score (Calibration Error): 0.3855

At time 93.00:
Calibration Slope: 0.5277
D21 Score (Calibration Error): 0.4723

At time 100.00:
Calibration Slope: 0.4973
D21 Score (Calibration Error): 0.5027

#---
Augmented Data with Synthetic

At time 77.00:
Calibration Slope: 0.6846 (0.0091)
D21 Score:         0.3154 (0.0091)

At time 93.00:
Calibration Slope: 0.5714 (0.0059)
D21 Score:         0.4286 (0.0059)

At time 100.00:
Calibration Slope: 0.5300 (0.0048)
D21 Score:         0.4700 (0.0048)
