# Subgroups

In [None]:
import pickle
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, roc_auc_score
from constants import targets

# Load the external validation cohort: feature table X and outcome table y.
X = pd.read_pickle('data/external_x.pckl')
y = pd.read_pickle('data/external_y.pckl')

# Columns that must be numeric; entries that cannot be parsed are turned into NaN.
cols_to_convert = ["BMI"]
X[cols_to_convert] = X[cols_to_convert].apply(pd.to_numeric, errors="coerce")

# Questionnaire items that make up the frailty score.
frailty_columns = ["C30_Q3", "C30_Q6", "C30_Q10", "C30_Q12", "C30_Q13"]


def _frailty_item_flag(answer):
    """Map a single item answer to 1 (answer 3 or 4), 0 (answer 1 or 2) or NaN (anything else)."""
    if answer in [3, 4]:
        return 1
    if answer in [1, 2]:
        return 0
    return np.nan


# Frailty score = number of flagged items. With skipna=False a single NaN item
# makes the whole score NaN instead of silently counting as zero.
item_flags = X[frailty_columns].map(_frailty_item_flag)
X["frailty_score"] = item_flags.sum(axis=1, skipna=False)

# "Frail" is NaN where the score is NaN, 1 for a score of at least 3, otherwise 0.
frailty_score = X["frailty_score"]
X["Frail"] = np.select([frailty_score.isna(), frailty_score >= 3], [np.nan, 1], default=0)

# Boolean row selectors for every subgroup, based on dichotomous/threshold variables in X.
subgroup_masks = {
    # not in the data set, if only the common denominator of all cohorts is taken
    'Menopause': X['Menopause'] == 1,
    'Financial difficulties': X['C30_FI_class'] == 1,
    'Obese': X['BMI'] >= 30,
    'Comorbidities > 1': X['Comorbidities'] > 1,
    'Lower educational status': (X['Education_status_1'] == 1) | (X['Education_status_2'] == 1),
    'Frail': X['Frail'] == 1,
    # for predicting assessments that are more than 12 months in the future
    'Future_assessment': X['time_diff'] >= 365,
    # for patients within the first 12 months after diagnosis, where the biggest changes are expected
    'After_diagnosis': X['Assessment_date_days'] <= 365,
}

# One filtered frame per subgroup, plus the unfiltered cohort as the reference population.
populations = {label: X[mask] for label, mask in subgroup_masks.items()}
populations['Full external dataset'] = X

# Persist the feature table including the derived frailty columns.
X.to_csv('data/external_validation_set.csv')

In [None]:
# For every subgroup, report the number of rows and the number of distinct BALANCE_ID values.
for group_label, group_df in populations.items():
    print(f"{group_label}: {len(group_df)} observations, {group_df['BALANCE_ID'].nunique()} patients")

In [None]:
# Print, per target and subgroup, how many rows have the outcome coded as 1 and their share.
for target in targets:
    print(f"\n Evaluating performance for target: {target} \n")
    for group_name, X_group in populations.items():
        # Select this subgroup's outcomes through the row index shared by X and y.
        outcome = y.loc[X_group.index, target]
        total = len(outcome)
        positives = (outcome == 1).sum()
        if total > 0:
            pct = (positives / total) * 100
        else:
            # An empty subgroup has no defined rate; it is reported as 0.
            pct = 0
        print(f"{group_name}: {positives} events ({pct:.2f}% event rate)")

In [None]:
import pandas as pd

# Header-row entries: "<rows> observations, <patients> patients" for every subgroup.
pop_summary = {
    name: f"{len(df)} observations, {df['BALANCE_ID'].nunique()} patients"
    for name, df in populations.items()
}


def _format_events(outcome):
    """Format an outcome series as "<events> (<event rate in %, one decimal>)"; events are values equal to 1."""
    n_obs = len(outcome)
    n_events = (outcome == 1).sum()
    event_rate = (n_events / n_obs) * 100 if n_obs > 0 else 0
    return f"{n_events} ({event_rate:.1f})"


# First row of the table: the population sizes, labelled "Total" in the Target column.
rows = [{"Target": "Total", **pop_summary}]

# One further row per target, with one "<events> (<rate>)" cell per subgroup.
for target in targets:
    row = {"Target": target}
    row.update({
        group_name: _format_events(y.loc[X_group.index, target])
        for group_name, X_group in populations.items()
    })
    rows.append(row)

# Assemble the table, write it to disk and show it as the cell output.
summary_df = pd.DataFrame(rows)
summary_df.to_csv("risk_group_distribution.csv", index=False)
summary_df

In [None]:
import pickle
from sklearn.calibration import calibration_curve
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import resample
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.metrics import (
    roc_auc_score, f1_score, accuracy_score, balanced_accuracy_score, 
    recall_score, average_precision_score, brier_score_loss
)

n_bootstraps = 1000
rng = np.random.RandomState(42)  # fixed seed so the bootstrap intervals are reproducible


def ci_bounds(metric_list):
    """Return the 2.5th and 97.5th percentiles of ``metric_list`` (a 95% percentile interval)."""
    return np.percentile(metric_list, [2.5, 97.5])


def _bootstrap_replicate_metrics(y_true_bs, y_pred_bs, y_proba_bs):
    """Compute every evaluation metric for one bootstrap replicate.

    Parameters
    ----------
    y_true_bs : observed outcomes of the resampled rows.
    y_pred_bs : predicted class labels of the resampled rows.
    y_proba_bs : predicted probabilities of the positive class for the resampled rows.

    Returns
    -------
    dict mapping metric name to its value for this replicate.

    Raises
    ------
    ValueError
        Propagated from the metric functions, e.g. when the replicate contains only one class.
    """
    metrics = {
        'roc_auc': roc_auc_score(y_true_bs, y_proba_bs),
        'f1': f1_score(y_true_bs, y_pred_bs),
        'f1_weighted': f1_score(y_true_bs, y_pred_bs, average='weighted'),
        'accuracy': accuracy_score(y_true_bs, y_pred_bs),
        'balanced_accuracy': balanced_accuracy_score(y_true_bs, y_pred_bs),
        'recall_weighted': recall_score(y_true_bs, y_pred_bs, average='weighted'),
        'average_precision': average_precision_score(y_true_bs, y_proba_bs),
        # Previously declared as a metric (and imported) but never computed.
        'brier_score_loss': brier_score_loss(y_true_bs, y_proba_bs),
    }

    # Clip probabilities so the logit transformation stays finite.
    y_proba_clipped = np.clip(y_proba_bs, 1e-15, 1 - 1e-15)
    logits = np.log(y_proba_clipped / (1 - y_proba_clipped))
    # np.polyfit with degree 1 returns [slope, intercept].
    # NOTE(review): this is an ordinary least-squares fit of the 0/1 outcome on the logit,
    # not the logistic recalibration model usually meant by "calibration slope/intercept"
    # - confirm this is the intended definition before reporting these two numbers.
    slope, intercept = np.polyfit(logits, y_true_bs, 1)
    metrics['calibration_slope'] = slope
    metrics['calibration_intercept'] = intercept
    return metrics


# results[target][group_name][metric] -> {'mean': float, '95% CI': (low, high)}
results = {}

with PdfPages("calibration_plots.pdf") as pdf:
    for target in targets:
        # NOTE(security): pickle.load can execute arbitrary code; only load model files
        # that were produced by this project.
        with open(f'calibrated_model_{target}.pkl', 'rb') as file:
            subgroup_model = pickle.load(file)

        print(f"Evaluating performance for target: {target}")
        results[target] = {}

        for group_name, X_group in populations.items():
            # Select this subgroup's outcomes through the row index shared by X and y.
            y_group = y.loc[X_group.index, target]
            n_obs = len(y_group)

            # An empty subgroup cannot be scored; skip it instead of failing inside predict().
            if n_obs == 0:
                print(f"Skipping {group_name} population for target {target}: No observations.")
                continue

            # Only binary outcomes can be evaluated with the metrics below.
            if len(y_group.unique()) > 2:
                print(f"Skipping {group_name} population for target {target}: Non-binary target detected.")
                continue

            # Predict once on the full subgroup; the bootstrap only resamples these predictions.
            y_pred = subgroup_model.predict(X_group)
            y_proba = subgroup_model.predict_proba(X_group)[:, 1]  # probability of the positive class

            boot_metrics = {
                'roc_auc': [],
                'f1': [],
                'f1_weighted': [],
                'accuracy': [],
                'balanced_accuracy': [],
                'recall_weighted': [],
                'average_precision': [],
                'brier_score_loss': [],
                'calibration_slope': [],
                'calibration_intercept': [],
            }

            n_skipped = 0
            for replicate_idx in range(n_bootstraps):
                indices = rng.choice(n_obs, size=n_obs, replace=True)
                try:
                    replicate = _bootstrap_replicate_metrics(
                        y_group.iloc[indices], y_pred[indices], y_proba[indices]
                    )
                except ValueError:
                    # Happens if a bootstrap sample contains only one class. The replicate is
                    # dropped as a whole so that every metric is summarised over the same samples.
                    n_skipped += 1
                    continue
                for metric, value in replicate.items():
                    boot_metrics[metric].append(value)

            if n_skipped:
                print(f"{group_name}: {n_skipped} of {n_bootstraps} bootstrap samples skipped (metric undefined).")

            # Summarise each metric by its bootstrap mean and 95% percentile interval.
            results[target][group_name] = {}
            for metric, scores in boot_metrics.items():
                if len(scores) > 0:
                    ci_low, ci_high = ci_bounds(scores)
                    results[target][group_name][metric] = {
                        'mean': np.mean(scores),
                        '95% CI': (ci_low, ci_high),
                    }

            # Calibration curve: observed outcome frequency versus mean predicted probability.
            # The first argument must be the observed outcomes, not the predicted labels.
            prob_true, prob_pred = calibration_curve(y_group, y_proba, n_bins=10)

            fig, ax = plt.subplots(figsize=(7, 6))
            ax.plot(prob_pred, prob_true, marker='o', label="Model Calibration")
            ax.plot([0, 1], [0, 1], linestyle="--", color="gray", label="Perfect Calibration")
            ax.set_xlabel("Mean Predicted Probability")
            ax.set_ylabel("Fraction of Positives")
            # Include the subgroup so the PDF pages of one target can be told apart.
            ax.set_title(f"Calibration Plot: {target} ({group_name})")
            ax.legend()
            ax.grid(True)
            fig.tight_layout()

            # Write the page before showing: some backends release the figure on show().
            pdf.savefig(fig)
            plt.show()
            plt.close(fig)

In [None]:
def _flatten_metrics(metrics):
    """Turn one subgroup's metric dictionary into flat column -> value pairs.

    A {'mean', '95% CI'} entry becomes three columns (<metric>_mean, <metric>_ci_low,
    <metric>_ci_high) rounded to 3 decimals; a one-element list becomes its rounded
    element; anything else is passed through unchanged.
    """
    flat = {}
    for metric_name, value in metrics.items():
        has_summary = isinstance(value, dict) and 'mean' in value and '95% CI' in value
        if has_summary:
            interval = value['95% CI']
            flat[f"{metric_name}_mean"] = round(value['mean'], 3)
            flat[f"{metric_name}_ci_low"] = round(interval[0], 3)
            flat[f"{metric_name}_ci_high"] = round(interval[1], 3)
        elif isinstance(value, list) and len(value) == 1:
            flat[metric_name] = round(value[0], 3)
        else:
            flat[metric_name] = value
    return flat


# One table row per (target, subgroup) combination found in the results dictionary.
rows = [
    {'Target': target_name, 'Subgroup': group_name, **_flatten_metrics(metrics)}
    for target_name, group in results.items()
    for group_name, metrics in group.items()
]

results_df = pd.DataFrame(rows)

# Show the table in the notebook and keep a copy on disk.
display(results_df)
results_df.to_csv('results_subgroups.csv', index=False)

# Plots

In [None]:
# Plotting aligned with the plot notebook.
# Grouped bar chart of the bootstrap mean ROC AUC (with 95% CI error bars) per scale,
# one bar per risk group.

# Display names of the scales; the key order defines the left-to-right order on the x axis.
custom_target_names = {
     'C30_PF2_class': "Physical functioning",
     'C30_RF2_class': "Role functioning",
     'C30_EF_class': "Emotional functioning",
     'C30_CF_class': "Cognitive functioning",
     'C30_SF_class': "Social functioning",
     'C30_FA_class': "Fatigue",
     'C30_NV_class': "Nausea and vomiting",
     'C30_PA_class': "Pain",
     'C30_DY_class': "Dyspnoea",
     'C30_SL_class': "Insomnia",
     'C30_AP_class': "Appetite loss",
     'C30_CO_class': "Constipation",
     'C30_DI_class': "Diarrhoea",
     'C30_FI_class': "Financial difficulties"
 }

subgroup = ['Menopause', 'Financial difficulties', 'Obese',
       'Comorbidities > 1', 'Lower educational status', 'Frail', 'Full external dataset'
       ]

# Use a dedicated name for the plot order so that the `targets` list the evaluation cells
# iterate over is not overwritten when this cell runs.
plot_targets = list(custom_target_names.keys())
custom_targets = list(custom_target_names.values())

# Define bar width dynamically to prevent overlap
num_subgroup = len(subgroup)
bar_width = min(0.8 / num_subgroup, 0.5)

x = np.arange(len(plot_targets))  # left edge position of every target's bar cluster

# Evenly spaced colours from the colormap (options: viridis, magma, plasma, inferno).
cmap = plt.get_cmap('plasma')
color_steps = max(num_subgroup - 1, 1)  # avoids a division by zero with a single subgroup
colors = [cmap(i / color_steps) for i in range(num_subgroup)]

auc_columns = ['roc_auc_mean', 'roc_auc_ci_low', 'roc_auc_ci_high']

fig, ax = plt.subplots(figsize=(20, 6))

# Plot bars with asymmetric error bars
for i, (sg, color) in enumerate(zip(subgroup, colors)):
    # Align this subgroup's rows to the plot order by target name rather than by position:
    # a target that is missing for the subgroup becomes NaN (no bar) instead of shifting the
    # remaining bars or raising a shape mismatch. results_df itself is left unmodified.
    selected = (results_df['Subgroup'] == sg) & results_df['Target'].isin(plot_targets)
    subset = (
        results_df.loc[selected, ['Target'] + auc_columns]
        .astype({'Target': object})
        .set_index('Target')
        .reindex(plot_targets)
    )
    auc_mean = subset['roc_auc_mean'].to_numpy(dtype=float)
    lower_err = auc_mean - subset['roc_auc_ci_low'].to_numpy(dtype=float)
    upper_err = subset['roc_auc_ci_high'].to_numpy(dtype=float) - auc_mean
    ax.bar(
        x + i * bar_width,
        auc_mean,
        yerr=np.vstack([lower_err, upper_err]),  # row 0: lower error, row 1: upper error
        capsize=2,
        width=bar_width,
        label=sg,
        color=color,
        edgecolor='black',
        linewidth=0.5
    )

# Customize the plot
ax.set_title("A) Model performance in risk groups", fontsize=16, loc="left")
ax.set_ylabel("Mean ROC AUC (95% CI)", fontsize=14)
ax.set_xlabel("Scales", fontsize=14)
ax.set_xticks(x + (num_subgroup - 1) * bar_width / 2)  # centre each tick under its bar cluster
ax.set_xticklabels(custom_targets, rotation=20, fontsize=12, ha="right")
ax.set_ylim(0.4, 1.0)
ax.set_yticks(np.arange(0.4, 1.01, 0.1))
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.legend(title="Risk groups", fontsize=10, loc='lower right', frameon=True)

# Show plot
plt.show()

In [None]:
# Plotting aligned with the plot notebook.
# Grouped bar chart of the bootstrap mean ROC AUC (with 95% CI error bars) per scale,
# one bar per time-based subgroup.

# Display names of the scales; the key order defines the left-to-right order on the x axis.
custom_target_names = {
     'C30_PF2_class': "Physical functioning",
     'C30_RF2_class': "Role functioning",
     'C30_EF_class': "Emotional functioning",
     'C30_CF_class': "Cognitive functioning",
     'C30_SF_class': "Social functioning",
     'C30_FA_class': "Fatigue",
     'C30_NV_class': "Nausea and vomiting",
     'C30_PA_class': "Pain",
     'C30_DY_class': "Dyspnoea",
     'C30_SL_class': "Insomnia",
     'C30_AP_class': "Appetite loss",
     'C30_CO_class': "Constipation",
     'C30_DI_class': "Diarrhoea",
     'C30_FI_class': "Financial difficulties"
 }

subgroup = ['After_diagnosis','Future_assessment', 'Full external dataset']

# Use a dedicated name for the plot order so that the `targets` list the evaluation cells
# iterate over is not overwritten when this cell runs.
plot_targets = list(custom_target_names.keys())
custom_targets = list(custom_target_names.values())

# Define bar width dynamically to prevent overlap
num_subgroup = len(subgroup)
bar_width = min(0.8 / num_subgroup, 0.2)

x = np.arange(len(plot_targets))  # left edge position of every target's bar cluster

# Evenly spaced colours from the colormap (options: viridis, magma, plasma, inferno).
cmap = plt.get_cmap('plasma')
color_steps = max(num_subgroup - 1, 1)  # avoids a division by zero with a single subgroup
colors = [cmap(i / color_steps) for i in range(num_subgroup)]

auc_columns = ['roc_auc_mean', 'roc_auc_ci_low', 'roc_auc_ci_high']

fig, ax = plt.subplots(figsize=(20, 6))

# Plot bars with asymmetric error bars
for i, (sg, color) in enumerate(zip(subgroup, colors)):
    # Align this subgroup's rows to the plot order by target name rather than by position:
    # a target that is missing for the subgroup becomes NaN (no bar) instead of shifting the
    # remaining bars or raising a shape mismatch. results_df itself is left unmodified.
    selected = (results_df['Subgroup'] == sg) & results_df['Target'].isin(plot_targets)
    subset = (
        results_df.loc[selected, ['Target'] + auc_columns]
        .astype({'Target': object})
        .set_index('Target')
        .reindex(plot_targets)
    )
    auc_mean = subset['roc_auc_mean'].to_numpy(dtype=float)
    lower_err = auc_mean - subset['roc_auc_ci_low'].to_numpy(dtype=float)
    upper_err = subset['roc_auc_ci_high'].to_numpy(dtype=float) - auc_mean
    ax.bar(
        x + i * bar_width,
        auc_mean,
        yerr=np.vstack([lower_err, upper_err]),  # row 0: lower error, row 1: upper error
        capsize=2,
        width=bar_width,
        label=sg,
        color=color,
        edgecolor='black',
        linewidth=0.5
    )

# Customize the plot
ax.set_title("B) Time dynamic performance evaluation", fontsize=16, loc="left")
ax.set_ylabel("Mean ROC AUC (95% CI)", fontsize=14)
ax.set_xlabel("Scales", fontsize=14)
ax.set_xticks(x + (num_subgroup - 1) * bar_width / 2)  # centre each tick under its bar cluster
ax.set_xticklabels(custom_targets, rotation=20, fontsize=12, ha="right")
ax.set_ylim(0.4, 1.0)
ax.set_yticks(np.arange(0.4, 1.01, 0.1))
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.legend(fontsize=10, loc='lower right', frameon=True)

# Show plot
plt.show()