# This notebook aggregates all results and plots them

In [None]:
# Load the result data frames produced by next_assessment_dynamic, next_assessment_baseline & next_assessment_external_validation

import numpy as np
import pandas as pd 

# Read the three result tables in one pass. Every file is comma-separated and
# is read with low_memory=False, exactly as before; only the spelling changed.
results, LOCF, external = (
    pd.read_csv(csv_path, sep=',', low_memory=False)
    for csv_path in (
        'results_classifier_selection.csv',
        'results_LOCF_AUC.csv',
        'results_external_validation.csv',
    )
)

# Plot results for classifier evaluation 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from constants import targets

# Set figure size and style
plt.figure(figsize=(20, 6))

# Display name for every target column. The insertion order of this dict is
# also the left-to-right order of the bar groups in the figure.
custom_target_names = {
    'C30_PF2_class': "Physical functioning",
    'C30_RF2_class': "Role functioning",
    'C30_EF_class': "Emotional functioning",
    'C30_CF_class': "Cognitive functioning",
    'C30_SF_class': "Social functioning",
    'C30_FA_class': "Fatigue",
    'C30_NV_class': "Nausea and vomiting",
    'C30_PA_class': "Pain",
    'C30_DY_class': "Dyspnoea",
    'C30_SL_class': "Insomnia",
    'C30_AP_class': "Appetite loss",
    'C30_CO_class': "Constipation",
    'C30_DI_class': "Diarrhoea",
    'C30_FI_class': "Financial difficulties",
}

# Force specific target order (this intentionally replaces any `targets`
# that was imported earlier in the cell).
targets = list(custom_target_names.keys())
custom_targets = list(custom_target_names.values())

# Ensure the DataFrame is ordered according to custom target order.
# Targets that are not listed in `custom_target_names` become NaN here.
results['Target'] = pd.Categorical(results['Target'], categories=targets, ordered=True)
results = results.sort_values(['Target', 'Classifier'])
classifiers = results['Classifier'].unique()

# Define bar width dynamically to prevent overlap.
# max(..., 1) keeps the division defined when the table has no classifiers.
num_classifiers = len(classifiers)
bar_width = min(1.0 / max(num_classifiers, 1), 0.15)  # Adjusts width for large numbers

x = np.arange(len(targets))  # X positions for bars

# Define colors for classifiers (options: viridis, magma, plasma, inferno)
cmap = plt.get_cmap('plasma')  # Colormap
# Evenly spaced colors over [0, 1]. With a single classifier the denominator
# would be zero, so it is clamped to 1 and that classifier gets cmap(0.0).
color_steps = max(num_classifiers - 1, 1)
colors = [cmap(i / color_steps) for i in range(num_classifiers)]

# Plot bars with error bars
# Group index of every target, so each bar is drawn in the group of its own
# target instead of relying on the row order / row count of the subset.
target_position = {target: idx for idx, target in enumerate(targets)}

for i, (classifier, color) in enumerate(zip(classifiers, colors)):
    subset = results[results['Classifier'] == classifier]

    # Rows whose target is not one of the plotted targets have no group to go
    # to and are left out (they map to NaN below).
    slot = subset['Target'].astype(object).map(target_position)
    known = slot.notna()
    subset = subset[known]
    if subset.empty:
        continue
    positions = slot[known].to_numpy(dtype=float)
    heights = subset['roc_auc'].to_numpy(dtype=float)

    # Check if 'roc_auc_sd' column exists and has non-null values
    if 'roc_auc_sd' in subset.columns and not subset['roc_auc_sd'].isnull().all():
        yerr = subset['roc_auc_sd'].to_numpy(dtype=float)
    else:
        # Asymmetric whiskers from the confidence interval. They are measured
        # from the plotted bar height ('roc_auc') so that the whiskers end
        # exactly at the interval bounds.
        yerr = np.vstack([
            heights - subset['roc_auc_ci_low'].to_numpy(dtype=float),   # Lower error
            subset['roc_auc_ci_high'].to_numpy(dtype=float) - heights,  # Upper error
        ])

    plt.bar(
        positions + i * bar_width,  # Shift bars for each classifier
        heights,
        yerr=yerr,
        capsize=2,
        width=bar_width,
        label=classifier,
        color=color,
        edgecolor='black',
        linewidth=0.5
    )

# Decorate the current axes: title, axis labels, ticks, limits, grid, legend.
ax = plt.gca()
ax.set_title("A) Performance of all evaluated algorithms", fontsize=16, loc="left")
ax.set_xlabel("Scales", fontsize=14)
ax.set_ylabel("Mean ROC AUC", fontsize=14)

# One tick per target, placed at the centre of its group of bars.
group_centres = x + (len(classifiers) - 1) * bar_width / 2
ax.set_xticks(group_centres)
ax.set_xticklabels(custom_targets, rotation=20, fontsize=12, ha="right")

ax.set_ylim(0.5, 1.0)
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.legend(title="Algorithms", fontsize=10)

# Render the figure
plt.show()

# Plot results for validation

In [None]:
# Stack the rows of the selected classifier on top of the LOCF and the
# external-validation results, renumbering the index from zero.
df = pd.concat(
    [
        results.loc[results["Classifier"].eq('HistGradientBoostingClassifier')],
        LOCF,
        external,
    ],
    ignore_index=True,
)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Set figure size and style
plt.figure(figsize=(20, 6))

# Display name for every target column; targets without an entry keep their
# raw column name as tick label.
custom_target_names = {
    'C30_PF2_class': "Physical functioning",
    'C30_RF2_class': "Role functioning",
    'C30_EF_class': "Emotional functioning",
    'C30_CF_class': "Cognitive functioning",
    'C30_SF_class': "Social functioning",
    'C30_FA_class': "Fatigue",
    'C30_NV_class': "Nausea and vomiting",
    'C30_PA_class': "Pain",
    'C30_DY_class': "Dyspnoea",
    'C30_SL_class': "Insomnia",
    'C30_AP_class': "Appetite loss",
    'C30_CO_class': "Constipation",
    'C30_DI_class': "Diarrhoea",
    'C30_FI_class': "Financial difficulties",
}

# Get unique targets and classifiers (both in order of first appearance in df)
targets = df['Target'].unique()
custom_targets = [custom_target_names.get(t, t) for t in targets]

classifiers = df['Classifier'].unique()

# Define bar width dynamically to prevent overlap.
# max(..., 1) keeps the division defined when df has no classifiers.
num_classifiers = len(classifiers)
bar_width = min(0.8 / max(num_classifiers, 1), 0.2)  # Adjusts width for large numbers

x = np.arange(len(targets))  # X positions for bars

# Define colors for classifiers (options: viridis, magma, plasma, inferno)
cmap = plt.get_cmap('plasma')  # Colormap
# Evenly spaced colors over [0, 1]. With a single classifier the denominator
# would be zero, so it is clamped to 1 and that classifier gets cmap(0.0).
color_steps = max(num_classifiers - 1, 1)
colors = [cmap(i / color_steps) for i in range(num_classifiers)]

# Plot bars with error bars
# Group index of every target. The three sources stacked in `df` are not
# guaranteed to list their targets in the same order, so each row is placed
# in the group of its own target rather than by its position in the subset.
target_position = {target: idx for idx, target in enumerate(targets)}
bar_handles = []  # one bar container per plotted classifier, in plot order

for i, (classifier, color) in enumerate(zip(classifiers, colors)):
    subset = df[df['Classifier'] == classifier]

    # Rows without a usable target (they map to NaN) cannot be placed.
    slot = subset['Target'].astype(object).map(target_position)
    known = slot.notna()
    subset = subset[known]
    if subset.empty:
        continue
    positions = slot[known].to_numpy(dtype=float)
    heights = subset['roc_auc'].to_numpy(dtype=float)

    # Check if 'roc_auc_sd' column exists and has non-null values
    if 'roc_auc_sd' in subset.columns and not subset['roc_auc_sd'].isnull().all():
        yerr = subset['roc_auc_sd'].to_numpy(dtype=float)
    else:
        # Asymmetric whiskers from the confidence interval around the bar.
        yerr = np.vstack([
            heights - subset['roc_auc_ci_low'].to_numpy(dtype=float),   # Lower error
            subset['roc_auc_ci_high'].to_numpy(dtype=float) - heights,  # Upper error
        ])

    bar_handles.append(
        plt.bar(
            positions + i * bar_width,  # Shift bars for each classifier
            heights,
            yerr=yerr,
            capsize=2,
            width=bar_width,
            label=classifier,
            color=color,
            edgecolor='black',
            linewidth=0.5
        )
    )

# Customize the plot
plt.title("B) Model evaluation in external dataset and against LOCF", fontsize=16, loc="left")
plt.ylabel("Mean ROC AUC", fontsize=14)
plt.xlabel("Scales", fontsize=12)
plt.xticks(x + (len(classifiers) - 1) * bar_width / 2, custom_targets, rotation=20, fontsize=12, ha="right")  # Center tick labels
plt.ylim(0.5, 1.0)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Legend names, in the order the sources were concatenated into `df`.
# NOTE(review): this assumes each source contributes exactly one value in
# the 'Classifier' column - confirm against the CSV files.
custom_labels = ['Internal cross validation', 'LOCF', 'External dataset']

# Apply custom legend. The labels are bound to explicit bar handles; if the
# number of plotted classifiers differs from the number of custom labels the
# pairing would be wrong, so fall back to the classifier names instead.
if len(bar_handles) == len(custom_labels):
    plt.legend(handles=bar_handles, labels=custom_labels, fontsize=10)
else:
    plt.legend(fontsize=10)

# Show plot
plt.show()