# Visualizations For Paper

## Group Sparsity

In [None]:

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import joblib

plt.rcParams['figure.dpi'] = 300

def plot_group_sparsity_performance(ax, metric: str, linewidth=1, error_width=0.8, markersize=4, capsize=6, fmt='s', figsize=(8,6), fontsize=14, show_autoscore=False) -> None:
    """Plot GFR performance on MIMIC against group sparsity on ``ax``.

    For every group sparsity in ``range(10, 50, 5)`` the values stored under
    ``metric`` in ``results/fasterrisk/fasterrisk-{k}`` are reduced to a mean
    and a standard deviation and drawn as the 'GFR' curve with error bars.
    OASIS and SAPS-II are added as single reference points at x=10 and x=17
    from hard-coded mean/error values. No axes title is set.

    Args:
        ax: Matplotlib axes to draw on.
        metric: Either 'auroc' or 'auprc'.
        linewidth: Width of the line connecting the curve points.
        error_width: ``linewidth`` forwarded to ``ax.errorbar``.
        markersize: Marker size of the curve points.
        capsize: Error-bar cap size.
        fmt: Marker format of the GFR points.
        figsize: Unused; kept so existing calls keep working.
        fontsize: Tick label size; the x label uses ``fontsize + 4``.
        show_autoscore: When true, additionally load
            ``results/autoscore/autoscore-mimic`` and overlay the grey
            'AutoScore' curve. Off by default, so the file is not needed.

    Raises:
        ValueError: If ``metric`` is not 'auroc' or 'auprc'.
    """
    if metric not in ('auroc', 'auprc'):
        raise ValueError(f"metric must be 'auroc' or 'auprc', got {metric!r}")
    sparsities = list(range(10, 50, 5))

    gfr_mean, gfr_std = [], []
    for group_sparsity in sparsities:
        stats = joblib.load(f'results/fasterrisk/fasterrisk-{group_sparsity}')
        gfr_mean.append(np.mean(stats[metric]))
        gfr_std.append(np.std(stats[metric]))

    # Hard-coded reference scores (mean, error) for the two baselines.
    if metric == 'auroc':
        oasis_mean, oasis_err = 0.775, 0.008
        sapsii_mean, sapsii_err = 0.795, 0.009
    else:
        oasis_mean, oasis_err = 0.314, 0.014
        sapsii_mean, sapsii_err = 0.342, 0.012

    ax.plot(sparsities, gfr_mean, color='#cf3d3e', linewidth=linewidth)
    ax.errorbar(sparsities, gfr_mean, yerr=gfr_std, color='#cf3d3e', capsize=capsize, fmt=fmt, markersize=markersize, linewidth=error_width, label='GFR')

    if show_autoscore:
        # One file holds every sparsity level, so it is loaded once.
        autoscore = joblib.load("results/autoscore/autoscore-mimic")
        autoscore_mean = [np.mean(autoscore[k][metric]) for k in sparsities]
        autoscore_std = [np.std(autoscore[k][metric]) for k in sparsities]
        ax.plot(sparsities, autoscore_mean, color='grey', linewidth=linewidth)
        ax.errorbar(sparsities, autoscore_mean, yerr=autoscore_std, color='grey', capsize=capsize, fmt='o', markersize=markersize, linewidth=error_width, label='AutoScore')

    ax.errorbar(10, oasis_mean, oasis_err, color='#40815f', capsize=capsize, fmt='^', markersize=12, linewidth=error_width, label='OASIS')
    ax.errorbar(17, sapsii_mean, sapsii_err, color='#FAC100', capsize=capsize, fmt='D', markersize=11, linewidth=error_width, label='SAPS-II')
    ax.set_xlabel('Group Sparsity (Variables)', fontsize=fontsize+4)
    ax.tick_params(axis='both', which='major', labelsize=fontsize)

def plot_group_sparsity_performance_ood(ax, metric: str, linewidth=1, error_width=80, markersize=4, capsize=6, fmt='s', figsize=(8,6), fontsize=14, show_autoscore=False) -> None:
    """Plot GFR out-of-distribution performance against group sparsity on ``ax``.

    For every group sparsity in ``range(10, 50, 5)`` the value stored under
    ``metric`` in ``results/fasterrisk/fasterrisk-{k}-ood-stats`` is drawn as
    the 'GFR' curve (markers only, no error bars). OASIS, SAPS-II, APACHE IV
    and APACHE IVa are added as reference points at x=10, 17, 142 and 142 from
    hard-coded scores, and the x axis is switched to log scale. No axes title
    is set.

    Args:
        ax: Matplotlib axes to draw on.
        metric: Either 'auroc' or 'auprc'.
        linewidth: Width of the line connecting the curve points.
        error_width: ``linewidth`` forwarded to ``ax.errorbar``.
            NOTE(review): the default of 80 looks like a typo for 0.8 (the
            sibling function uses 0.8); left as-is because it is part of the
            signature and visible callers always pass the value explicitly.
        markersize: Marker size of the curve points.
        capsize: Error-bar cap size.
        fmt: Marker format of the GFR points.
        figsize: Unused; kept so existing calls keep working.
        fontsize: Tick label size; the x label uses ``fontsize + 4``.
        show_autoscore: When true, additionally load
            ``results/autoscore/autoscore-ood`` and overlay the grey
            'AutoScore' curve. Off by default, so the file is not needed.

    Raises:
        ValueError: If ``metric`` is not 'auroc' or 'auprc'.
    """
    if metric not in ('auroc', 'auprc'):
        raise ValueError(f"metric must be 'auroc' or 'auprc', got {metric!r}")
    sparsities = list(range(10, 50, 5))

    gfr = [
        joblib.load(f'results/fasterrisk/fasterrisk-{group_sparsity}-ood-stats')[metric]
        for group_sparsity in sparsities
    ]

    # Hard-coded reference scores for the baselines.
    if metric == 'auroc':
        oasis, sapsii, apacheiv, apacheiva = 0.805, 0.844, 0.871, 0.873
    else:
        oasis, sapsii, apacheiv, apacheiva = 0.361, 0.433, 0.487, 0.489

    ax.plot(sparsities, gfr, color='#cf3d3e', linewidth=linewidth)
    ax.errorbar(x=sparsities, y=gfr, fmt=fmt, capsize=capsize, color='#cf3d3e', linewidth=error_width, markersize=markersize, label='GFR')

    if show_autoscore:
        # One file holds every sparsity level, so it is loaded once.
        autoscore = joblib.load("results/autoscore/autoscore-ood")
        autoscore_scores = [autoscore[k][metric] for k in sparsities]
        ax.plot(sparsities, autoscore_scores, color='grey', linewidth=linewidth)
        ax.errorbar(x=sparsities, y=autoscore_scores, fmt='o', capsize=capsize, color='grey', linewidth=error_width, markersize=markersize, label='AutoScore')

    ax.errorbar(10, oasis, color='#40815f', capsize=capsize, fmt='^', markersize=12, linewidth=error_width, label='OASIS')
    ax.errorbar(17, sapsii, color='#FAC100', capsize=capsize, fmt='D', markersize=11, linewidth=error_width, label='SAPS-II')
    ax.errorbar(142, apacheiv, color='#f46F43', capsize=capsize, fmt='X', markersize=14, linewidth=error_width, label='APACHE IV')
    ax.errorbar(142, apacheiva, color='#5d9cab', capsize=capsize, fmt='v', markersize=12, linewidth=error_width, label='APACHE IVa')
    ax.set_xscale('log')
    ax.set_xlabel('Group Sparsity (Variables)', fontsize=fontsize+4)
    ax.tick_params(axis='both', which='major', labelsize=fontsize)

# Shared styling for the four group-sparsity figures (without AutoScore).
sns.set_style('ticks')
fontsize = 22
linewidth = 1.5
error_width = 2
capsize = 8
markersize = 10
figsize = (9, 6)

ylim1 = (0.76, 0.88)
ylim2 = (0.28, 0.5)

# One entry per figure:
# (plot function, metric, y-axis label, y-limits, draw legend?, output file).
_figure_specs = [
    (plot_group_sparsity_performance, 'auroc', "AUROC", ylim1, False, 'group_sparsity_noautoscore_perform_1.pdf'),
    (plot_group_sparsity_performance, 'auprc', "AUPRC", ylim2, False, 'group_sparsity_noautoscore_perform_2.pdf'),
    (plot_group_sparsity_performance_ood, 'auroc', "AUROC", ylim1, True, 'group_sparsity_noautoscore_perform_3.pdf'),
    (plot_group_sparsity_performance_ood, 'auprc', "AUPRC", ylim2, True, 'group_sparsity_noautoscore_perform_4.pdf'),
]

for _plot, _metric, _ylabel, _ylim, _with_legend, _outfile in _figure_specs:
    fig, axes = plt.subplots(figsize=figsize)
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.15, top=0.9)
    _plot(axes, _metric, fontsize=fontsize, linewidth=linewidth, error_width=error_width, capsize=capsize, markersize=markersize)
    if _with_legend:
        axes.legend(fontsize=fontsize+1, loc='lower right')
    plt.ylim(_ylim)
    plt.ylabel(_ylabel, fontsize=fontsize+4)
    sns.despine()
    plt.tight_layout()
    plt.savefig(_outfile, dpi=300, format='pdf')
    plt.close()

In [None]:

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import joblib

plt.rcParams['figure.dpi'] = 300

def plot_group_sparsity_performance(ax, metric: str, linewidth=1, error_width=0.8, markersize=4, capsize=6, fmt='s', figsize=(8,6), fontsize=14, show_autoscore=True) -> None:
    """Plot GFR and AutoScore performance on MIMIC against group sparsity.

    For every group sparsity in ``range(10, 50, 5)`` the values stored under
    ``metric`` in ``results/fasterrisk/fasterrisk-{k}`` (GFR) and under
    ``[k][metric]`` in ``results/autoscore/autoscore-mimic`` (AutoScore) are
    reduced to a mean and a standard deviation and drawn as curves with error
    bars. OASIS and SAPS-II are added as single reference points at x=10 and
    x=17 from hard-coded mean/error values.

    Args:
        ax: Matplotlib axes to draw on.
        metric: Either 'auroc' or 'auprc'.
        linewidth: Width of the lines connecting the curve points.
        error_width: ``linewidth`` forwarded to ``ax.errorbar``.
        markersize: Marker size of the curve points.
        capsize: Error-bar cap size.
        fmt: Marker format of the GFR points (AutoScore always uses 'o').
        figsize: Unused; kept so existing calls keep working.
        fontsize: Tick label size; the x label uses ``fontsize + 4`` and the
            title ``fontsize + 2``.
        show_autoscore: When false, skip loading and drawing the AutoScore
            curve.

    Raises:
        ValueError: If ``metric`` is not 'auroc' or 'auprc'.
    """
    if metric not in ('auroc', 'auprc'):
        raise ValueError(f"metric must be 'auroc' or 'auprc', got {metric!r}")
    sparsities = list(range(10, 50, 5))

    gfr_mean, gfr_std = [], []
    for group_sparsity in sparsities:
        stats = joblib.load(f'results/fasterrisk/fasterrisk-{group_sparsity}')
        gfr_mean.append(np.mean(stats[metric]))
        gfr_std.append(np.std(stats[metric]))

    # Hard-coded reference scores (mean, error) for the two baselines.
    if metric == 'auroc':
        oasis_mean, oasis_err = 0.775, 0.008
        sapsii_mean, sapsii_err = 0.795, 0.009
    else:
        oasis_mean, oasis_err = 0.314, 0.014
        sapsii_mean, sapsii_err = 0.342, 0.012

    ax.plot(sparsities, gfr_mean, color='#cf3d3e', linewidth=linewidth)
    ax.errorbar(sparsities, gfr_mean, yerr=gfr_std, color='#cf3d3e', capsize=capsize, fmt=fmt, markersize=markersize, linewidth=error_width, label='GFR')

    if show_autoscore:
        # One file holds every sparsity level, so it is loaded once instead
        # of once per loop iteration.
        autoscore = joblib.load("results/autoscore/autoscore-mimic")
        autoscore_mean = [np.mean(autoscore[k][metric]) for k in sparsities]
        autoscore_std = [np.std(autoscore[k][metric]) for k in sparsities]
        ax.plot(sparsities, autoscore_mean, color='grey', linewidth=linewidth)
        ax.errorbar(sparsities, autoscore_mean, yerr=autoscore_std, color='grey', capsize=capsize, fmt='o', markersize=markersize, linewidth=error_width, label='AutoScore')

    ax.errorbar(10, oasis_mean, oasis_err, color='#40815f', capsize=capsize, fmt='^', markersize=12, linewidth=error_width, label='OASIS')
    ax.errorbar(17, sapsii_mean, sapsii_err, color='#FAC100', capsize=capsize, fmt='D', markersize=11, linewidth=error_width, label='SAPS-II')
    ax.set_xlabel('Group Sparsity (Variables)', fontsize=fontsize+4)
    ax.set_title(f"MIMIC III Test Folds, {metric.upper()}", fontsize=fontsize+2, fontweight='bold')
    ax.tick_params(axis='both', which='major', labelsize=fontsize)

def plot_group_sparsity_performance_ood(ax, metric: str, linewidth=1, error_width=80, markersize=4, capsize=6, fmt='s', figsize=(8,6), fontsize=14, show_autoscore=True) -> None:
    """Plot GFR and AutoScore out-of-distribution performance vs. group sparsity.

    For every group sparsity in ``range(10, 50, 5)`` the value stored under
    ``metric`` in ``results/fasterrisk/fasterrisk-{k}-ood-stats`` (GFR) and
    under ``[k][metric]`` in ``results/autoscore/autoscore-ood`` (AutoScore)
    is drawn as a curve (markers only, no error bars). OASIS, SAPS-II,
    APACHE IV and APACHE IVa are added as reference points at x=10, 17, 142
    and 142 from hard-coded scores, and the x axis is switched to log scale.

    Args:
        ax: Matplotlib axes to draw on.
        metric: Either 'auroc' or 'auprc'.
        linewidth: Width of the lines connecting the curve points.
        error_width: ``linewidth`` forwarded to ``ax.errorbar``.
            NOTE(review): the default of 80 looks like a typo for 0.8 (the
            sibling function uses 0.8); left as-is because it is part of the
            signature and visible callers always pass the value explicitly.
        markersize: Marker size of the curve points.
        capsize: Error-bar cap size.
        fmt: Marker format of the GFR points (AutoScore always uses 'o').
        figsize: Unused; kept so existing calls keep working.
        fontsize: Tick label size; the x label uses ``fontsize + 4`` and the
            title ``fontsize + 2``.
        show_autoscore: When false, skip loading and drawing the AutoScore
            curve.

    Raises:
        ValueError: If ``metric`` is not 'auroc' or 'auprc'.
    """
    if metric not in ('auroc', 'auprc'):
        raise ValueError(f"metric must be 'auroc' or 'auprc', got {metric!r}")
    sparsities = list(range(10, 50, 5))

    gfr = [
        joblib.load(f'results/fasterrisk/fasterrisk-{group_sparsity}-ood-stats')[metric]
        for group_sparsity in sparsities
    ]

    # Hard-coded reference scores for the baselines.
    if metric == 'auroc':
        oasis, sapsii, apacheiv, apacheiva = 0.805, 0.844, 0.871, 0.873
    else:
        oasis, sapsii, apacheiv, apacheiva = 0.361, 0.433, 0.487, 0.489

    ax.plot(sparsities, gfr, color='#cf3d3e', linewidth=linewidth)
    ax.errorbar(x=sparsities, y=gfr, fmt=fmt, capsize=capsize, color='#cf3d3e', linewidth=error_width, markersize=markersize, label='GFR')

    if show_autoscore:
        # One file holds every sparsity level, so it is loaded once instead
        # of once per loop iteration.
        autoscore = joblib.load("results/autoscore/autoscore-ood")
        autoscore_scores = [autoscore[k][metric] for k in sparsities]
        ax.plot(sparsities, autoscore_scores, color='grey', linewidth=linewidth)
        ax.errorbar(x=sparsities, y=autoscore_scores, fmt='o', capsize=capsize, color='grey', linewidth=error_width, markersize=markersize, label='AutoScore')

    ax.errorbar(10, oasis, color='#40815f', capsize=capsize, fmt='^', markersize=12, linewidth=error_width, label='OASIS')
    ax.errorbar(17, sapsii, color='#FAC100', capsize=capsize, fmt='D', markersize=11, linewidth=error_width, label='SAPS-II')
    ax.errorbar(142, apacheiv, color='#f46F43', capsize=capsize, fmt='X', markersize=14, linewidth=error_width, label='APACHE IV')
    ax.errorbar(142, apacheiva, color='#5d9cab', capsize=capsize, fmt='v', markersize=12, linewidth=error_width, label='APACHE IVa')
    ax.set_xscale('log')
    ax.set_xlabel('Group Sparsity (Variables)', fontsize=fontsize+4)
    ax.set_title(f"eICU Out of Distribution Evaluation, {metric.upper()}", fontsize=fontsize+2, fontweight='bold')
    ax.tick_params(axis='both', which='major', labelsize=fontsize)

# Shared styling for the four group-sparsity figures (with AutoScore).
sns.set_style('ticks')
fontsize = 22
linewidth = 1.5
error_width = 2
capsize = 8
markersize = 10
figsize = (9, 6)

ylim1 = (0, 0.88)
ylim2 = (0, 0.5)

# One entry per figure:
# (plot function, metric, y-axis label, y-limits, draw legend?, output file).
# Only the last figure carries a legend.
_figure_specs = [
    (plot_group_sparsity_performance, 'auroc', "AUROC", ylim1, False, 'group_sparsity_autoscore_perform_1.pdf'),
    (plot_group_sparsity_performance, 'auprc', "AUPRC", ylim2, False, 'group_sparsity_autoscore_perform_2.pdf'),
    (plot_group_sparsity_performance_ood, 'auroc', "AUROC", ylim1, False, 'group_sparsity_autoscore_perform_3.pdf'),
    (plot_group_sparsity_performance_ood, 'auprc', "AUPRC", ylim2, True, 'group_sparsity_autoscore_perform_4.pdf'),
]

for _plot, _metric, _ylabel, _ylim, _with_legend, _outfile in _figure_specs:
    fig, axes = plt.subplots(figsize=figsize)
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.15, top=0.9)
    _plot(axes, _metric, fontsize=fontsize, linewidth=linewidth, error_width=error_width, capsize=capsize, markersize=markersize)
    if _with_legend:
        axes.legend(fontsize=fontsize+1, loc='lower right')
    plt.ylim(_ylim)
    plt.ylabel(_ylabel, fontsize=fontsize+4)
    sns.despine()
    plt.tight_layout()
    plt.savefig(_outfile, dpi=300, format='pdf')
    plt.close()

## Complexity Graph

In [None]:
import joblib
import seaborn as sns
import matplotlib.pyplot as plt

plt.rcParams['figure.dpi'] = 300

def get_good_label(label: str):
    """Return the display name for a model identifier, or None if unknown."""
    display_names = (
        ('adaboost', 'AdaBoost'),
        ('xgboost', 'XGBoost'),
        ('random-forest', "Random Forest"),
        ('nonlinear-logreg-l1', "LogReg (L1)"),
        ('nonlinear-logreg-l2', "LogReg (L2)"),
        ('ebm', "EBM"),
        ('autoscore', 'AutoScore'),
    )
    for identifier, pretty in display_names:
        if label == identifier:
            return pretty
    return None

def plot_complexity_graph(ax, exp: str, metric: str, title, figsize=(12, 6), fontsize=10, size=100, alpha=0.6):
    """Scatter ``metric`` against model complexity for one MIMIC experiment.

    For ``exp == "oasis+"`` the highlighted points are GFR (19 features),
    GFR restricted to OASIS features and AutoScore (entry 14); for
    ``exp == "union49"`` they are GFR and AutoScore (entry 45). Any other
    ``exp`` draws only the baselines. The six baseline models are then loaded
    from ``results/{exp}/{model}`` and drawn with distinct markers and palette
    colours. The x axis is log-scaled.

    ``figsize`` is accepted but not used.
    """
    assert metric in ['auroc', 'auprc']
    small_offset = 100
    emphasised = size + small_offset

    def _highlight(result, label, color, marker='^'):
        # Enlarged marker for the methods the figure is about.
        sns.scatterplot(x=result['complexity'], y=result[metric], label=label, color=color, marker=marker, s=emphasised, ax=ax, alpha=alpha)

    if exp == "oasis+":
        _highlight(joblib.load("results/fasterrisk/fasterrisk-19"), 'GFR (ours)', 'red')
        _highlight(joblib.load("results/oasis+/fasterrisk-oasis"), 'GFR (OASIS)', 'black', marker='v')
        _highlight(joblib.load("results/autoscore/autoscore-mimic")[14], 'AutoScore', sns.color_palette()[0])
    elif exp == "union49":
        _highlight(joblib.load("results/union49/fasterrisk"), 'GFR (ours)', 'red')
        _highlight(joblib.load("results/autoscore/autoscore-mimic")[45], 'AutoScore', sns.color_palette()[0])

    baselines = ['adaboost', 'xgboost', 'random-forest', 'nonlinear-logreg-l1', 'nonlinear-logreg-l2', 'ebm']
    shapes = ['o', 's', 'v', 'D', 'P', 'X']
    palette = iter(sns.color_palette())
    next(palette)  # the first palette colour is the one AutoScore uses
    for model, shape in zip(baselines, shapes):
        result = joblib.load(f"results/{exp}/{model}")
        # Skip one extra palette colour before these models.
        # NOTE(review): 'logreg-l1' never matches any name in `baselines`
        # (they are 'nonlinear-logreg-*'), so that skip is currently inert.
        if model in ('logreg-l1', 'random-forest', 'ebm'):
            next(palette)
        extra = small_offset if model == 'random-forest' else 0
        ax = sns.scatterplot(x=result['complexity'], y=result[metric], label=get_good_label(model), marker=shape, s=size+extra, color=next(palette), ax=ax, alpha=alpha)

    ax.set_xscale('log')
    ax.set_xlabel('Model Complexity', fontsize=fontsize+2)
    ax.tick_params(axis='both', which='major', labelsize=fontsize-5)
    ax.set_title(title, fontsize=fontsize, fontweight='bold')

def plot_complexity_graph_ood(ax, exp: str, metric: str, title, figsize=(12, 6), fontsize=10, size=100, alpha=0.6):
    """Scatter out-of-distribution ``metric`` against model complexity.

    Mirrors ``plot_complexity_graph`` but reads the ``*-ood-stats`` result
    files, whose ``complexity`` and ``metric`` entries are wrapped into
    one-element lists before plotting. For ``exp == "oasis+"`` the highlighted
    points are GFR (19 features), GFR on OASIS features and AutoScore
    (entry 14); for ``exp == "union49"`` they are GFR (40 features) and
    AutoScore (entry 45). The x axis is log-scaled.

    ``figsize`` is accepted but not used.
    """
    assert metric in ['auroc', 'auprc']
    small_offset = 100
    emphasised = size + small_offset

    def _highlight(result, label, color, marker='^'):
        # Enlarged marker for the methods the figure is about.
        sns.scatterplot(x=[result['complexity']], y=[result[metric]], label=label, color=color, marker=marker, s=emphasised, ax=ax, alpha=alpha)

    if exp == "oasis+":
        _highlight(joblib.load("results/fasterrisk/fasterrisk-19-ood-stats"), 'GFR (ours)', 'red')
        _highlight(joblib.load("results/fasterrisk/fasterrisk-oasis-ood-stats"), 'GFR (OASIS)', 'black', marker='v')
        _highlight(joblib.load("results/autoscore/autoscore-ood")[14], 'AutoScore', sns.color_palette()[0])
    elif exp == "union49":
        _highlight(joblib.load("results/fasterrisk/fasterrisk-40-ood-stats"), 'GFR (ours)', 'red')
        _highlight(joblib.load("results/autoscore/autoscore-ood")[45], 'AutoScore', sns.color_palette()[0])

    baselines = ['adaboost', 'xgboost', 'random-forest', 'nonlinear-logreg-l1', 'nonlinear-logreg-l2', 'ebm']
    shapes = ['o', 's', 'v', 'D', 'P', 'X']
    palette = iter(sns.color_palette())
    next(palette)  # the first palette colour is the one AutoScore uses
    for model, shape in zip(baselines, shapes):
        result = joblib.load(f"results/{exp}/{model}-ood-stats")
        # Skip one extra palette colour before these models.
        # NOTE(review): 'logreg-l1' never matches any name in `baselines`
        # (they are 'nonlinear-logreg-*'), so that skip is currently inert.
        if model in ('logreg-l1', 'random-forest', 'ebm'):
            next(palette)
        extra = small_offset if model == 'random-forest' else 0
        ax = sns.scatterplot(x=[result['complexity']], y=[result[metric]], label=get_good_label(model), marker=shape, s=size+extra, color=next(palette), ax=ax, alpha=alpha)

    ax.set_xscale('log')
    ax.set_xlabel('Model Complexity', fontsize=fontsize+2)
    ax.tick_params(axis='both', which='major', labelsize=fontsize-5)
    ax.set_title(title, fontsize=fontsize, fontweight='bold')

# 2x4 complexity figure: top row MIMIC, bottom row eICU.
# NOTE: this figure is the 19-feature variant.
fontsize = 26
size = 200
sns.set_style('whitegrid')
fig, axes = plt.subplots(2, 4, figsize=(26, 12))

alpha = 0.7

# Rows: (plot function, dataset name used in the title).
_row_specs = [(plot_complexity_graph, "MIMIC"), (plot_complexity_graph_ood, "eICU")]
# Columns: (experiment, metric, feature-set description used in the title).
_col_specs = [
    ('union49', 'auroc', "All features"),
    ('union49', 'auprc', "All features"),
    ('oasis+', 'auroc', "OASIS features"),
    ('oasis+', 'auprc', "OASIS features"),
]

for _r, (_plot, _dataset) in enumerate(_row_specs):
    for _c, (_exp, _metric, _features) in enumerate(_col_specs):
        _panel = axes[_r, _c]
        _plot(_panel, _exp, _metric, fontsize=fontsize, size=size, title=f"{_dataset}, {_metric.upper()} ({_features})", alpha=alpha)
        if (_r, _c) == (1, 3):
            # Only the bottom-right panel keeps its legend.
            _panel.legend(fontsize=fontsize-6)
        else:
            _panel.get_legend().remove()

plt.tight_layout()
plt.subplots_adjust(wspace=0.2, hspace=0.3)
plt.savefig('complexity_19_autoscore.pdf', dpi=300, format='pdf')


In [None]:
import joblib
import seaborn as sns
import matplotlib.pyplot as plt

plt.rcParams['figure.dpi'] = 300

def get_good_label(label: str):
    """Map a model identifier to its display name; unknown labels give None."""
    known = [
        ('adaboost', 'AdaBoost'),
        ('xgboost', 'XGBoost'),
        ('random-forest', "Random Forest"),
        ('nonlinear-logreg-l1', "LogReg (L1)"),
        ('nonlinear-logreg-l2', "LogReg (L2)"),
        ('ebm', "EBM"),
        ('autoscore', 'AutoScore'),
    ]
    return next((pretty for key, pretty in known if label == key), None)

def plot_complexity_graph(ax, exp: str, metric: str, title, figsize=(12, 6), fontsize=10, size=100, alpha=0.7):
    """Scatter ``metric`` against model complexity for one MIMIC experiment.

    This is the 14-feature variant: for ``exp == "oasis+"`` the highlighted
    points are GFR (14 features), GFR restricted to OASIS features and
    AutoScore (entry 14); for ``exp == "union49"`` they are GFR and AutoScore
    (entry 45). Any other ``exp`` draws only the six baseline models loaded
    from ``results/{exp}/{model}``. The x axis is log-scaled.

    ``figsize`` is accepted but not used.
    """
    assert metric in ['auroc', 'auprc']
    small_offset = 100
    autoscore_color = sns.color_palette()[0]

    # (result file, sub-key or None, legend label, colour, marker)
    if exp == "oasis+":
        highlighted = [
            ("results/fasterrisk/fasterrisk-14", None, 'GFR (ours)', 'red', '^'),
            ("results/oasis+/fasterrisk-oasis", None, 'GFR (OASIS)', 'black', 'v'),
            ("results/autoscore/autoscore-mimic", 14, 'AutoScore', autoscore_color, '^'),
        ]
    elif exp == "union49":
        highlighted = [
            ("results/union49/fasterrisk", None, 'GFR (ours)', 'red', '^'),
            ("results/autoscore/autoscore-mimic", 45, 'AutoScore', autoscore_color, '^'),
        ]
    else:
        highlighted = []

    for path, key, label, point_color, point_marker in highlighted:
        result = joblib.load(path)
        if key is not None:
            result = result[key]
        sns.scatterplot(x=result['complexity'], y=result[metric], label=label, color=point_color, marker=point_marker, s=size+small_offset, ax=ax, alpha=alpha)

    marker_cycle = iter(['o', 's', 'v', 'D', 'P', 'X'])
    color_cycle = iter(sns.color_palette())
    next(color_cycle)  # the first palette colour is the one AutoScore uses
    # Models preceded by one skipped palette colour.
    # NOTE(review): 'logreg-l1' matches none of the model names below.
    skip_before = {'logreg-l1', 'random-forest', 'ebm'}
    for model in ['adaboost', 'xgboost', 'random-forest', 'nonlinear-logreg-l1', 'nonlinear-logreg-l2', 'ebm']:
        result = joblib.load(f"results/{exp}/{model}")
        if model in skip_before:
            next(color_cycle)
        if model == 'random-forest':
            offset = small_offset
        else:
            offset = 0
        ax = sns.scatterplot(x=result['complexity'], y=result[metric], label=get_good_label(model), marker=next(marker_cycle), s=size+offset, color=next(color_cycle), ax=ax, alpha=alpha)

    ax.set_xscale('log')
    ax.set_xlabel('Model Complexity', fontsize=fontsize+2)
    ax.tick_params(axis='both', which='major', labelsize=fontsize-5)
    ax.set_title(title, fontsize=fontsize, fontweight='bold')

def plot_complexity_graph_ood(ax, exp: str, metric: str, title, figsize=(12, 6), fontsize=10, size=100, alpha=0.7):
    """Scatter out-of-distribution ``metric`` against model complexity.

    This is the 14-feature variant and reads the ``*-ood-stats`` result
    files; each file's ``complexity`` and ``metric`` entries are wrapped into
    one-element lists before plotting. For ``exp == "oasis+"`` the highlighted
    points are GFR (14 features), GFR on OASIS features and AutoScore
    (entry 14); for ``exp == "union49"`` they are GFR (40 features) and
    AutoScore (entry 45). The x axis is log-scaled.

    ``figsize`` is accepted but not used.
    """
    assert metric in ['auroc', 'auprc']
    small_offset = 100
    autoscore_color = sns.color_palette()[0]

    # (result file, sub-key or None, legend label, colour, marker)
    if exp == "oasis+":
        highlighted = [
            ("results/fasterrisk/fasterrisk-14-ood-stats", None, 'GFR (ours)', 'red', '^'),
            ("results/fasterrisk/fasterrisk-oasis-ood-stats", None, 'GFR (OASIS)', 'black', 'v'),
            ("results/autoscore/autoscore-ood", 14, 'AutoScore', autoscore_color, '^'),
        ]
    elif exp == "union49":
        highlighted = [
            ("results/fasterrisk/fasterrisk-40-ood-stats", None, 'GFR (ours)', 'red', '^'),
            ("results/autoscore/autoscore-ood", 45, 'AutoScore', autoscore_color, '^'),
        ]
    else:
        highlighted = []

    for path, key, label, point_color, point_marker in highlighted:
        result = joblib.load(path)
        if key is not None:
            result = result[key]
        sns.scatterplot(x=[result['complexity']], y=[result[metric]], label=label, color=point_color, marker=point_marker, s=size+small_offset, ax=ax, alpha=alpha)

    marker_cycle = iter(['o', 's', 'v', 'D', 'P', 'X'])
    color_cycle = iter(sns.color_palette())
    next(color_cycle)  # the first palette colour is the one AutoScore uses
    # Models preceded by one skipped palette colour.
    # NOTE(review): 'logreg-l1' matches none of the model names below.
    skip_before = {'logreg-l1', 'random-forest', 'ebm'}
    for model in ['adaboost', 'xgboost', 'random-forest', 'nonlinear-logreg-l1', 'nonlinear-logreg-l2', 'ebm']:
        result = joblib.load(f"results/{exp}/{model}-ood-stats")
        if model in skip_before:
            next(color_cycle)
        offset = small_offset if model == 'random-forest' else 0
        ax = sns.scatterplot(x=[result['complexity']], y=[result[metric]], label=get_good_label(model), marker=next(marker_cycle), s=size+offset, color=next(color_cycle), ax=ax, alpha=alpha)

    ax.set_xscale('log')
    ax.set_xlabel('Model Complexity', fontsize=fontsize+2)
    ax.tick_params(axis='both', which='major', labelsize=fontsize-5)
    ax.set_title(title, fontsize=fontsize, fontweight='bold')

# 2x4 complexity figure: top row MIMIC, bottom row eICU.
# NOTE: this figure is the 14-feature variant.
fontsize = 26
size = 200
sns.set_style('whitegrid')
fig, axes = plt.subplots(2, 4, figsize=(26, 13))

# Rows: (plot function, dataset name used in the title).
_row_specs = [(plot_complexity_graph, "MIMIC"), (plot_complexity_graph_ood, "eICU")]
# Columns: (experiment, metric, feature-set description used in the title).
_col_specs = [
    ('union49', 'auroc', "All features"),
    ('union49', 'auprc', "All features"),
    ('oasis+', 'auroc', "OASIS features"),
    ('oasis+', 'auprc', "OASIS features"),
]

for _r, (_plot, _dataset) in enumerate(_row_specs):
    for _c, (_exp, _metric, _features) in enumerate(_col_specs):
        _panel = axes[_r, _c]
        _plot(_panel, _exp, _metric, fontsize=fontsize, size=size, title=f"{_dataset}, {_metric.upper()} ({_features})")
        if (_r, _c) == (1, 3):
            # Only the bottom-right panel keeps its legend.
            _panel.legend(fontsize=fontsize-6)
        else:
            _panel.get_legend().remove()

plt.tight_layout()
plt.subplots_adjust(wspace=0.2, hspace=0.4)
plt.savefig('complexity_14_autoscore.pdf', dpi=300, format='pdf')

## Feature Selection

In [None]:
# Scratch cell: unpacking two repeated lists yields one flat list with
# 'adaboost' five times followed by 'xgboost' five times.
[*['adaboost']*5, *['xgboost']*5]

In [None]:
import joblib
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

plt.rcParams['figure.dpi'] = 300

def get_good_label(label: str):
    """Return the short display name for an algorithm or feature-set key.

    Unknown keys yield None.
    """
    short_names = (
        ('adaboost', 'AdaBoost'),
        ('xgboost', 'XGBoost'),
        ('random-forest', "RF"),
        ('nonlinear-logreg-l1', "LR (L1)"),
        ('nonlinear-logreg-l2', "LR (L2)"),
        ('ebm', "EBM"),
        ('fasterrisk', 'GFR Features'),
        ('oasis', "OASIS Features"),
    )
    for key, pretty in short_names:
        if label == key:
            return pretty
    return None

def plot_feature_selection(figsize=(24, 8), fontsize=10, saturation=0.6, fliersize=10, showfliers=False):
    """Draw side-by-side AUROC / AUPRC point plots comparing feature sets.

    For each feature set ('fasterrisk', 'oasis') and each algorithm, the
    scores in ``results/compare-feature/{feature}-{algo}`` are gathered into
    a long-form DataFrame, then plotted per algorithm with one point (and
    standard-deviation bar) per feature set. Only the right-hand panel keeps
    its legend.

    ``saturation``, ``fliersize`` and ``showfliers`` are accepted but not used
    by the point plots drawn here.

    NOTE: GFR-14 rows (``results/compare-feature/fasterrisk-fasterrisk-14``
    and ``results/oasis+/fasterrisk-oasis``) are deliberately not included.
    """
    columns = {
        'MIMIC III Test AUROC': [],
        'Algorithm': [],
        'Feature': [],
        'MIMIC III Test AUPRC': [],
    }
    feature_sets = ['fasterrisk', 'oasis']
    algorithms = ['nonlinear-logreg-l1', 'nonlinear-logreg-l2', 'adaboost', 'ebm', 'xgboost', 'random-forest']
    for feature in feature_sets:
        for algo in algorithms:
            result = joblib.load(f"results/compare-feature/{feature}-{algo}")
            # The number of rows is driven by the AUROC entries.
            count = len(result['auroc'])
            columns['MIMIC III Test AUROC'].extend(result['auroc'][i] for i in range(count))
            columns['MIMIC III Test AUPRC'].extend(result['auprc'][i] for i in range(count))
            columns['Algorithm'].extend([get_good_label(algo)] * count)
            columns['Feature'].extend([get_good_label(feature)] * count)

    df = pd.DataFrame(columns)

    fig, axes = plt.subplots(1, 2, figsize=figsize)
    for position, metric_name in enumerate(('AUROC', 'AUPRC')):
        ax = sns.pointplot(data=df, x='Algorithm', y=f"MIMIC III Test {metric_name}", hue='Feature', ax=axes[position], join=False, dodge=0.5, markers='s', errorbar='sd', scale=1.5, capsize=0.1)
        ax.set_title(f"MIMIC, Test {metric_name}", fontsize=fontsize+2, fontweight='bold')
        ax.set_ylabel(metric_name, fontsize=fontsize)
        ax.set_xlabel('Complexity (increasing order)', fontsize=fontsize+2)
        ax.tick_params(axis='x', which='major', labelsize=fontsize)
        ax.tick_params(axis='y', which='major', labelsize=fontsize-3)
        if position == 0:
            ax.get_legend().remove()
        else:
            ax.legend(fontsize=fontsize-2, loc='lower right')
    plt.tight_layout()

# Render the feature-selection comparison and save it as a PDF.
sns.set_style('whitegrid')
# NOTE: showfliers/saturation are accepted by plot_feature_selection but only
# appear in its commented-out boxplot variant, so they do not affect the output.
plot_feature_selection(fontsize=26, showfliers=True, saturation=0.8)
plt.savefig('feature_selection.pdf', dpi=300, format='pdf')

Perform paired t-tests

In [None]:
from scipy.stats import ttest_rel
import joblib

def see_statistical_significance(metric: str, alternative: str='two-sided', sig_level: float=0.05):
    """Print a paired t-test per algorithm: GFR features vs. OASIS features.

    For each algorithm the scores under ``metric`` are loaded from
    ``results/compare-feature/fasterrisk-{algo}`` and
    ``results/compare-feature/oasis-{algo}`` and compared with
    ``scipy.stats.ttest_rel`` (GFR features first, so
    ``alternative='greater'`` tests whether the GFR-feature scores are
    higher).

    Args:
        metric: Key of the score list inside each result file.
        alternative: Alternative hypothesis forwarded to ``ttest_rel``.
        sig_level: p-value threshold below which a result is reported as
            significant.
    """
    # NOTE(review): the plotting code loads 'nonlinear-logreg-l1/-l2' result
    # files while this loop uses 'logreg-l1/-l2' — confirm which files are
    # meant to be tested.
    for algo in ['adaboost', 'xgboost', 'random-forest', 'logreg-l1', 'logreg-l2', 'ebm']:
        fasterrisk_stats = joblib.load(f"results/compare-feature/fasterrisk-{algo}")
        oasis_stats = joblib.load(f"results/compare-feature/oasis-{algo}")
        t_stat, p_val = ttest_rel(fasterrisk_stats[metric], oasis_stats[metric], alternative=alternative)

        if p_val < sig_level:
            sig = "*** SIGNIFICANT ***"
        else:
            sig = "*** NOT SIGNIFICANT ***"
        # get_good_label returns None for names it does not know; fall back to
        # the raw algorithm name so the report never reads "None performance".
        label = get_good_label(algo) or algo
        print(f"{label} performance: {sig}\nstatistic={t_stat}, p={p_val}\n")

# One-sided paired test on AUROC at the 1% level.
see_statistical_significance('auroc', sig_level=0.01, alternative='greater')

In [None]:
# One-sided paired test on AUPRC at the 1% level.
see_statistical_significance('auprc', sig_level=0.01, alternative='greater')

In [None]:
import joblib
from scipy.stats import ttest_rel

def see_statistical_significance(metric: str, alternative: str='two-sided', sig_level: float=0.05):
    """Print a paired t-test verdict for FasterRisk-14 against the OASIS+ results.

    The ``metric`` entries of the stats objects stored at
    ``results/fasterrisk/fasterrisk-14`` and ``results/oasis+/fasterrisk-oasis``
    are compared with ``scipy.stats.ttest_rel``.

    Args:
        metric: Key read from both loaded stats objects.
        alternative: Alternative hypothesis forwarded to ``ttest_rel``.
        sig_level: A p-value strictly below this threshold is reported as
            significant.
    """
    candidate_scores = joblib.load("results/fasterrisk/fasterrisk-14")[metric]
    reference_scores = joblib.load("results/oasis+/fasterrisk-oasis")[metric]
    t_stat, p_val = ttest_rel(candidate_scores, reference_scores, alternative=alternative)

    sig = "*** SIGNIFICANT ***" if p_val < sig_level else "*** NOT SIGNIFICANT ***"
    print(f"FasterRisk-14 performance: {sig}\nstatistic={t_stat}, p={p_val}\n")

# One-sided tests (alternative="greater") at the 0.01 level, once per metric.
see_statistical_significance("auroc", alternative="greater", sig_level=0.01)
see_statistical_significance("auprc", alternative="greater", sig_level=0.01)

## Time Consumption

In [None]:
from collections import namedtuple

import matplotlib.pyplot as plt
import seaborn as sns

plt.rcParams['figure.dpi'] = 300

# One record per group-sparsity setting; the three lists are aligned by index,
# so std_time[i] and mean_time[i] belong to sparsity[i].
TimeMetric = namedtuple('TimeMetric', ['group_sparsity', 'sparsity', 'std_time', 'mean_time'])
a = TimeMetric(
    group_sparsity=5,
    sparsity=[20, 40, 60, 80, 100],
    std_time=[0.006325, 0.007115, 0.1355, 1.599, 4.407],
    mean_time=[3.776, 16.385, 41.943, 91.241, 138.434],
)
b = TimeMetric(
    group_sparsity=10,
    sparsity=[20, 40, 60, 80, 100],
    std_time=[0.01076, 0.01855, 0.05972, 0.09777, 1.067],
    mean_time=[4.932, 15.396, 37.77, 73.343, 120.994],
)
c = TimeMetric(
    group_sparsity=20,
    sparsity=[20, 40, 60, 80, 100],
    std_time=[0.004531, 0.02045, 0.01967, 0.05346, 2.648],
    mean_time=[4.642, 16.364, 34.792, 61.424, 97.89],
)
d = TimeMetric(
    group_sparsity=30,
    sparsity=[20, 40, 60, 80, 100],
    std_time=[0.001898, 0.01528, 0.01851, 0.0328, 0.1723],
    mean_time=[4.409, 16.229, 35.555, 55.526, 86.183],
)
e = TimeMetric(
    group_sparsity=40,
    sparsity=[20, 40, 60, 80, 100],
    std_time=[0.003582, 0.01359, 0.0182, 0.2101, 0.299],
    mean_time=[4.38, 16.233, 32.994, 55.794, 87.2],
)
f = TimeMetric(
    group_sparsity=45,
    sparsity=[20, 40, 60, 80, 100],
    std_time=[0.00477, 0.517, 0.04969, 0.09849, 1.543],
    mean_time=[4.383, 16.497, 33.06, 56.012, 87.849],
)

# Shared styling knobs for the figure.
sns.set_style("ticks")
colors = iter(sns.color_palette('tab10'))
capsize, fontsize, figsize = 20, 45, (30, 10)
size, saturation, linewidth = 12, 0.6, 4  # NOTE(review): `saturation` is not used below.

# One line per group-sparsity setting, with matching-colour error bars on top.
for stats in (a, b, c, d, e, f):
    color = next(colors)
    ax = sns.lineplot(
        x=stats.sparsity,
        y=stats.mean_time,
        label=f"Group Sparsity = {stats.group_sparsity}",
        marker='s',
        color=color,
        markersize=size,
        linewidth=linewidth,
    )
    ax.errorbar(
        stats.sparsity,
        stats.mean_time,
        yerr=stats.std_time,
        fmt='s',
        capsize=capsize,
        color=color,
        linewidth=linewidth-0.5,
    )
    ax.figure.set_size_inches(*figsize)

plt.xlabel("Sparsity (number of splits)", fontsize=fontsize+2)
plt.ylabel("Time (minutes)", fontsize=fontsize+2)
plt.xticks(fontsize=fontsize-2)
plt.yticks(fontsize=fontsize-2)
plt.legend(fontsize=fontsize-4)
sns.despine()
plt.tight_layout()
plt.savefig('gp_time.pdf', dpi=300, format='pdf')

## Disease Specific

In [None]:
import joblib
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

plt.rcParams['figure.dpi'] = 300

def get_good_label(label: str) -> str:
    """Map an internal method/disease identifier to its display label.

    Any label containing ``'fasterrisk'`` maps to ``'GFR-17'``; the remaining
    known identifiers are matched exactly.

    Args:
        label: Internal identifier such as ``'sapsii'`` or ``'heart_failure'``.

    Returns:
        The display label. An unrecognized identifier is returned unchanged
        instead of falling through to an implicit ``None``, so it still shows
        up readably in printed output and plot legends.
    """
    # Substring match must come first: FasterRisk identifiers carry a suffix
    # (e.g. 'fasterrisk-17'), so they can never hit the exact-match table.
    if 'fasterrisk' in label:
        return 'GFR-17'

    display_names = {
        'sapsii': "SAPS II",
        'sofa': "SOFA",
        'akf': "Acute Kidney Failure",
        'heart_failure': "Heart Failure",
        'sepsis': "Sepsis/Septicemia",
        'ami': "Acute Myocardial Infarction",
        'oasis': "OASIS",
    }
    return display_names.get(label, label)

def plot_feature_selection(group_sparsity=10, figsize=(22, 12), fontsize=10, notch=False, width=0.5, saturation=0.6, fliersize=10, showfliers=False):
    """Plot AUROC and AUPRC per disease for FasterRisk, SOFA, OASIS and SAPS II.

    For each disease/method pair the stats object under ``results/disease/``
    is loaded, its 'auroc' and 'auprc' entries are collected into a long-form
    DataFrame, and the per-pair means are printed. Two stacked point plots
    (AUROC on top, AUPRC below) are then drawn with ``errorbar='sd'``.

    Args:
        group_sparsity: Selects the ``fasterrisk-<group_sparsity>`` results file.
        figsize: Size passed to ``plt.subplots``.
        fontsize: Base font size; titles, ticks and legend are offset from it.
        notch, width, saturation, fliersize, showfliers: Accepted but not used
            by the current point-plot rendering.
    """
    diseases = ['sepsis', 'akf', 'heart_failure', 'ami']
    contenders = [f'fasterrisk-{group_sparsity}', 'sofa', 'oasis', 'sapsii']

    methods, sickness, auroc, auprc = [], [], [], []
    for disease in diseases:
        for method in contenders:
            stats_path = (
                f"results/disease/{disease}/fasterrisk-{group_sparsity}"
                if 'fasterrisk' in method
                else f"results/disease/{disease}/{method}_stats"
            )
            stats = joblib.load(stats_path)

            # The number of 'auroc' entries drives how many rows are added.
            n_entries = len(stats['auroc'])
            pair_auroc = [stats['auroc'][k] for k in range(n_entries)]
            pair_auprc = [stats['auprc'][k] for k in range(n_entries)]
            auroc.extend(pair_auroc)
            auprc.extend(pair_auprc)
            methods.extend([get_good_label(method)] * n_entries)
            sickness.extend([get_good_label(disease)] * n_entries)

            print(f"method: {method}, disease: {disease}, mean-auroc: {np.mean(pair_auroc):.3f}, mean-auprc: {np.mean(pair_auprc):.3f}")

    df = pd.DataFrame({
        'MIMIC III Test AUROC': auroc,
        'Method': methods,
        "Disease": sickness,
        'MIMIC III Test AUPRC': auprc,
    })

    def _draw_panel(target_ax, column, ylabel, title):
        # Shared rendering and axis cosmetics for one metric panel.
        panel = sns.pointplot(data=df, x='Disease', y=column, hue='Method', ax=target_ax, join=False, dodge=0.5, markers='s', errorbar='sd', scale=1.5, capsize=0.08, palette=sns.color_palette("tab10"))
        panel.set_ylabel(ylabel, fontsize=fontsize)
        panel.set_title(title, fontsize=fontsize+2, fontweight='bold')
        panel.set_xlabel('')
        panel.tick_params(axis='x', which='major', labelsize=fontsize)
        panel.tick_params(axis='y', which='major', labelsize=fontsize-6)
        return panel

    _fig, axes = plt.subplots(2, 1, figsize=figsize)

    # Only the bottom panel keeps a legend; the top one would duplicate it.
    top_panel = _draw_panel(axes[0], "MIMIC III Test AUROC", 'AUROC', "MIMIC, Test AUROC")
    top_panel.get_legend().remove()
    bottom_panel = _draw_panel(axes[1], "MIMIC III Test AUPRC", 'AUPRC', "MIMIC, Test AUPRC")
    bottom_panel.legend(fontsize=fontsize-1)

    plt.tight_layout()
    plt.subplots_adjust(wspace=0.2, hspace=0.2)

# Draw the per-disease comparison for group sparsity 17 with the white-grid
# theme and write the current figure to 'disease_barplot.pdf'.
sns.set_style("whitegrid")
plot_feature_selection(fontsize=26, group_sparsity=17, showfliers=True, saturation=0.6)
plt.savefig('disease_barplot.pdf', dpi=300, format='pdf')

Perform a paired t-test (`scipy.stats.ttest_rel`) for each disease

In [None]:
from scipy.stats import ttest_rel
import joblib

def see_statistical_significance(metric: str, disease: str, group_sparsity: int, alternative: str='greater', sig_level: float=0.05):
    """Print paired t-test verdicts for FasterRisk against OASIS, SAPS II and SOFA.

    The ``metric`` entry of ``results/disease/<disease>/fasterrisk-<group_sparsity>``
    is compared with the same entry of each ``results/disease/<disease>/<method>_stats``
    file using ``scipy.stats.ttest_rel``.

    Args:
        metric: Key read from every loaded stats object; also shown upper-cased
            in the printed header.
        disease: Sub-directory of ``results/disease`` to read from.
        group_sparsity: Selects which FasterRisk results file to load.
        alternative: Alternative hypothesis forwarded to ``ttest_rel``.
        sig_level: A p-value strictly below this threshold is reported as
            significant.
    """
    print(f"======= {disease}, {metric.upper()} =======")
    # Loaded once up front: this file does not depend on the baseline method,
    # so there is no need to re-read it on every loop iteration.
    fasterrisk_stats = joblib.load(f"results/disease/{disease}/fasterrisk-{group_sparsity}")
    for method in ['oasis', 'sapsii', 'sofa']:
        stats = joblib.load(f"results/disease/{disease}/{method}_stats")
        t_stat, p_val = ttest_rel(fasterrisk_stats[metric], stats[metric], alternative=alternative)

        if p_val < sig_level:
            sig = "*** SIGNIFICANT ***"
        else:
            sig = "*** NOT SIGNIFICANT ***"
        print(f"{get_good_label(method)} performance: {sig}\nstatistic={t_stat}, p={p_val}\n")

# Run the one-sided test for every disease and both metrics, using the
# group-sparsity-17 FasterRisk results and a 0.05 significance level.
for disease in ['sepsis', 'akf', 'heart_failure', 'ami']:
    for metric in ['auroc', 'auprc']:
        see_statistical_significance(metric=metric, disease=disease, group_sparsity=17, sig_level=0.05, alternative='greater')

## Dimension Reduction for Feature Selection Comparison (PaCMAP)

In [None]:
from pacmap import PaCMAP
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
plt.rcParams['figure.dpi'] = 300

# Column subsets used for the two embeddings being compared.
oasis_features = [
    'heartrate_min', 'heartrate_max', 'meanbp_min', 'meanbp_max',
    'resprate_min', 'resprate_max', 'tempc_min', 'tempc_max',
    'urineoutput', 'mechvent', 'electivesurgery', 'age', 'gcs_min', 'preiculos',
]

fasterrisk_features = [
    'tempc_max', 'bilirubin_max', 'urineoutput', 'age', 'gcs_min', 'sysbp_min', 'ph_min',
    'heartrate_max', 'mechvent', 'mets', 'resprate_min', 'bun_max', 'glucose_min', 'pao2_max',
]

sns.set_style("white")
mimic_whole_df = pd.read_csv("data/MIMIC-WHOLE.csv")
y = mimic_whole_df['hospital_expire_flag'].values
X = mimic_whole_df.drop('hospital_expire_flag', axis=1)[oasis_features].values
X = np.where(np.isnan(X), 0, X)  # missing values become 0

# NOTE(review): negatives are selected with label -1 here but with 0 in the
# eICU cells; confirm the label encoding of MIMIC-WHOLE.csv.
positive_X = X[y == 1]
negative_X = X[y == -1]
num_positive = len(positive_X)
num_negative = len(negative_X)

# Positives first, negatives after, so row ranges identify the class below.
X = np.concatenate((positive_X, negative_X), axis=0)
embed = PaCMAP(n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0)
X_transformed = embed.fit_transform(X, init='pca')

plt.figure(figsize=(6, 6))
colors = ["red"] * num_positive + ["#85C1E9"] * num_negative
# Negatives are drawn first so the positive points end up on top.
# NOTE(review): label= is set but plt.legend() is never called in this cell.
plt.scatter(X_transformed[num_positive:, 0], X_transformed[num_positive:, 1], c=colors[num_positive:], label="Negative", alpha=0.4, s=1)
plt.scatter(X_transformed[:num_positive, 0], X_transformed[:num_positive, 1], c=colors[:num_positive], label="Positive", alpha=0.4, s=1)
plt.title("MIMIC-III: PaCMAP Embedding for OASIS Features")
plt.show()

In [None]:
# PaCMAP embedding of MIMIC restricted to the `fasterrisk_features` columns.
sns.set_style("white")
mimic_whole_df = pd.read_csv("data/MIMIC-WHOLE.csv")
y = mimic_whole_df['hospital_expire_flag'].values
X = mimic_whole_df.drop('hospital_expire_flag', axis=1)[fasterrisk_features].values
X = np.where(np.isnan(X), 0, X)  # missing values become 0

# NOTE(review): negatives are selected with label -1 here but with 0 in the
# eICU cells; confirm the label encoding of MIMIC-WHOLE.csv.
positive_X = X[y == 1]
negative_X = X[y == -1]
num_positive = len(positive_X)
num_negative = len(negative_X)

# Positives first, negatives after, so row ranges identify the class below.
X = np.concatenate((positive_X, negative_X), axis=0)
embed = PaCMAP(n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0)
X_transformed = embed.fit_transform(X, init='pca')

plt.figure(figsize=(6, 6))
colors = ["red"] * num_positive + ["#85C1E9"] * num_negative
# Negatives are drawn first so the positive points end up on top.
# NOTE(review): label= is set but plt.legend() is never called in this cell.
plt.scatter(X_transformed[num_positive:, 0], X_transformed[num_positive:, 1], c=colors[num_positive:], label="Negative", alpha=0.4, s=1)
plt.scatter(X_transformed[:num_positive, 0], X_transformed[:num_positive, 1], c=colors[:num_positive], label="Positive", alpha=0.4, s=1)
plt.title("MIMIC-III: PaCMAP Embedding for FR-14 Features")
plt.show()

In [None]:
from pacmap import PaCMAP
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap

# Column subsets used for the two embeddings being compared.
oasis_features = [
    'heartrate_min', 'heartrate_max', 'meanbp_min', 'meanbp_max',
    'resprate_min', 'resprate_max', 'tempc_min', 'tempc_max',
    'urineoutput', 'mechvent', 'electivesurgery', 'age', 'gcs_min', 'preiculos',
]

fasterrisk_features = [
    'tempc_max', 'bilirubin_max', 'urineoutput', 'age', 'gcs_min', 'sysbp_min', 'ph_min',
    'heartrate_max', 'mechvent', 'mets', 'resprate_min', 'bun_max', 'glucose_min', 'pao2_max',
]

sns.set_style("white")
eicu_df = pd.read_csv("data/eICU-union.csv")
y = eicu_df['hospital_expire_flag'].values
X = eicu_df.drop('hospital_expire_flag', axis=1)[oasis_features].values
X = np.where(np.isnan(X), 0, X)  # missing values become 0

# Negatives are the rows labelled 0 in this dataset.
positive_X = X[y == 1]
negative_X = X[y == 0]
num_positive = len(positive_X)
num_negative = len(negative_X)

# Positives first, negatives after, so row ranges identify the class below.
X = np.concatenate((positive_X, negative_X), axis=0)
embed = PaCMAP(n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0)
X_transformed = embed.fit_transform(X, init='pca')

plt.figure(figsize=(6, 6))
colors = ["red"] * num_positive + ["#85C1E9"] * num_negative
# Negatives are drawn first so the positive points end up on top.
# NOTE(review): label= is set but plt.legend() is never called in this cell.
plt.scatter(X_transformed[num_positive:, 0], X_transformed[num_positive:, 1], c=colors[num_positive:], label="Negative", alpha=0.4, s=1)
plt.scatter(X_transformed[:num_positive, 0], X_transformed[:num_positive, 1], c=colors[:num_positive], label="Positive", alpha=0.4, s=1)
plt.title("eICU: PaCMAP Embedding for OASIS Features")

In [None]:
# PaCMAP embedding of eICU restricted to the `fasterrisk_features` columns.
sns.set_style("white")
eicu_df = pd.read_csv("data/eICU-union.csv")
y = eicu_df['hospital_expire_flag'].values
X = eicu_df.drop('hospital_expire_flag', axis=1)[fasterrisk_features].values
X = np.where(np.isnan(X), 0, X)  # missing values become 0

# Negatives are the rows labelled 0 in this dataset.
positive_X = X[y == 1]
negative_X = X[y == 0]
num_positive = len(positive_X)
num_negative = len(negative_X)

# Positives first, negatives after, so row ranges identify the class below.
X = np.concatenate((positive_X, negative_X), axis=0)
embed = PaCMAP(n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0)
X_transformed = embed.fit_transform(X, init='pca')

plt.figure(figsize=(6, 6))
colors = ["red"] * num_positive + ["#85C1E9"] * num_negative
# Negatives are drawn first so the positive points end up on top.
# NOTE(review): label= is set but plt.legend() is never called in this cell.
plt.scatter(X_transformed[num_positive:, 0], X_transformed[num_positive:, 1], c=colors[num_positive:], label="Negative", alpha=0.4, s=1)
plt.scatter(X_transformed[:num_positive, 0], X_transformed[:num_positive, 1], c=colors[:num_positive], label="Positive", alpha=0.4, s=1)
plt.title("eICU: PaCMAP Embedding for FR-14 Features")

## Distributions

In [None]:
from mimic_pipeline import utils, preprocess
import pandas as pd


# Load the MIMIC table and plot 'bilirubin_max' twice via the project helper
# `preprocess.plot_distribution`: once with default limits, once restricted to
# xlim=(45, 60) / ylim=(0, 50).
# NOTE(review): presumably the helper splits the histogram by `label_column`;
# confirm against mimic_pipeline.preprocess.
mimic_union = pd.read_csv("data/MIMIC-WHOLE.csv")
preprocess.plot_distribution(mimic_union, column='bilirubin_max', label_column='hospital_expire_flag', stat='frequency')
preprocess.plot_distribution(mimic_union, column='bilirubin_max', label_column='hospital_expire_flag', xlim=(45, 60), ylim=(0, 50), stat='frequency')

In [None]:
# Summary statistics of the 'bilirubin_max' column.
mimic_union['bilirubin_max'].describe()

In [None]:
# Number of rows whose 'bilirubin_max' is at least 54.
len(mimic_union[mimic_union['bilirubin_max'] >= 54])