Running the power calculations to determine the reduction in sample size.

In [None]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
from statsmodels.stats.power import TTestIndPower
from scipy import stats
from matplotlib.lines import Line2D

In [None]:
## Global variables that are not passed to functions via arguments
## (effect_type / effect are read directly by calc_effect_size; power / alpha
## are handed to n_reduction_curve explicitly further down)

# Simulated treatment effect applied to the treatment arm in calc_effect_size:
#   "%"            -> scores > 0 are reduced by the fraction `effect`
#   anything else  -> scores > 0 are reduced by the absolute amount `effect`
effect_type = "%"
effect = 0.27

# Column picked from the outcome table when `cdrsb` is loaded
# (presumably the follow-up time point -- TODO confirm unit with the data file).
time = "24"
# NOTE(review): `threshold` is not referenced in any cell visible here.
threshold = 0.5
# Target power and significance level forwarded to TTestIndPower.solve_power.
power = 0.9
alpha = 0.05

In [None]:
# Outcome data (absolute change in CDR-SB): read the table with its first column
# as index and keep only the column named by the global `time`.
cdrsb = pd.read_csv("", index_col=0).loc[:, time]

In [None]:
def mean_confidence_interval(data, confidence=0.95):
    """Return the NaN-aware mean of *data* and its two-sided t confidence interval.

    Parameters
    ----------
    data : array-like
        1-D or 2-D numeric data; statistics are taken along axis 0 (per column
        for 2-D input). NaN entries are ignored.
    confidence : float, optional
        Coverage of the interval, 0.95 by default.

    Returns
    -------
    tuple
        ``(mean, lower, upper)`` where ``lower = mean - h`` and
        ``upper = mean + h``.
    """
    a = 1.0 * np.array(data)
    # Count only the observations that actually enter the mean and the SEM.
    # Using len(a) would include NaN rows and overstate the degrees of freedom,
    # which makes the interval too narrow whenever values are missing.
    n = np.sum(~np.isnan(a), axis=0)
    m, se = np.nanmean(a, 0), stats.sem(a, nan_policy="omit")
    h = se * stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h

def plot_enrich_curve(axis, values, legend_label, color):
    """Draw the column-wise mean of *values* with a shaded 95% confidence band.

    Parameters
    ----------
    axis : matplotlib Axes
        Axes to draw on.
    values : pandas.DataFrame
        Columns are the x positions; the rows are averaged per column.
    legend_label : str
        Label attached to the mean line.
    color : str
        Colour used for both the line and the band.
    """
    x_positions = values.columns
    # mean curve
    axis.plot(x_positions, values.mean(), color=color, label=legend_label)
    # confidence band; the helper returns (mean, lower, upper)
    _, lower, upper = mean_confidence_interval(values, confidence=0.95)
    axis.fill_between(x_positions, list(lower), list(upper), color=color, alpha=0.3)

def calc_effect_size(t_arm, c_arm, *, effect_kind=None, effect_value=None):
    """Return Cohen's d between a control arm and a simulated treatment arm.

    The treatment effect is simulated on a copy of *t_arm*: only entries with
    a value > 0 are changed, so entries that improved or did not change are
    left as they are.

    Parameters
    ----------
    t_arm : pandas.Series
        Outcome values of the treatment arm before the simulated effect.
        Not modified.
    c_arm : pandas.Series
        Outcome values of the control arm.
    effect_kind : str, optional
        ``"%"`` reduces positive values by the fraction *effect_value*; any
        other value subtracts *effect_value* from them. Defaults to the
        module-level ``effect_type``.
    effect_value : float, optional
        Size of the simulated effect. Defaults to the module-level ``effect``.

    Returns
    -------
    float
        ``(mean(c_arm) - mean(treated t_arm)) / pooled_sd`` with
        ``pooled_sd = sqrt((var_t + var_c) / 2)``.
    """
    kind = effect_type if effect_kind is None else effect_kind
    magnitude = effect if effect_value is None else effect_value

    t_arm = t_arm.copy()
    worsened = t_arm > 0
    if kind == "%":
        # relative effect
        t_arm[worsened] = t_arm[worsened] - t_arm[worsened] * magnitude
    else:
        # static (absolute) effect
        t_arm[worsened] = t_arm[worsened] - magnitude

    # The pooled standard deviation averages the *variances* of the two arms;
    # averaging the standard deviations under the root gives the wrong scale.
    pooled_sd = np.sqrt((t_arm.var() + c_arm.var()) / 2)
    cohens_d = (c_arm.mean() - t_arm.mean()) / pooled_sd
    return cohens_d

In [None]:
def n_reduction_curve(enrich_models, outcome_data, power, alpha):
    """Compute the sample-size reduction from enrichment over a threshold grid.

    For every model repeat and every classifier threshold in
    ``np.arange(0, 0.95, 0.05)``, the cohort is restricted to the entries whose
    score in column ``"0"`` exceeds the threshold. The per-arm sample size
    needed for the restricted cohort is compared with the one needed for the
    full cohort.

    Parameters
    ----------
    enrich_models : sequence of pandas.DataFrame
        One frame per repeat, with a score column ``"0"`` and a ``"Cluster"``
        column. The index of the first frame defines which entries of
        *outcome_data* are used.
    outcome_data : pandas.Series
        Outcome values, indexed with the same labels as the model frames.
    power : float
        Target power passed to ``TTestIndPower.solve_power``.
    alpha : float
        Significance level passed to ``TTestIndPower.solve_power``.

    Returns
    -------
    tuple of dict
        ``(reductions, n_store, n_slow_store, es_store)``. Each dict maps a
        threshold to a list with one entry per repeat: the sample-size
        reduction in percent, the number of selected entries, the sum of
        ``"Cluster"`` over the selected entries, and the effect size of the
        selected stratum. Reduction and effect size are NaN when fewer than
        three entries are selected.
    """
    reductions = dict()
    n_store = dict()
    n_slow_store = dict()
    es_store = dict()
    tresh = np.arange(0, 0.95, 0.05)
    # get IDs of patients which have predictions
    pred_ids = enrich_models[0].index
    # create treatment and control arm, restrict on patients that have predictions
    treat = outcome_data[pred_ids].dropna().copy()
    control = outcome_data[pred_ids].dropna().copy()
    d_full = calc_effect_size(treat, control)
    analysis = TTestIndPower()
    # The unenriched sample size does not depend on repeat or threshold,
    # so solve for it once instead of inside the loop.
    result = analysis.solve_power(d_full, power=power, nobs1=None, ratio=1.0, alpha=alpha)

    # iterate through repeats
    for d in enrich_models:
        for t in tresh:
            # stratify; intersection because not all predicted patients have an
            # outcome value (NaN) and not all with an outcome have predictions
            strat_ids = d.loc[d["0"] > t].index.intersection(treat.index)
            treat_strat = treat.loc[strat_ids].copy()
            control_strat = control.loc[strat_ids].copy()
            if treat_strat.shape[0] >= 3:
                # calculate effect size
                d_strat = calc_effect_size(treat_strat, control_strat)
                # calculate sample size reduction
                result_strat = analysis.solve_power(d_strat, power=power, nobs1=None, ratio=1.0, alpha=alpha)
                reduction = (1 - (result_strat / result)) * 100
            else:
                # Too few entries for an estimate. Record NaN for the effect
                # size as well, so no stale value from an earlier stratum is
                # stored (and no unbound name is hit on the first iteration).
                reduction = np.nan
                d_strat = np.nan

            # get N above threshold
            n = treat_strat.shape[0]
            n_slow = d.loc[treat_strat.index, "Cluster"].sum()
            # the first repeat creates each threshold's list, later repeats extend it
            reductions.setdefault(t, []).append(reduction)
            n_store.setdefault(t, []).append(n)
            n_slow_store.setdefault(t, []).append(n_slow)
            es_store.setdefault(t, []).append(d_strat)
    return reductions, n_store, n_slow_store, es_store

#### Multimodal

In [None]:
# get patients for enrichment and their scores
# glob returns paths in arbitrary order; sort so the first repeat (which defines
# the patient set inside n_reduction_curve) is the same on every run
files = sorted(glob.glob(""))
data = [pd.read_csv(file, index_col=0).rename(columns={"Score": "0"}) for file in files]
for df in data:  # flip labels and probabilities to go for "fast"
    df["0"] = 1 - df["0"]
    # Assign the result back: an in-place replace on the selected column is a
    # chained operation and does not update the frame under Copy-on-Write.
    df["Cluster"] = df["Cluster"].replace({0: 1, 1: 0})

In [None]:
# Run the power analysis for the multimodal models. Each returned dict maps a
# classifier threshold to a list with one entry per repeat.
s_red, n, n_fast, effects = n_reduction_curve(data, cdrsb, power, alpha)

# Turn every dict into a table with one row per repeat and the thresholds,
# in ascending order, as columns.
values = pd.DataFrame.from_dict(s_red, orient="index")
values_mm = values.sort_index().T
n_mm = pd.DataFrame.from_dict(n, orient="index").sort_index().T
fast_mm = pd.DataFrame.from_dict(n_fast, orient="index").sort_index().T
es_mm = pd.DataFrame.from_dict(effects, orient="index").sort_index().T

#### Common

In [None]:
# create file names
# glob returns paths in arbitrary order; sort so the first repeat (which defines
# the patient set inside n_reduction_curve) is the same on every run
files = sorted(glob.glob(""))
data = [pd.read_csv(file, index_col=0) for file in files]
# the stuff below flips the labels such that the fast progressors are label=1
for df in data:
    df["0"] = 1 - df["0"]
    # Assign the result back: an in-place replace on the selected column is a
    # chained operation and does not update the frame under Copy-on-Write.
    df["Cluster"] = df["Cluster"].replace({0: 1, 1: 0})

In [None]:
# Run the power analysis for the common models. Each returned dict maps a
# classifier threshold to a list with one entry per repeat.
s_red, n, n_fast, effects = n_reduction_curve(data, cdrsb, power, alpha)

# Turn every dict into a table with one row per repeat and the thresholds,
# in ascending order, as columns.
values = pd.DataFrame.from_dict(s_red, orient="index")
values_cm = values.sort_index().T
n_cm = pd.DataFrame.from_dict(n, orient="index").sort_index().T
fast_cm = pd.DataFrame.from_dict(n_fast, orient="index").sort_index().T
es_cm = pd.DataFrame.from_dict(effects, orient="index").sort_index().T

In [None]:
fig, axes = plt.subplots(1, 1, figsize=(5, 5))

# Leave the 0.85 threshold out of the curve. The threshold grid comes from
# np.arange, so its label is 0.8500000000000001 rather than 0.85; match it with
# a tolerance instead of hard-coding the exact float representation.
plot_enrich_curve(
    axes,
    values_cm.loc[:, ~np.isclose(values_cm.columns, 0.85)],
    legend_label="'Common'",
    color="#0D5C63",
)

# format plot
axes.set_title("Power analysis", loc="center", weight="bold", fontsize=12)
axes.set_ylabel("Sample size reduction (%)")
axes.set_xlabel("Classifier threshold")
#axes.legend(loc="upper left")

plt.savefig("../Figures/power_curve.png", dpi=350)

In [None]:
# check exact values / CIs for classifier thresholds
# mean_confidence_interval returns (mean, lower, upper); unpack in that order so
# the "Low" and "Up" columns are not swapped.
m, low, up = mean_confidence_interval(values_mm, confidence=0.95)
ci_mm = pd.DataFrame(values_mm.mean())
# np.ma.filled accepts both a masked array and a plain ndarray; masked entries
# become NaN instead of exposing the raw buffer behind the mask.
ci_mm["Low"] = np.ma.filled(low, np.nan)
ci_mm["Up"] = np.ma.filled(up, np.nan)
ci_mm

In [None]:
# check exact values / CIs for classifier thresholds
# mean_confidence_interval returns (mean, lower, upper); unpack in that order so
# the "Low" and "Up" columns are not swapped.
m, low, up = mean_confidence_interval(values_cm, confidence=0.95)
ci_cm = pd.DataFrame(values_cm.mean())
# np.ma.filled accepts both a masked array and a plain ndarray; masked entries
# become NaN instead of exposing the raw buffer behind the mask.
ci_cm["Low"] = np.ma.filled(low, np.nan)
ci_cm["Up"] = np.ma.filled(up, np.nan)
ci_cm