# Simulation analysis

In [3]:
import pandas as pd
import numpy as np

In [4]:
def table_maker(file, model_order=None):
    """Summarise per-run simulation scores as a model-by-cohort-size table.

    Reads a CSV of scores, computes the mean and the 5% / 95% quantiles of
    ``score`` for every (model, obs) pair and pivots the result so that each
    model is one row and each (cohort size, statistic) pair is one column.

    Parameters
    ----------
    file : str, path object or file-like
        Passed to ``pandas.read_csv`` with the first column used as the
        index. The data must contain the columns ``model``, ``obs`` and
        ``score``.
    model_order : sequence of str, optional
        Model names in the order the rows should appear. When omitted, the
        notebook's original fixed ordering is applied; that ordering is
        positional and only defined for exactly 13 models.

    Returns
    -------
    pandas.DataFrame
        Index named ``"Model"``; two-level columns named
        ``"Cohort Size"`` and ``"Quantile/Mean"``, ordered by cohort size and,
        within each size, as ``"mean"``, ``"0.05"``, ``"0.95"``.

    Raises
    ------
    ValueError
        If ``model_order`` is omitted and the data does not hold 13 models.
    KeyError
        If ``model_order`` names a model that is not present in the data.
    """
    # Display rank of each model after the model names have been sorted
    # alphabetically: entry i is the rank of the i-th name.
    # NOTE(review): assumes the 13 model names used when this list was
    # written; pass ``model_order`` for any other set of models.
    legacy_ranks = (6, 4, 5, 1, 2, 3, 12, 11, 10, 9, 8, 7, 0)

    scores = pd.read_csv(file, index_col=0)
    grouped = scores.groupby(["model", "obs"])

    # quantile() with a list adds an unnamed third index level, which
    # reset_index() exposes as the column "level_2".
    quantiles = grouped.quantile(q=[0.05, 0.95]).reset_index()
    means = grouped.mean().reset_index()
    # 0 is a numeric placeholder so the mean sorts ahead of both quantiles.
    means["level_2"] = 0

    long_table = pd.concat([quantiles, means], ignore_index=True)
    # The cohort size is reported as two thirds of ``obs`` (e.g. 375 -> 250).
    # NOTE(review): presumably ``obs`` counts the full sample - confirm.
    long_table["obs"] = long_table["obs"] / 3 * 2

    table = long_table.pivot(index="model", columns=["obs", "level_2"], values="score")
    # Sort explicitly instead of relying on the order pivot happens to emit:
    # rows alphabetically (required by legacy_ranks), columns by cohort size
    # and then by the numeric statistic code.
    table = table.sort_index(axis=0).sort_index(axis=1)
    table.columns = pd.MultiIndex.from_tuples(
        [(size, "mean" if stat == 0 else str(stat)) for size, stat in table.columns],
        names=["Cohort Size", "Quantile/Mean"],
    )

    if model_order is None:
        if len(table) != len(legacy_ranks):
            raise ValueError(
                "the default row order is defined for exactly "
                f"{len(legacy_ranks)} models but the data contains {len(table)}; "
                "pass model_order to choose the row order explicitly"
            )
        table = table.iloc[np.argsort(legacy_ranks)]
    else:
        # .loc raises KeyError for names that are missing from the data.
        table = table.loc[list(model_order)]

    return table.rename_axis(index="Model")

## Simple linear simulation

In [180]:
# Mean and 5% / 95% quantile summary for the simple linear simulation
# (concordance), one row per model.
simple_linear_concordance = table_maker(file="simple_linear_simulation.csv")
simple_linear_concordance

Cohort Size,250.0,250.0,250.0,500.0,500.0,500.0,1000.0,1000.0,1000.0
Quantile/Mean,mean,0.05,0.95,mean,0.05,0.95,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Unweighted Cox PH,0.882613,0.840876,0.915608,0.884823,0.855939,0.907597,0.886995,0.868649,0.903339
"Cox PH, Barlow weights",0.872666,0.831851,0.904693,0.881695,0.850701,0.902508,0.884927,0.865513,0.902102
"Cox PH, Prentice weights",0.874527,0.834516,0.905848,0.882058,0.850849,0.902772,0.885024,0.86574,0.902123
"Cox PH, Self-Prentice weights",0.87341,0.836375,0.911429,0.88234,0.851145,0.903747,0.8859,0.868029,0.902434
"Cox PH Lasso, Barlow weights",0.873015,0.831851,0.904693,0.881695,0.850701,0.902508,0.884927,0.865513,0.902102
"Cox PH Ridge, Barlow weights",0.872564,0.831851,0.904693,0.881695,0.850701,0.902508,0.884927,0.865513,0.902102
"Cox PH Elastic Net, Barlow weights",0.872853,0.831851,0.904873,0.881695,0.850701,0.902508,0.884927,0.865513,0.902102
"Survival Tree, unweighted",0.775623,0.709468,0.835016,0.795075,0.75121,0.833831,0.804575,0.776988,0.835237
"Survival Tree, random oversampler",0.767902,0.699969,0.830443,0.781088,0.73502,0.826047,0.792547,0.761633,0.823057
"Survival Tree, SMOTENC",0.768535,0.697114,0.840725,0.784994,0.731857,0.832949,0.796641,0.768016,0.83165


In [181]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
simple_linear_concordance_latex = simple_linear_concordance.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Concordance scores over 100 simulations from a Weibull distribution with a linear predictor containing 6 covariates. All Cox models performed similarly, while survival trees and random survival forests performed significantly worse.",
)
with open('../Project_file/tables/simple_linear_sim_table.tex', 'w', encoding="utf-8") as tf:
    tf.write(simple_linear_concordance_latex)

In [5]:
# Mean and 5% / 95% quantile summary for the simple linear simulation
# (Brier scores), one row per model.
simple_linear_brier = table_maker(file="simple_linear_simulation_brier.csv")
simple_linear_brier

Cohort Size,250.0,250.0,250.0,500.0,500.0,500.0,1000.0,1000.0,1000.0
Quantile/Mean,mean,0.05,0.95,mean,0.05,0.95,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Unweighted Cox PH,0.049228,0.039594,0.062433,0.044206,0.035772,0.053058,0.040379,0.035668,0.045974
"Cox PH, Barlow weights",0.053317,0.039645,0.073837,0.047311,0.036798,0.058813,0.042948,0.036087,0.051309
"Cox PH, Prentice weights",0.049318,0.039137,0.064705,0.043095,0.034577,0.052679,0.038771,0.032724,0.044635
"Cox PH, Self-Prentice weights",0.050459,0.038476,0.067171,0.043504,0.034919,0.053226,0.03868,0.033257,0.04561
"Cox PH Lasso, Barlow weights",0.053551,0.039645,0.073885,0.047311,0.036798,0.058813,0.042948,0.036087,0.051309
"Cox PH Ridge, Barlow weights",0.053317,0.039645,0.073837,0.047311,0.036798,0.058813,0.042948,0.036087,0.051309
"Cox PH Elastic Net, Barlow weights",0.053367,0.039645,0.073837,0.047311,0.036798,0.058813,0.042948,0.036087,0.051309
"Survival Tree, unweighted",0.101739,0.074774,0.128501,0.089052,0.072932,0.110275,0.082723,0.070962,0.096465
"Survival Tree, random oversampler",0.10311,0.074605,0.140989,0.093347,0.074978,0.113132,0.084102,0.0713,0.100117
"Survival Tree, SMOTENC",0.096272,0.072018,0.125547,0.085278,0.069446,0.101508,0.077186,0.066352,0.089716


Exporting the integrated Brier score table for the simple linear simulation to LaTeX (the per-run difference from the baseline is not computed in the next cell):

In [6]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
simple_linear_brier_latex = simple_linear_brier.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Integrated Brier scores over 100 simulations from a Weibull distribution with a linear predictor containing 6 covariates. All Cox models performed similarly, while survival trees and random survival forests performed significantly worse.",
)
with open('../Project_file/tables/simple_linear_brier_sim_table.tex', 'w', encoding="utf-8") as tf:
    tf.write(simple_linear_brier_latex)

## Exponential REDO

In [7]:
# Mean and 5% / 95% quantile summary for the exponential-predictor simulation
# (concordance), one row per model.
exponential_concordance = table_maker(file="exponential_sim.csv")
exponential_concordance

Cohort Size,500.0,500.0,500.0
Quantile/Mean,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Unweighted Cox PH,0.931041,0.887645,0.967134
"Cox PH, Barlow weights",0.931041,0.887645,0.967134
"Cox PH, Prentice weights",0.931041,0.887645,0.967134
"Cox PH, Self-Prentice weights",0.931041,0.887645,0.967134
"Cox PH Lasso, Barlow weights",0.931041,0.887645,0.967134
"Cox PH Ridge, Barlow weights",0.931041,0.887645,0.967134
"Cox PH Elastic Net, Barlow weights",0.931041,0.887645,0.967134
"Survival Tree, unweighted",0.894599,0.842052,0.943662
"Survival Tree, random oversampler",0.888519,0.831978,0.931214
"Survival Tree, SMOTENC",0.883674,0.828805,0.932441


In [187]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
# The backslash in "\\exp" is escaped explicitly; a bare "\e" is an invalid
# escape sequence that newer Python versions warn about.
exponential_concordance_latex = exponential_concordance.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Concordance scores over 100 simulations from a Weibull distribution with a predictor of the form $\\exp(2 x_i)$. Cox models had a slightly higher performance on average than survival trees or random survival forests.",
)
with open('../Project_file/tables/exponential_sim_table.tex', 'w', encoding="utf-8") as tf:
    tf.write(exponential_concordance_latex)

In [8]:
# Mean and 5% / 95% quantile summary for the exponential-predictor simulation
# (Brier scores), one row per model.
exponential_brier = table_maker(file="exponential_simulation_brier.csv")
exponential_brier

Cohort Size,500.0,500.0,500.0
Quantile/Mean,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Unweighted Cox PH,0.044799,0.03658,0.05474
"Cox PH, Barlow weights",0.047298,0.036388,0.060084
"Cox PH, Prentice weights",0.043506,0.034453,0.053999
"Cox PH, Self-Prentice weights",0.04407,0.035794,0.054248
"Cox PH Lasso, Barlow weights",0.047298,0.036388,0.060084
"Cox PH Ridge, Barlow weights",0.047298,0.036388,0.060084
"Cox PH Elastic Net, Barlow weights",0.047298,0.036388,0.060084
"Survival Tree, unweighted",0.089197,0.072403,0.110145
"Survival Tree, random oversampler",0.09205,0.077214,0.111072
"Survival Tree, SMOTENC",0.084969,0.06904,0.101038


In [10]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
# The backslash in "\\exp" is escaped explicitly; a bare "\e" is an invalid
# escape sequence that newer Python versions warn about.
exponential_brier_latex = exponential_brier.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Integrated Brier scores over 100 simulations from a Weibull distribution with a predictor of the form $\\exp(2 x_i)$. Cox models had a slightly higher performance on average than survival trees or random survival forests.",
)
with open('../Project_file/tables/exponential_brier.tex', 'w', encoding="utf-8") as tf:
    tf.write(exponential_brier_latex)

## Interaction

In [5]:
# Mean and 5% / 95% quantile summary for the interaction simulation
# (concordance), one row per model.
interaction_concordance = table_maker(file="interaction_sim.csv")
interaction_concordance

Cohort Size,500.0,500.0,500.0
Quantile/Mean,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Unweighted Cox PH,0.679395,0.611345,0.751464
"Cox PH, Barlow weights",0.679615,0.611972,0.751786
"Cox PH, Prentice weights",0.679616,0.611968,0.751792
"Cox PH, Self-Prentice weights",0.676624,0.606486,0.753238
"Cox PH Lasso, Barlow weights",0.679493,0.611972,0.751786
"Cox PH Ridge, Barlow weights",0.67958,0.611045,0.751786
"Cox PH Elastic Net, Barlow weights",0.679283,0.611972,0.751786
"Survival Tree, unweighted",0.832272,0.791731,0.874411
"Survival Tree, random oversampler",0.820288,0.77806,0.861663
"Survival Tree, SMOTENC",0.82307,0.784212,0.867218


In [6]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
interaction_concordance_latex = interaction_concordance.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Concordance scores over 100 Weibull simulations with two linear predictors and an interaction variable. Survival trees and random survival forests performed significantly better than Cox methods.",
)
with open('../Project_file/tables/interaction_sim_table.tex', 'w', encoding="utf-8") as tf:
    tf.write(interaction_concordance_latex)

In [190]:
# Mean and 5% / 95% quantile summary for the interaction simulation
# (Brier scores), one row per model.
interaction_brier = table_maker(file="interaction_simulation_brier.csv")
interaction_brier

Cohort Size,500.0,500.0,500.0
Quantile/Mean,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Unweighted Cox PH,0.101871,0.087242,0.116638
"Cox PH, Barlow weights",0.084401,0.065469,0.109201
"Cox PH, Prentice weights",0.090266,0.076094,0.106787
"Cox PH, Self-Prentice weights",0.080365,0.063117,0.105584
"Cox PH Lasso, Barlow weights",0.086239,0.06707,0.109201
"Cox PH Ridge, Barlow weights",0.084686,0.065469,0.109201
"Cox PH Elastic Net, Barlow weights",0.085203,0.065469,0.11166
"Survival Tree, unweighted",0.066188,0.053101,0.083156
"Survival Tree, random oversampler",0.066999,0.051123,0.083548
"Survival Tree, SMOTENC",0.069436,0.055086,0.087276


Exporting the integrated Brier score table for the interaction simulation to LaTeX (the per-run difference from the baseline is not computed in the next cell):

In [191]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
interaction_brier_latex = interaction_brier.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Integrated Brier scores over 100 Weibull simulations with two linear predictors and an interaction variable. All Cox models performed similarly, significantly worse than survival trees and random forests.",
)
with open('../Project_file/tables/interaction_brier_sim_table.tex', 'w', encoding="utf-8") as tf:
    tf.write(interaction_brier_latex)

## Accelerated failure time

In [13]:
# Mean and 5% / 95% quantile summary for the accelerated failure time
# simulation (concordance), one row per model.
# NOTE(review): in the output recorded below, the 500-cohort columns hold
# values of roughly 0.03-0.17 that are identical to the 500-cohort columns of
# the AFT Brier table, while the 1000-cohort columns look like concordance
# scores. Check that accelerated_time_sim2.csv contains concordance scores
# for every cohort size before using the exported table.
AFT_concordance = table_maker(file="accelerated_time_sim2.csv")
AFT_concordance

Cohort Size,500.0,500.0,500.0,1000.0,1000.0,1000.0
Quantile/Mean,mean,0.05,0.95,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Unweighted Cox PH,0.044827,0.032239,0.058092,0.959065,0.950389,0.967692
"Cox PH, Barlow weights",0.053988,0.037361,0.074403,0.957764,0.949033,0.967179
"Cox PH, Prentice weights",0.045074,0.03331,0.059676,0.95801,0.949309,0.967548
"Cox PH, Self-Prentice weights",0.046003,0.0336,0.060682,0.958057,0.949032,0.967321
"Cox PH Lasso, Barlow weights",0.054062,0.037361,0.074403,0.957764,0.949033,0.967179
"Cox PH Ridge, Barlow weights",0.053988,0.037361,0.074403,0.957764,0.949033,0.967179
"Cox PH Elastic Net, Barlow weights",0.053988,0.037361,0.074403,0.957764,0.949033,0.967179
"Survival Tree, unweighted",0.109981,0.080448,0.142895,0.904297,0.882387,0.928994
"Survival Tree, random oversampler",0.118752,0.082864,0.156267,0.888011,0.860454,0.916911
"Survival Tree, SMOTENC",0.119553,0.08218,0.173219,0.869582,0.81989,0.909793


In [194]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
AFT_concordance_latex = AFT_concordance.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Concordance scores for 100  simulations from an accelerated failure time model with a log-normal error distribution and a linear predictor with 10 covariates. All Cox models performed similarly and not significantly differently from the baseline. Both survival trees and random survival forests performed significantly worse than the baseline.",
)
with open('../Project_file/tables/accelerated2_sim_table.tex', 'w', encoding="utf-8") as tf:
    tf.write(AFT_concordance_latex)

In [14]:
# Mean and 5% / 95% quantile summary for the accelerated failure time
# simulation (Brier scores), one row per model.
AFT_brier = table_maker(file="accelerated_time_simulation_brier.csv")
AFT_brier

Cohort Size,500.0,500.0,500.0,1000.0,1000.0,1000.0
Quantile/Mean,mean,0.05,0.95,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Unweighted Cox PH,0.044827,0.032239,0.058092,0.041734,0.039785,0.043683
"Cox PH, Barlow weights",0.053988,0.037361,0.074403,0.046902,0.045261,0.048543
"Cox PH, Prentice weights",0.045074,0.03331,0.059676,0.041527,0.041313,0.041741
"Cox PH, Self-Prentice weights",0.046003,0.0336,0.060682,0.041899,0.039831,0.043968
"Cox PH Lasso, Barlow weights",0.054062,0.037361,0.074403,0.046902,0.045261,0.048543
"Cox PH Ridge, Barlow weights",0.053988,0.037361,0.074403,0.046902,0.045261,0.048543
"Cox PH Elastic Net, Barlow weights",0.053988,0.037361,0.074403,0.046902,0.045261,0.048543
"Survival Tree, unweighted",0.109981,0.080448,0.142895,0.087682,0.08235,0.093015
"Survival Tree, random oversampler",0.118752,0.082864,0.156267,0.096055,0.093845,0.098266
"Survival Tree, SMOTENC",0.119553,0.08218,0.173219,0.093697,0.069927,0.117467


In [197]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
AFT_brier_latex = AFT_brier.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Integrated Brier scores over 100 accelerated failure time simulations. Cox models performed similarly and better than survival trees and random survival forests.",
)
with open('../Project_file/tables/acceleration_time_brier_sim_table.tex', 'w', encoding="utf-8") as tf:
    tf.write(AFT_brier_latex)

## High dimensionality

In [12]:
# Mean and 5% / 95% quantile summary for the high-dimensional simulation
# (concordance), one row per model.
high_dimension_concordance = table_maker(file="high_dimension.csv")
high_dimension_concordance

Cohort Size,500.0,500.0,500.0
Quantile/Mean,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Unweighted Cox PH,0.618924,0.566194,0.672089
"Cox PH, Barlow weights",0.599736,0.537572,0.666147
"Cox PH, Prentice weights",0.603329,0.541832,0.66712
"Cox PH, Self-Prentice weights",0.584607,0.514887,0.652012
"Cox PH Lasso, Barlow weights",0.614102,0.557766,0.673962
"Cox PH Ridge, Barlow weights",0.667494,0.621211,0.722891
"Cox PH Elastic Net, Barlow weights",0.652793,0.605806,0.706531
"Survival Tree, unweighted",0.560096,0.501094,0.6153
"Survival Tree, random oversampler",0.56325,0.516418,0.620235
"Survival Tree, SMOTENC",0.558561,0.495294,0.617776


In [13]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
high_dimension_concordance_latex = high_dimension_concordance.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Concordance scores over 100 Weibull simulations with a large number of covariates. Ridge and Elastic Net Cox models provide the highest average concordance scores, followed by random oversampled random survival forests.",
)
with open('../Project_file/tables/high_dimension_table.tex', 'w', encoding="utf-8") as tf:
    tf.write(high_dimension_concordance_latex)

In [7]:
# Mean and 5% / 95% quantile summary for the high-dimensional simulation
# (Brier scores), one row per model.
high_dimension_brier = table_maker(file="high_dimension_brier.csv")
high_dimension_brier

Cohort Size,500.0,500.0,500.0
Quantile/Mean,mean,0.05,0.95
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Unweighted Cox PH,0.073582,0.054737,0.093657
"Cox PH, Barlow weights",0.072109,0.051425,0.094274
"Cox PH, Prentice weights",0.074861,0.05395,0.094199
"Cox PH, Self-Prentice weights",0.09282,0.060915,0.126392
"Cox PH Lasso, Barlow weights",0.059934,0.044251,0.076005
"Cox PH Ridge, Barlow weights",0.057334,0.043049,0.072502
"Cox PH Elastic Net, Barlow weights",0.057,0.043124,0.074075
"Survival Tree, unweighted",0.144337,0.117665,0.169353
"Survival Tree, random oversampler",0.143554,0.121243,0.171166
"Survival Tree, SMOTENC",0.141871,0.117928,0.169476


In [14]:
# Render the LaTeX first: opening the file with 'w' truncates it, so rendering
# inside the `with` block would leave an empty .tex file if to_latex failed.
high_dimension_brier_latex = high_dimension_brier.to_latex(
    float_format="%.3f",
    bold_rows=True,
    multicolumn_format="c",
    caption="Integrated Brier scores over 100 Weibull simulations with a large number of covariates. Penalised Cox had the lowest Brier scores, followed by random oversampled random survival forests.",
)
with open('../Project_file/tables/high_dimension_brier_table.tex', 'w', encoding="utf-8") as tf:
    tf.write(high_dimension_brier_latex)