# Simple Linear Simulation test

Setup: import model, simulation and scoring functions.

In [2]:
from models import *
from data_simulation import *
from score_functions import *

import pandas as pd
import numpy as np

## Number of observations

First, we look at how model performance varies with the number of observations, starting with concordance scores.

Creating data frame to record model scores

In [5]:
# model_scores = pd.DataFrame({
#     "model": [],
#     "obs": [],
#     "score": []
# })
# model_scores

Loading previously collected data if it exists: 

In [5]:
# Resume from previously saved scores when the CSV exists; on a first run
# (no file yet) start from an empty table with the same three columns.
try:
    model_scores = pd.read_csv("simple_linear_simulation.csv", index_col=0)
except FileNotFoundError:
    model_scores = pd.DataFrame({
        "model": [],
        "obs": [],
        "score": [],
    })

In [6]:
# Parameters
betas = [0.1, 1, 2, 0.1, 1, 2]
n_covariates = len(betas)
prop_cat = 0.5  # proportion of variables that are categorical
censor_prop = 0.7  # proportion of individuals censored


def _concordance_row(label, n_obs, fitted, test_set, **score_kwargs):
    """Return one result row with the concordance score of `fitted` on `test_set`.

    `score_kwargs` is forwarded unchanged to `concordance_score` (the Cox
    models below pass `lifelines=True`; the tree and forest models pass nothing).
    """
    return {
        "model": label,
        "obs": n_obs,
        "score": concordance_score(n_covariates, test_set, fitted, **score_kwargs),
    }


for obs in [1500]:
    for reps in range(0, 100):
        print(obs, reps)

        # Draw a new simulated sample and split it for this repetition.
        sample = weibull_simple_linear_sim(betas, prop_cat, obs, censor_prop, pi=0.5)
        cases, subcohort, cohort, test = cch_splitter(sample)

        # Rows for this repetition; each model is fitted and then scored
        # before the next one is fitted, in the order listed below.
        rep_rows = []

        # Unweighted Cox model
        cph1 = fit_cox(cases, subcohort, n_covariates)
        rep_rows.append(_concordance_row(
            "Unweighted Cox PH", obs, cph1, test, lifelines=True))

        # Cox model with Barlow weights
        cph2 = fit_cox_barlow(cases, subcohort, n_covariates, obs)
        rep_rows.append(_concordance_row(
            "Cox PH, Barlow weights", obs, cph2, test, lifelines=True))

        # Cox model with Prentice weights
        cph3 = fit_cox_prentice(cases, subcohort, n_covariates)
        rep_rows.append(_concordance_row(
            "Cox PH, Prentice weights", obs, cph3, test, lifelines=True))

        # Cox model with Self-Prentice weights
        cph4 = fit_cox_self_prentice(cases, subcohort, n_covariates)
        rep_rows.append(_concordance_row(
            "Cox PH, Self-Prentice weights", obs, cph4, test, lifelines=True))

        # Ridge Cox model, Barlow weights (l1_ratio = 0)
        cph5 = fit_pen_cox_barlow(cases, subcohort, n_covariates, obs,
                                  l1_ratio=0, penalizer_show=False)
        rep_rows.append(_concordance_row(
            "Cox PH Ridge, Barlow weights", obs, cph5, test, lifelines=True))

        # Lasso Cox model, Barlow weights (l1_ratio = 1)
        cph6 = fit_pen_cox_barlow(cases, subcohort, n_covariates, obs,
                                  l1_ratio=1, penalizer_show=False)
        rep_rows.append(_concordance_row(
            "Cox PH LASSO, Barlow weights", obs, cph6, test, lifelines=True))

        # Elastic net Cox model, Barlow weights (l1_ratio = 0.5)
        cph7 = fit_pen_cox_barlow(cases, subcohort, n_covariates, obs,
                                  l1_ratio=0.5, penalizer_show=False)
        rep_rows.append(_concordance_row(
            "Cox PH Elastic Net, Barlow weights", obs, cph7, test, lifelines=True))

        # Unweighted survival tree
        tree = unweighted_tree(cases, subcohort, n_covariates)
        rep_rows.append(_concordance_row(
            "Survival Tree, unweighted", obs, tree, test))

        # Survival tree, random oversampling
        tree1 = ros_tree(cases, subcohort, n_covariates, obs)
        rep_rows.append(_concordance_row(
            "Survival Tree, random oversampler", obs, tree1, test))

        # Survival tree, SMOTENC
        tree2 = smotenc_tree(cases, subcohort, n_covariates, obs)
        rep_rows.append(_concordance_row(
            "Survival Tree, SMOTENC", obs, tree2, test))

        # Unweighted random survival forest
        rsf = unweighted_rsf(cases, subcohort, n_covariates)
        rep_rows.append(_concordance_row(
            "Random Survival Forest, unweighted", obs, rsf, test))

        # Random survival forest, random oversampling
        rsf1 = ros_rsf(cases, subcohort, n_covariates, obs)
        rep_rows.append(_concordance_row(
            "Random Survival Forest, random oversampler", obs, rsf1, test))

        # Random survival forest, SMOTENC
        rsf2 = smotenc_rsf(cases, subcohort, n_covariates, obs)
        rep_rows.append(_concordance_row(
            "Random Survival Forest, SMOTENC", obs, rsf2, test))

        # DataFrame.append was removed in pandas 2.0; concatenate the whole
        # repetition at once instead. A repetition that fails part-way adds
        # no rows, so every model keeps the same number of scores.
        model_scores = pd.concat(
            [model_scores, pd.DataFrame(rep_rows)],
            ignore_index=True,
        )

        # Save after every completed repetition (previously the save ran at
        # the top of the loop, so the final repetition was never written).
        model_scores.to_csv("simple_linear_simulation.csv")


        
        

1500 0
1500 1
1500 2
1500 3
1500 4
1500 5
1500 6
1500 7
1500 8
1500 9
1500 10
1500 11
1500 12
1500 13
1500 14
1500 15
1500 16
1500 17
1500 18
1500 19
1500 20
1500 21
1500 22
1500 23
1500 24
1500 25
1500 26
1500 27
1500 28
1500 29
1500 30
1500 31
1500 32
1500 33
1500 34
1500 35
1500 36
1500 37
1500 38
1500 39
1500 40
1500 41
1500 42
1500 43
1500 44
1500 45
1500 46
1500 47
1500 48
1500 49
1500 50
1500 51
1500 52
1500 53
1500 54
1500 55
1500 56
1500 57
1500 58
1500 59
1500 60
1500 61
1500 62
1500 63
1500 64
1500 65
1500 66
1500 67
1500 68
1500 69
1500 70
1500 71
1500 72
1500 73
1500 74
1500 75
1500 76
1500 77
1500 78


Now looking at Brier scores:

In [7]:
# model_scores = pd.DataFrame({
#     "model": [],
#     "obs": [],
#     "score": []
# })

In [7]:
# model_scores = pd.read_csv("simple_linear_simulation.csv",index_col = 0)

Unnamed: 0,model,obs,score


In [3]:
# Parameters
betas = [0.1, 1, 2, 0.1, 1, 2]
n_covariates = len(betas)
prop_cat = 0.5  # passed to the simulator as its second argument
censor_prop = 0.7  # passed to the simulator as its fourth argument


def _brier_row(label, n_obs, fitted, splits, lifelines):
    """Return one result row with the integrated Brier score of `fitted`.

    `splits` is the `(cases, subcohort, cohort, test)` tuple for the current
    repetition; its items are passed to `int_brier_score` in that order,
    followed by `n_covariates` and the fitted model.
    """
    return {
        "model": label,
        "obs": n_obs,
        "score": int_brier_score(*splits, n_covariates, fitted, lifelines=lifelines),
    }


# Load the scores collected so far once, rather than re-reading the file on
# every repetition; the file is rewritten at the end of each repetition.
model_scores = pd.read_csv("simple_linear_simulation_brier.csv", index_col=0)

for obs in [1500]:
    for reps in range(0, 100):
        print(obs, reps)

        # Draw a new simulated sample and split it for this repetition.
        sample = weibull_simple_linear_sim(betas, prop_cat, obs, censor_prop, pi=0.5)
        cases, subcohort, cohort, test = cch_splitter(sample)
        splits = (cases, subcohort, cohort, test)

        # Rows for this repetition; each model is fitted and then scored
        # before the next one is fitted, in the order listed below.
        rep_rows = []

        # Unweighted Cox model
        cph1 = fit_cox(cases, subcohort, n_covariates)
        rep_rows.append(_brier_row(
            "Unweighted Cox PH", obs, cph1, splits, True))

        # Cox model with Barlow weights
        cph2 = fit_cox_barlow(cases, subcohort, n_covariates, obs)
        rep_rows.append(_brier_row(
            "Cox PH, Barlow weights", obs, cph2, splits, True))

        # Cox model with Prentice weights
        cph3 = fit_cox_prentice(cases, subcohort, n_covariates)
        rep_rows.append(_brier_row(
            "Cox PH, Prentice weights", obs, cph3, splits, True))

        # Cox model with Self-Prentice weights
        cph4 = fit_cox_self_prentice(cases, subcohort, n_covariates)
        rep_rows.append(_brier_row(
            "Cox PH, Self-Prentice weights", obs, cph4, splits, True))

        # Ridge Cox model, Barlow weights (l1_ratio = 0)
        cph5 = fit_pen_cox_barlow(cases, subcohort, n_covariates, obs,
                                  l1_ratio=0, penalizer_show=False)
        rep_rows.append(_brier_row(
            "Cox PH Ridge, Barlow weights", obs, cph5, splits, True))

        # Lasso Cox model, Barlow weights (l1_ratio = 1)
        # NOTE(review): the concordance cell labels this model
        # "Cox PH LASSO, Barlow weights"; the spelling here is left unchanged
        # so rows already stored in the CSV keep matching.
        cph6 = fit_pen_cox_barlow(cases, subcohort, n_covariates, obs,
                                  l1_ratio=1, penalizer_show=False)
        rep_rows.append(_brier_row(
            "Cox PH Lasso, Barlow weights", obs, cph6, splits, True))

        # Elastic net Cox model, Barlow weights (l1_ratio = 0.5)
        cph7 = fit_pen_cox_barlow(cases, subcohort, n_covariates, obs,
                                  l1_ratio=0.5, penalizer_show=False)
        rep_rows.append(_brier_row(
            "Cox PH Elastic Net, Barlow weights", obs, cph7, splits, True))

        # Unweighted survival tree
        tree = unweighted_tree(cases, subcohort, n_covariates)
        rep_rows.append(_brier_row(
            "Survival Tree, unweighted", obs, tree, splits, False))

        # Survival tree, random oversampling
        tree1 = ros_tree(cases, subcohort, n_covariates, obs)
        rep_rows.append(_brier_row(
            "Survival Tree, random oversampler", obs, tree1, splits, False))

        # Survival tree, SMOTENC
        tree2 = smotenc_tree(cases, subcohort, n_covariates, obs)
        rep_rows.append(_brier_row(
            "Survival Tree, SMOTENC", obs, tree2, splits, False))

        # Unweighted random survival forest
        rsf = unweighted_rsf(cases, subcohort, n_covariates)
        rep_rows.append(_brier_row(
            "Random Survival Forest, unweighted", obs, rsf, splits, False))

        # Random survival forest, random oversampling
        rsf1 = ros_rsf(cases, subcohort, n_covariates, obs)
        rep_rows.append(_brier_row(
            "Random Survival Forest, random oversampler", obs, rsf1, splits, False))

        # Random survival forest, SMOTENC
        rsf2 = smotenc_rsf(cases, subcohort, n_covariates, obs)
        rep_rows.append(_brier_row(
            "Random Survival Forest, SMOTENC", obs, rsf2, splits, False))

        # DataFrame.append was removed in pandas 2.0; concatenate the whole
        # repetition at once instead. A repetition that fails part-way adds
        # no rows, so every model keeps the same number of scores.
        model_scores = pd.concat(
            [model_scores, pd.DataFrame(rep_rows)],
            ignore_index=True,
        )

        # Save after every completed repetition so an interrupted run loses
        # at most the repetition in progress.
        model_scores.to_csv("simple_linear_simulation_brier.csv")

        
        

1500 0
1500 1
1500 2
1500 3
1500 4
1500 5
1500 6
1500 7
1500 8
1500 9
1500 10
1500 11
1500 12
1500 13
1500 14
1500 15
1500 16
1500 17
1500 18
1500 19
1500 20
1500 21
1500 22
1500 23
1500 24
1500 25
1500 26
1500 27
1500 28
1500 29
1500 30
1500 31
1500 32
1500 33
1500 34
1500 35
1500 36
1500 37
1500 38
1500 39
1500 40
1500 41
1500 42
1500 43
1500 44
1500 45
1500 46
1500 47
1500 48
1500 49
1500 50
1500 51
1500 52
1500 53
1500 54
1500 55
1500 56
1500 57
1500 58
1500 59
1500 60
1500 61
1500 62
1500 63
1500 64
1500 65
1500 66
1500 67
1500 68
1500 69
1500 70
1500 71
1500 72
1500 73
1500 74
1500 75
1500 76
1500 77
1500 78
1500 79
