# Weibull Simple linear predictor 2

In [67]:
import numpy as np
from scipy.stats import weibull_min # r weibull simulation
from scipy.stats import norm # for covariate simulation
from scipy.stats import gamma # for weibull shape parameter
from scipy.stats import bernoulli # for censoring
from scipy.stats import uniform
from scipy.stats.mstats import mquantiles
import pandas as pd

## Simulated data

In [68]:
def weibull_simple_linear_sim(n_beta, prop_cat, obs, censor_prop, show_beta = False, sigma = 1):
    betas = norm.rvs(scale = sigma, size = n_beta) #generates the coefficients
    X_norm = norm.rvs(size = obs*int(np.floor(n_beta*(1-prop_cat)))).reshape((obs,int(np.floor(n_beta*(1-prop_cat))))) # matrix of normal covariates
    X_cat = bernoulli.rvs(p = 0.5,size = obs*int(np.ceil(n_beta*(prop_cat)))).reshape((obs,int(np.ceil(n_beta*(prop_cat))))) # matrix of categorical) covariates
    X = np.hstack([X_norm,X_cat])
    
    c = uniform.rvs(size = 1, loc = 0.5, scale = 4.5) # shape parameter of weibull
    
    lin_pred = np.matmul(X,betas) # linear predictor
    
    sim_data = pd.DataFrame(X)
    sim_data["y"] = weibull_min.rvs(c, scale = np.exp(-lin_pred/c)) # simulating survival times from weibull distribution
    
    dropout_prop = uniform.rvs(size = 1, scale = 0.5)*censor_prop # proportion that will be censored by dropping out of the study
    sim_data["dropout"] = bernoulli.rvs(size = obs, p = dropout_prop) == 1 #indicator for subject droppoing out
    sim_data["dropout_time"] = np.where(sim_data["dropout"], uniform.rvs(scale = sim_data["y"]), sim_data["y"])
    
    max_time = float(mquantiles(sim_data["dropout_time"], prob = 1 - censor_prop)) # quantile above which we censor
    sim_data = sim_data.assign(
        end_censor = lambda df: df["dropout_time"].map(lambda dropout_time: dropout_time > max_time), # indicator for censoring because of study ending
        time = lambda df: df["dropout_time"].map(lambda dropout_time: min(dropout_time,max_time)), # censoring any times above max time
        event = lambda df: ~(df["dropout"] | df["end_censor"])
    )
    
    if show_beta:
        print(betas)
        return(sim_data)
    else:
        return(sim_data)

Testing the function:

In [69]:
weibull_simple_linear_sim(10, 0.5, 10, 0.8, show_beta = True, sigma = 1)

[-1.01010358  1.25214201 -0.5743592   0.11838605 -0.43534297  0.73922308
  0.43297165 -1.0125801   1.1665707  -1.27221693]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,y,dropout,dropout_time,end_censor,time,event
0,0.422934,-0.81899,-0.203005,0.01346,-0.612238,0.0,0.0,1.0,1.0,1.0,1.856445,False,1.856445,True,0.212973,False
1,0.917852,0.420769,-0.860256,-0.443795,1.457332,0.0,0.0,0.0,0.0,1.0,0.921308,True,0.103437,False,0.103437,False
2,0.424371,0.763135,-0.658967,0.105707,-1.916156,0.0,1.0,1.0,0.0,0.0,0.690189,True,0.538825,True,0.212973,False
3,0.585137,-0.396136,1.267027,-0.44832,0.50154,0.0,0.0,1.0,1.0,1.0,1.672466,True,1.495173,True,0.212973,False
4,1.732819,-2.532042,1.66019,-1.683145,-1.155505,0.0,0.0,1.0,0.0,1.0,5.493418,False,5.493418,True,0.212973,False
5,-2.852285,-1.020103,0.162511,-0.442184,0.340905,0.0,1.0,0.0,0.0,0.0,0.667641,False,0.667641,True,0.212973,False
6,-0.449862,-0.603468,-1.623078,0.625805,-1.539407,1.0,0.0,0.0,1.0,0.0,0.346039,False,0.346039,True,0.212973,False
7,-0.380758,1.243948,0.550809,0.813729,0.542678,1.0,1.0,0.0,1.0,1.0,0.197649,True,0.10842,False,0.10842,False
8,1.783659,0.54129,-0.793805,0.053552,0.094911,1.0,1.0,1.0,0.0,1.0,0.996149,True,0.417465,True,0.212973,False
9,1.675224,1.798781,1.653008,-0.21469,0.339048,0.0,0.0,0.0,0.0,1.0,1.198554,False,1.198554,True,0.212973,False


## Test analysis

### Case-subcohort

To test, sample a dataset from the sampler:

In [70]:
sample = weibull_simple_linear_sim(10, 0.5, 1500, 0.6, show_beta = True, sigma = 1)
sample

[-0.43199613 -0.65686558 -0.94930477  1.24894282  0.96146964  1.20398661
  0.46627411 -0.59149289 -0.25290447 -0.30480965]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,y,dropout,dropout_time,end_censor,time,event
0,0.258817,0.290809,-1.033868,0.043459,-1.993710,1.0,0.0,1.0,1.0,0.0,0.689137,False,0.689137,True,0.594752,False
1,2.208006,0.752093,-1.559689,-0.378957,-0.278663,0.0,1.0,0.0,1.0,1.0,0.464061,False,0.464061,False,0.464061,True
2,0.269814,0.592702,-1.914707,0.913435,-0.885676,0.0,1.0,0.0,0.0,1.0,0.563128,True,0.133939,False,0.133939,False
3,-0.873534,-0.768994,-1.759941,-1.131725,0.361836,0.0,0.0,1.0,1.0,0.0,0.919677,False,0.919677,True,0.594752,False
4,-0.405061,-0.285142,-0.666357,0.301234,0.812583,0.0,0.0,0.0,1.0,1.0,0.671518,False,0.671518,True,0.594752,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,-0.495976,-0.587850,-0.773309,-1.009303,-0.272919,1.0,1.0,1.0,1.0,0.0,0.958283,False,0.958283,True,0.594752,False
1496,0.418295,-0.852769,1.020421,0.005004,-0.009457,1.0,0.0,1.0,1.0,0.0,1.161690,False,1.161690,True,0.594752,False
1497,0.660563,0.903730,1.606314,0.262362,-0.643334,0.0,0.0,1.0,0.0,1.0,3.363501,True,3.173903,True,0.594752,False
1498,1.032771,-0.733712,-0.474936,0.838676,1.823454,0.0,1.0,0.0,1.0,1.0,0.258619,True,0.158379,False,0.158379,False


Function for splitting data samples:

In [71]:
def cch_splitter(sample):
    cohort = sample.iloc[0:int(round(2*len(sample)/3))] # subsetting the cohort
    cases = cohort[cohort['event'] == True] # subsetting cases in the cohort
    subcohort = cohort.sample(n = len(cases))
    
    test = sample.iloc[int(np.round(2*len(sample)/3)):len(sample)] # subsetting the test set
    
    return(cases, subcohort, cohort, test)

Function for changing data for Cox model with Barlow weights:

In [72]:
def barlow_trans(cases,subcohort):
    order = int(np.floor(np.log(max(cases["time"]))/np.log(10))) # deciding what the granularity will be
    cases = cases.assign(
        time = round(cases["time"],- order + 5),
        start_time = lambda df: df["time"] - 10**-(- order + 5), # setting events outside subcohort to start just before they occur
        subcohort = False # showing that these are the cases chosen outside of the subcohort
    )
    cases = cases.query("start_time > 0") #filtering out readings with negative start times
    
    subcohort = subcohort.assign(
        time = lambda df: np.where(df["event"], df["time"] - 10**-(- order + 5), df["time"]), # if it is a case, the weight should be the same as the subcohort until close to the time of the event. 
        start_time = 0, # the events start from the origin
        event = False,
        subcohort = True
    ) 

    return(pd.concat([cases,subcohort]))
    

Splitting the test data:

In [73]:
cases, subcohort, cohort, test = cch_splitter(sample)

In [74]:
case_subcohort = barlow_trans(cases,subcohort)

## Cox PH

In [75]:
from lifelines import CoxPHFitter

In [76]:
cph = CoxPHFitter()

In [77]:
case_subcohort_df = case_subcohort[[i for i in range(0,10)]+["start_time", "time", "event"]]
case_subcohort_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,start_time,time,event
1,2.208006,0.752093,-1.559689,-0.378957,-0.278663,0.0,1.0,0.0,1.0,1.0,0.464060,0.464061,True
6,-0.150383,0.508230,-0.953275,1.426950,1.803416,1.0,1.0,0.0,1.0,0.0,0.242058,0.242059,True
9,-0.223404,-0.792886,-0.935734,-0.501477,1.976391,1.0,0.0,0.0,1.0,0.0,0.550374,0.550375,True
10,-1.711492,-1.429028,-1.581501,-0.085552,0.414163,0.0,1.0,1.0,0.0,1.0,0.452304,0.452305,True
12,-0.494873,-0.435093,-1.118524,0.763039,0.039362,1.0,1.0,1.0,0.0,0.0,0.488330,0.488331,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,0.009606,-0.595212,1.706497,-0.220094,-0.864048,0.0,0.0,0.0,1.0,0.0,0.000000,0.594752,False
460,0.065113,1.421616,-0.983576,0.108782,0.360602,1.0,0.0,1.0,0.0,0.0,0.000000,0.594752,False
821,-0.922213,0.327069,-0.415452,0.349362,-0.841031,1.0,0.0,0.0,1.0,1.0,0.000000,0.594752,False
622,0.218759,-1.109913,-0.221019,0.062210,0.955832,0.0,0.0,0.0,1.0,0.0,0.000000,0.518193,False


In [78]:
case_subcohort[["start_time","time","event"]]

Unnamed: 0,start_time,time,event
1,0.464060,0.464061,True
6,0.242058,0.242059,True
9,0.550374,0.550375,True
10,0.452304,0.452305,True
12,0.488330,0.488331,True
...,...,...,...
356,0.000000,0.594752,False
460,0.000000,0.594752,False
821,0.000000,0.594752,False
622,0.000000,0.518193,False


In [79]:
samp_fraction = len(subcohort)/len(cohort)
samp_fraction

0.22

In [80]:
case_subcohort_df["weights"] = np.where(case_subcohort["subcohort"],1/samp_fraction,1)
case_subcohort_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  case_subcohort_df["weights"] = np.where(case_subcohort["subcohort"],1/samp_fraction,1)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,start_time,time,event,weights
1,2.208006,0.752093,-1.559689,-0.378957,-0.278663,0.0,1.0,0.0,1.0,1.0,0.464060,0.464061,True,1.000000
6,-0.150383,0.508230,-0.953275,1.426950,1.803416,1.0,1.0,0.0,1.0,0.0,0.242058,0.242059,True,1.000000
9,-0.223404,-0.792886,-0.935734,-0.501477,1.976391,1.0,0.0,0.0,1.0,0.0,0.550374,0.550375,True,1.000000
10,-1.711492,-1.429028,-1.581501,-0.085552,0.414163,0.0,1.0,1.0,0.0,1.0,0.452304,0.452305,True,1.000000
12,-0.494873,-0.435093,-1.118524,0.763039,0.039362,1.0,1.0,1.0,0.0,0.0,0.488330,0.488331,True,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,0.009606,-0.595212,1.706497,-0.220094,-0.864048,0.0,0.0,0.0,1.0,0.0,0.000000,0.594752,False,4.545455
460,0.065113,1.421616,-0.983576,0.108782,0.360602,1.0,0.0,1.0,0.0,0.0,0.000000,0.594752,False,4.545455
821,-0.922213,0.327069,-0.415452,0.349362,-0.841031,1.0,0.0,0.0,1.0,1.0,0.000000,0.594752,False,4.545455
622,0.218759,-1.109913,-0.221019,0.062210,0.955832,0.0,0.0,0.0,1.0,0.0,0.000000,0.518193,False,4.545455


In [81]:
len(case_subcohort)

440

In [82]:
cph.fit(case_subcohort_df, entry_col = "start_time", duration_col = "time",event_col = "event",weights_col = "weights",robust = True)

<lifelines.CoxPHFitter: fitted with 1220 total observations, 1000 right-censored observations>

In [83]:
cph.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'time'
event col,'event'
weights col,'weights'
entry col,'start_time'
robust variance,True
baseline estimation,breslow
number of observations,1220
number of events observed,220
partial log-likelihood,-1060.55

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
0,-0.31,0.74,0.11,-0.52,-0.09,0.6,0.91,0.0,-2.81,<0.005,7.67
1,-1.31,0.27,0.19,-1.69,-0.94,0.19,0.39,0.0,-6.89,<0.005,37.42
2,-1.53,0.22,0.23,-1.99,-1.08,0.14,0.34,0.0,-6.57,<0.005,34.25
3,1.95,7.05,0.33,1.31,2.6,3.69,13.48,0.0,5.91,<0.005,28.11
4,1.49,4.44,0.18,1.14,1.85,3.11,6.33,0.0,8.23,<0.005,52.26
5,1.91,6.78,0.38,1.17,2.66,3.22,14.27,0.0,5.04,<0.005,21.02
6,0.95,2.57,0.31,0.34,1.55,1.41,4.7,0.0,3.08,<0.005,8.94
7,-0.89,0.41,0.46,-1.8,0.01,0.17,1.01,0.0,-1.93,0.05,4.24
8,-1.18,0.31,0.44,-2.03,-0.32,0.13,0.73,0.0,-2.69,0.01,7.12
9,-1.07,0.34,0.38,-1.82,-0.32,0.16,0.72,0.0,-2.81,<0.005,7.65

0,1
Concordance,0.50
Partial AIC,2141.09
log-likelihood ratio test,782.73 on 10 df
-log2(p) of ll-ratio test,534.74


In [84]:
test_preds = cph.predict_partial_hazard(test[range(0,10)])
test_preds

1000     0.000061
1001     0.002091
1002     1.528646
1003     3.225124
1004     0.394676
          ...    
1495     0.359666
1496     0.102434
1497     0.000406
1498    17.889386
1499     0.000018
Length: 500, dtype: float64

In [85]:
event_times = test["time"]
event_observed = test["event"]
event_times, event_observed

(1000    0.594752
 1001    0.594752
 1002    0.094617
 1003    0.450090
 1004    0.594752
           ...   
 1495    0.594752
 1496    0.594752
 1497    0.594752
 1498    0.158379
 1499    0.594752
 Name: time, Length: 500, dtype: float64,
 1000    False
 1001    False
 1002    False
 1003    False
 1004    False
         ...  
 1495    False
 1496    False
 1497    False
 1498    False
 1499    False
 Name: event, Length: 500, dtype: bool)

In [86]:
test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,y,dropout,dropout_time,end_censor,time,event
1000,-0.227662,1.953573,0.921170,-0.781057,-1.005937,1.0,0.0,1.0,1.0,1.0,3.114950,False,3.114950,True,0.594752,False
1001,-0.224389,0.470208,1.353653,-1.240791,-0.400471,1.0,0.0,1.0,0.0,0.0,1.121853,True,0.606354,True,0.594752,False
1002,-1.922499,-1.010341,-0.775196,-0.820469,0.278782,0.0,1.0,1.0,0.0,0.0,0.900131,True,0.094617,False,0.094617,False
1003,0.444050,0.038986,-1.881623,-1.569874,1.432481,0.0,1.0,0.0,0.0,0.0,0.684675,True,0.450090,False,0.450090,False
1004,-2.119922,-1.067572,-1.268957,-1.199887,-0.610640,0.0,1.0,0.0,0.0,1.0,0.643387,False,0.643387,True,0.594752,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,-0.495976,-0.587850,-0.773309,-1.009303,-0.272919,1.0,1.0,1.0,1.0,0.0,0.958283,False,0.958283,True,0.594752,False
1496,0.418295,-0.852769,1.020421,0.005004,-0.009457,1.0,0.0,1.0,1.0,0.0,1.161690,False,1.161690,True,0.594752,False
1497,0.660563,0.903730,1.606314,0.262362,-0.643334,0.0,0.0,1.0,0.0,1.0,3.363501,True,3.173903,True,0.594752,False
1498,1.032771,-0.733712,-0.474936,0.838676,1.823454,0.0,1.0,0.0,1.0,1.0,0.258619,True,0.158379,False,0.158379,False


In [87]:
from lifelines.utils import concordance_index

In [88]:
concordance_index(event_times, -test_preds, event_observed)

0.8628988374969082

You can also look at the concordance without censoring.

In [66]:
event_times = test["y"]
concordance_index(event_times, -test_preds)

0.8162164328657314

In [89]:
from sksurv.metrics import concordance_index_ipcw

In [131]:
survival_train = case_subcohort[["event","time"]].to_records(index = False)
survival_test = test[["event","time"]].to_records(index = False)

In [110]:
concordance_index_ipcw(survival_train, survival_test, test_preds)

(0.8569892827626162, 34887, 5543, 0, 0)

In [112]:
from sksurv.metrics import brier_score

In [115]:
max(test["time"])

0.5947519276410911

In [139]:
test_preds = cph.predict_survival_function(test[range(0,10)],times = 0.5)
test_preds = np.array(test_preds).transpose()
# test_preds

In [141]:
brier_score(survival_test, survival_test, test_preds,0.5)

(array([0.5]), array([0.09319492]))

In [144]:
from sksurv.metrics import cumulative_dynamic_auc

In [145]:
test_preds = cph.predict_partial_hazard(test[range(0,10)])

In [148]:
cumulative_dynamic_auc(survival_train,survival_test,test_preds,[0.1,0.2,0.3,0.4,0.5])

  true_pos = cumsum_tp / cumsum_tp[-1]


(array([       nan, 0.97143654, 0.95956232, 0.93030735, 0.88647603]), nan)

## Regularised

In [59]:
cph = cph = CoxPHFitter(penalizer = 0.01, l1_ratio = 1)
cph.fit(case_subcohort_df, entry_col = "start_time", duration_col = "time",event_col = "event",weights_col = "weights",robust = True)
cph.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'time'
event col,'event'
weights col,'weights'
entry col,'start_time'
penalizer,0.01
l1 ratio,1
robust variance,True
baseline estimation,breslow
number of observations,1323

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
0,-1.57,0.21,0.1,-1.77,-1.37,0.17,0.25,0.0,-15.37,<0.005,174.61
1,0.48,1.62,0.09,0.31,0.65,1.36,1.92,0.0,5.55,<0.005,25.03
2,0.05,1.05,0.07,-0.08,0.19,0.92,1.21,0.0,0.76,0.45,1.15
3,-0.97,0.38,0.09,-1.16,-0.79,0.31,0.46,0.0,-10.3,<0.005,80.16
4,-0.55,0.58,0.09,-0.73,-0.36,0.48,0.69,0.0,-5.84,<0.005,27.48
5,-0.61,0.54,0.16,-0.93,-0.28,0.39,0.75,0.0,-3.68,<0.005,12.07
6,-0.74,0.48,0.16,-1.05,-0.44,0.35,0.65,0.0,-4.75,<0.005,18.9
7,1.12,3.05,0.17,0.78,1.45,2.18,4.26,0.0,6.53,<0.005,33.83
8,1.22,3.38,0.17,0.89,1.55,2.44,4.69,0.0,7.32,<0.005,41.9
9,0.7,2.02,0.16,0.39,1.01,1.48,2.76,0.0,4.42,<0.005,16.64

0,1
Concordance,0.51
Partial AIC,3612.80
log-likelihood ratio test,684.30 on 10 df
-log2(p) of ll-ratio test,464.51


In [60]:
test_preds = cph.predict_partial_hazard(test[range(0,10)])
concordance_index(event_times, -test_preds, event_observed)

0.8683914847717638

In [149]:
cph.fit(case_subcohort_df, entry_col = "start_time", duration_col = "time",event_col = "event",weights_col = "weights",robust = True)

<lifelines.CoxPHFitter: fitted with 1220 total observations, 1000 right-censored observations>

## Survival tree

In [152]:
case_subcohort

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,y,dropout,dropout_time,end_censor,time,event,start_time,subcohort
1,2.208006,0.752093,-1.559689,-0.378957,-0.278663,0.0,1.0,0.0,1.0,1.0,0.464061,False,0.464061,False,0.464061,True,0.464060,False
6,-0.150383,0.508230,-0.953275,1.426950,1.803416,1.0,1.0,0.0,1.0,0.0,0.242059,False,0.242059,False,0.242059,True,0.242058,False
9,-0.223404,-0.792886,-0.935734,-0.501477,1.976391,1.0,0.0,0.0,1.0,0.0,0.550375,False,0.550375,False,0.550375,True,0.550374,False
10,-1.711492,-1.429028,-1.581501,-0.085552,0.414163,0.0,1.0,1.0,0.0,1.0,0.452305,False,0.452305,False,0.452305,True,0.452304,False
12,-0.494873,-0.435093,-1.118524,0.763039,0.039362,1.0,1.0,1.0,0.0,0.0,0.488331,False,0.488331,False,0.488331,True,0.488330,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,0.009606,-0.595212,1.706497,-0.220094,-0.864048,0.0,0.0,0.0,1.0,0.0,1.007541,False,1.007541,True,0.594752,False,0.000000,True
460,0.065113,1.421616,-0.983576,0.108782,0.360602,1.0,0.0,1.0,0.0,0.0,0.835957,False,0.835957,True,0.594752,False,0.000000,True
821,-0.922213,0.327069,-0.415452,0.349362,-0.841031,1.0,0.0,0.0,1.0,1.0,0.601071,False,0.601071,True,0.594752,False,0.000000,True
622,0.218759,-1.109913,-0.221019,0.062210,0.955832,0.0,0.0,0.0,1.0,0.0,0.518194,False,0.518194,False,0.518193,False,0.000000,True


In [158]:
X = case_subcohort[range(0,10)]
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
1,2.208006,0.752093,-1.559689,-0.378957,-0.278663,0.0,1.0,0.0,1.0,1.0
6,-0.150383,0.508230,-0.953275,1.426950,1.803416,1.0,1.0,0.0,1.0,0.0
9,-0.223404,-0.792886,-0.935734,-0.501477,1.976391,1.0,0.0,0.0,1.0,0.0
10,-1.711492,-1.429028,-1.581501,-0.085552,0.414163,0.0,1.0,1.0,0.0,1.0
12,-0.494873,-0.435093,-1.118524,0.763039,0.039362,1.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
356,0.009606,-0.595212,1.706497,-0.220094,-0.864048,0.0,0.0,0.0,1.0,0.0
460,0.065113,1.421616,-0.983576,0.108782,0.360602,1.0,0.0,1.0,0.0,0.0
821,-0.922213,0.327069,-0.415452,0.349362,-0.841031,1.0,0.0,0.0,1.0,1.0
622,0.218759,-1.109913,-0.221019,0.062210,0.955832,0.0,0.0,0.0,1.0,0.0


In [171]:
y = case_subcohort[["event","time"]].to_records(index = False)
# y

In [172]:
case_subcohort

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,y,dropout,dropout_time,end_censor,time,event,start_time,subcohort
1,2.208006,0.752093,-1.559689,-0.378957,-0.278663,0.0,1.0,0.0,1.0,1.0,0.464061,False,0.464061,False,0.464061,True,0.464060,False
6,-0.150383,0.508230,-0.953275,1.426950,1.803416,1.0,1.0,0.0,1.0,0.0,0.242059,False,0.242059,False,0.242059,True,0.242058,False
9,-0.223404,-0.792886,-0.935734,-0.501477,1.976391,1.0,0.0,0.0,1.0,0.0,0.550375,False,0.550375,False,0.550375,True,0.550374,False
10,-1.711492,-1.429028,-1.581501,-0.085552,0.414163,0.0,1.0,1.0,0.0,1.0,0.452305,False,0.452305,False,0.452305,True,0.452304,False
12,-0.494873,-0.435093,-1.118524,0.763039,0.039362,1.0,1.0,1.0,0.0,0.0,0.488331,False,0.488331,False,0.488331,True,0.488330,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,0.009606,-0.595212,1.706497,-0.220094,-0.864048,0.0,0.0,0.0,1.0,0.0,1.007541,False,1.007541,True,0.594752,False,0.000000,True
460,0.065113,1.421616,-0.983576,0.108782,0.360602,1.0,0.0,1.0,0.0,0.0,0.835957,False,0.835957,True,0.594752,False,0.000000,True
821,-0.922213,0.327069,-0.415452,0.349362,-0.841031,1.0,0.0,0.0,1.0,1.0,0.601071,False,0.601071,True,0.594752,False,0.000000,True
622,0.218759,-1.109913,-0.221019,0.062210,0.955832,0.0,0.0,0.0,1.0,0.0,0.518194,False,0.518194,False,0.518193,False,0.000000,True


In [156]:
from sksurv.tree import SurvivalTree

In [157]:
survTree = SurvivalTree()

In [166]:
survTree.fit(X,y,sample_)

SurvivalTree()

In [169]:
test_preds = survTree.predict(test[range(0,10)])

In [170]:
concordance_index(event_times, -test_preds, event_observed)

0.7302003462775167