In [4]:
import time
from WeightDistribution import Simple
from WeightDistribution import Wilcoxon
from WeightDistribution import Tarone_Ware
from WeightDistribution import Peto
from Concordance_index import concordance_index
from lifelines import datasets
from sklearn.model_selection import train_test_split

# Load the Rossi data set bundled with lifelines and echo it to the cell output.
rossi = datasets.load_rossi()
print('The dataset is:')
print(rossi)

# Target frame: the event indicator ("arrest") must sit in column 0 and the
# duration ("week", time until the event occurs) in column 1.
y = rossi[["arrest", "week"]]
# Feature frame: every column that is not part of the target.
X = rossi.drop(columns=y.columns)

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# reproducible. X / y are rebound to the training portion from here on.
X, X_test, y, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=10,
)


The dataset is:
     week  arrest  fin  age  race  wexp  mar  paro  prio
0      20       1    0   27     1     0    0     1     3
1      17       1    0   18     1     0    0     1     8
2      25       1    0   19     0     1    0     1    13
3      52       0    1   23     1     1    1     1     1
4      52       0    0   19     0     1    0     1     3
..    ...     ...  ...  ...   ...   ...  ...   ...   ...
427    52       0    1   31     0     1    0     1     3
428    52       0    0   20     1     0    0     1     1
429    52       0    1   20     1     1    1     1     1
430    52       0    0   29     1     1    0     1     3
431    52       0    1   24     1     1    0     1     1

[432 rows x 9 columns]


In [8]:
# Forest sizes to benchmark, in the order they are run.
numberOfTrees = [30, 20, 40]

# Each entry pairs the headline printed for a splitting rule with the
# WeightDistribution module that provides its RandomSurvivalForest class.
# Keeping the variants in one table replaces four copy-pasted stanzas, so the
# train / time / predict / score steps exist exactly once.
split_rules = [
    ('Using Simple Log-Rank test for Random Survival Forest', Simple),
    ('Using Wilcoxon weightings for Log-Rank test for Random Survival Forest', Wilcoxon),
    ('Using Tarone-Ware weightings for Log-Rank test for Random Survival Forest', Tarone_Ware),
    ('Using Peto weightings for Log-Rank test for Random Survival Forest', Peto),
]

for numT in numberOfTrees:
    print('Number of Trees in the Random Survival Forest: ', numT)

    for headline, rule_module in split_rules:
        print(headline)
        print("Start training...")

        # perf_counter() is a monotonic, high-resolution clock, so the measured
        # interval cannot be distorted by system clock adjustments.
        # Only the fit() call is timed; prediction and scoring are excluded.
        start_time = time.perf_counter()
        rsf = rule_module.RandomSurvivalForest(n_estimators=numT, n_jobs=-1, random_state=10)
        rsf = rsf.fit(X, y)
        print(f'--- {round(time.perf_counter() - start_time, 3)} seconds ---')

        # Score the fitted forest on the held-out split.
        y_pred = rsf.predict(X_test)
        c_val = concordance_index(y_time=y_test["week"], y_pred=y_pred, y_event=y_test["arrest"])
        print(f'Concordance index: {round(c_val, 3)}\n')

    print('---------------------------------------------')

Number of Trees in the Random Survival Forest:  30
Using Simple Log-Rank test for Random Survival Forest
Start training...
--- 21.928 seconds ---
Concordance index: 0.654

Using Wilcoxon weightings for Log-Rank test for Random Survival Forest
Start training...
--- 18.306 seconds ---
Concordance index: 0.656

Using Tarone-Ware weightings for Log-Rank test for Random Survival Forest
Start training...
--- 17.825 seconds ---
Concordance index: 0.659

Using Peto weightings for Log-Rank test for Random Survival Forest
Start training...
--- 18.667 seconds ---
Concordance index: 0.671

---------------------------------------------
Number of Trees in the Random Survival Forest:  20
Using Simple Log-Rank test for Random Survival Forest
Start training...
--- 16.431 seconds ---
Concordance index: 0.622

Using Wilcoxon weightings for Log-Rank test for Random Survival Forest
Start training...
--- 16.543 seconds ---
Concordance index: 0.651

Using Tarone-Ware weightings for Log-Rank test for Random S