Preliminary Evaluation: all features
- Output: ../../data/results/002-003-all_features_cx_cindex.csv (per-fold cross-validated C-index for each model)

In [35]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sksurv.ensemble import RandomSurvivalForest, GradientBoostingSurvivalAnalysis
from sksurv.svm import FastKernelSurvivalSVM
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.linear_model import CoxnetSurvivalAnalysis
from sklearn.pipeline import  make_pipeline
from sksurv.metrics import concordance_index_censored,concordance_index_ipcw
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_validate, cross_val_score
import joblib

In [36]:
# Read the training split; the path is relative to this notebook's directory.
training_csv_path = "../../data/train_test/training.csv"
training_df = pd.read_csv(training_csv_path)

In [37]:
# Feature matrix: everything except the four outcome columns.
outcome_columns = ['survival_status', 'survival_time', 'recurrence', 'metastasis']
X_train = training_df.drop(columns=outcome_columns)

# Target as a structured array with a boolean 'Status' field and a float
# 'Survival' field; the two source columns are cast positionally to these fields.
survival_dtype = [('Status', 'bool'), ('Survival', 'float64')]
y_train_struct = (
    training_df[['survival_status', 'survival_time']]
    .to_records(index=False)
    .astype(survival_dtype)
)

In [38]:
def c_index(clf, X, y_struct):
    """Score a fitted survival estimator with the censored concordance index.

    The ``(estimator, X, y)`` signature lets this function be passed as the
    ``scoring`` callable of ``cross_val_score``.

    Parameters
    ----------
    clf : fitted estimator exposing ``predict(X)``.
    X : feature matrix handed to ``clf.predict``.
    y_struct : structured array with fields ``'Status'`` and ``'Survival'``.

    Returns
    -------
    The first element of the tuple returned by ``concordance_index_censored``.
    """
    event_indicator = y_struct['Status']
    event_time = y_struct['Survival']
    predicted_risk = clf.predict(X)
    concordance_result = concordance_index_censored(event_indicator, event_time, predicted_risk)
    return concordance_result[0]

In [39]:
# Fixed seed for every estimator that accepts ``random_state`` so that the
# cross-validation scores are reproducible after Restart Kernel -> Run All.
# Scores recorded from earlier unseeded runs may differ from seeded ones.
RANDOM_STATE = 42

# Candidate survival models, each preceded by feature standardisation.
# Keys are the short model names used as column headers in the results file.
pipelines = {
    # CoxnetSurvivalAnalysis is left at its defaults (no seed is passed).
    'coxnet': make_pipeline(StandardScaler(), CoxnetSurvivalAnalysis()),
    'rf': make_pipeline(StandardScaler(), RandomSurvivalForest(random_state=RANDOM_STATE)),
    'gb': make_pipeline(StandardScaler(), GradientBoostingSurvivalAnalysis(random_state=RANDOM_STATE)),
    'fksvm': make_pipeline(StandardScaler(), FastKernelSurvivalSVM(random_state=RANDOM_STATE)),
}

In [40]:
# Per-model array of fold scores, keyed by the pipeline's short name.
scores = {}

for model_name, estimator in pipelines.items():
    print(f"{model_name}: Cross validation...")
    # cv=5 with the custom c_index scorer; one score per fold is returned.
    fold_scores = cross_val_score(
        estimator,
        X_train,
        y_train_struct,
        scoring=c_index,
        cv=5,
    )
    scores[model_name] = fold_scores
    print(f"Score:{fold_scores}")
    print("Done.")
    print("=" * 8)

coxnet: Cross validation...
Score:[0.50974026 0.62290503 0.58992806 0.61928934 0.57042254]
Done.
rf: Cross validation...
Score:[0.62012987 0.59217877 0.4028777  0.56345178 0.45774648]
Done.
gb: Cross validation...
Score:[0.54545455 0.55027933 0.5        0.46700508 0.38732394]
Done.
fksvm: Cross validation...


  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)


Score:[0.52597403 0.57122905 0.34892086 0.70558376 0.6056338 ]
Done.


In [41]:
# One column per model, one row per cross-validation fold; written without the index.
results_csv_path = "../../data/results/002-003-all_features_cx_cindex.csv"
scores_df = pd.DataFrame(scores, columns=scores.keys())
scores_df.to_csv(results_csv_path, index=False)

In [42]:
# Persist the dict of pipelines with joblib.
# NOTE(review): no visible cell calls fit() on these pipeline objects directly
# (cross_val_score works on clones per the scikit-learn docs), so this presumably
# stores unfitted pipeline definitions — confirm that is intended.
pipelines_dump_path = "../../data/pipelines/002-003-basic-pipelines.pl"
joblib.dump(pipelines, pipelines_dump_path)

['../../data/pipelines/002-003-basic-pipelines.pl']