RFE features (by recurrence)

In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sksurv.ensemble import RandomSurvivalForest, GradientBoostingSurvivalAnalysis
from sksurv.metrics import concordance_index_censored,concordance_index_ipcw
from sklearn.model_selection import cross_validate, cross_val_score
import joblib

In [4]:
def c_index(clf, X, y_struct):
    """Score a fitted survival model with the concordance index for censored data.

    The ``(estimator, X, y)`` signature lets this function be passed as the
    ``scoring`` callable of ``cross_val_score``.

    Parameters
    ----------
    clf : fitted estimator
        Must expose ``predict(X)``; its output is used as the risk estimate.
    X : array-like
        Feature matrix handed to ``clf.predict``.
    y_struct : structured array
        Must provide a ``'Status'`` field (event indicator) and a
        ``'Survival'`` field (observed time).

    Returns
    -------
    The first element of the ``concordance_index_censored`` result.
    """
    event_observed = y_struct['Status']
    observed_time = y_struct['Survival']
    risk_estimate = clf.predict(X)
    outcome = concordance_index_censored(event_observed, observed_time, risk_estimate)
    # Only the leading entry of the returned tuple is reported as the score.
    return outcome[0]


def cx_pipelines(pipelines, X, y_struct, cv=5, scoring=c_index):
    """Cross-validate every pipeline and collect its per-fold scores.

    Parameters
    ----------
    pipelines : mapping
        Maps a model name to the estimator/pipeline to evaluate.
    X : array-like
        Feature matrix forwarded to ``cross_val_score``.
    y_struct : structured array
        Survival target forwarded to ``cross_val_score``.
    cv : int or CV splitter, default 5
        Forwarded unchanged to ``cross_val_score``.
    scoring : callable, default ``c_index``
        Scorer forwarded unchanged to ``cross_val_score``.

    Returns
    -------
    dict
        Model name -> result of ``cross_val_score`` for that model.

    Progress and the scores of each model are printed as a side effect.
    """
    results = {}

    for model, estimator in pipelines.items():
        print(f"{model}: Cross validation...")
        score = cross_val_score(estimator, X, y_struct, cv=cv, scoring=scoring)
        results[model] = score
        print(f"Score:{score}")
        # Completion marker followed by a separator line between models.
        print("Done.", "=" * 8, sep="\n")

    return results

In [5]:
# Read the training split from disk.
training_df = pd.read_csv("../../data/train_test/training.csv")

# Feature matrix: the two survival columns plus 'recurrence' and 'metastasis' are dropped.
X_train = training_df.drop(
    columns=['survival_status', 'survival_time', 'recurrence', 'metastasis']
)

# Survival target as a record array with fields 'Status' (bool) and 'Survival' (float64),
# which are the field names c_index reads.
y_train_struct = (
    training_df[['survival_status', 'survival_time']]
    .to_records(index=False)
    .astype([('Status', 'bool'), ('Survival', 'float64')])
)

In [6]:
# Load the previously saved pipelines; cx_pipelines iterates this object with
# .items() as (model name, pipeline) pairs.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code while
# loading — only read this file from a trusted source.
pipelines = joblib.load("../../data/pipelines/002-003-basic-pipelines.pl")

In [8]:
# Experiment name -> list of selected feature names.
# Each CSV's column labels are taken as the feature list.
exps = {
    'rfe_lsvc': pd.read_csv("../../data/selected_features/000-features_rfe_lsvc.csv").columns.tolist(),
    'rfe_rfc': pd.read_csv("../../data/selected_features/000-features_rfe_rfc.csv").columns.tolist(),
    'rfe_xgb': pd.read_csv("../../data/selected_features/000-features_rfe_xgb.csv").columns.tolist(),
}

In [9]:
# Accumulator: experiment name -> {model name -> per-fold scores}; filled by the loop below.
scores = {}

In [10]:
# For every feature subset: cross-validate all pipelines on those columns,
# keep the scores in memory, and write them to a per-experiment CSV.
for exp in exps:
    print(f"Exp: {exp}")
    features = exps[exp]
    X_train_Selected = X_train[features]
    score = cx_pipelines(pipelines, X_train_Selected, y_train_struct)
    scores[exp] = score
    # One column per model (in dict order), one row per CV fold.
    score_table = pd.DataFrame(score, columns=list(score))
    score_table.to_csv(f"../../data/results/002-007-{exp}_features_cx_cindex.csv", index=False)
    print("=" * 12)

Exp: rfe_lsvc
coxnet: Cross validation...
Score:[0.50649351 0.52793296 0.47841727 0.70050761 0.49295775]
Done.
rf: Cross validation...
Score:[0.46753247 0.61731844 0.57913669 0.69035533 0.53169014]
Done.
gb: Cross validation...
Score:[0.51623377 0.5698324  0.49640288 0.60406091 0.58802817]
Done.
fksvm: Cross validation...


  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)


Score:[0.51623377 0.62290503 0.48201439 0.59898477 0.43661972]
Done.
Exp: rfe_rfc
coxnet: Cross validation...
Score:[0.56493506 0.70670391 0.58633094 0.70558376 0.65492958]
Done.
rf: Cross validation...
Score:[0.47077922 0.58379888 0.51798561 0.66497462 0.65492958]
Done.
gb: Cross validation...
Score:[0.54220779 0.54469274 0.49280576 0.64974619 0.6584507 ]
Done.
fksvm: Cross validation...


  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)


Score:[0.5        0.66061453 0.42446043 0.75634518 0.43309859]
Done.
Exp: rfe_xgb
coxnet: Cross validation...
Score:[0.46103896 0.55027933 0.4028777  0.59898477 0.5       ]
Done.
rf: Cross validation...
Score:[0.60714286 0.55307263 0.47122302 0.60406091 0.51056338]
Done.
gb: Cross validation...
Score:[0.34415584 0.60614525 0.52877698 0.74619289 0.42957746]
Done.
fksvm: Cross validation...
Score:[0.3961039  0.54608939 0.25179856 0.54314721 0.5       ]
Done.


  self._final_estimator.fit(Xt, y, **fit_params_last_step)
