In [1]:
from config import *
from pysurvival.utils import load_model
import pandas as pd
import math
import numpy as np

# Tag embedded in the file names of the saved forest models (csf/rsf/esf).
feature = 'all'

# Load the previously saved survival models from the configured directory.
model_dir = config["save_path"]
csf = load_model(model_dir + "csf_" + feature + ".zip")
rsf = load_model(model_dir + "rsf_" + feature + ".zip")
esf = load_model(model_dir + "esf_" + feature + ".zip")
coxph = load_model(model_dir + "coxph" + ".zip")
nonlinear_coxph = load_model(model_dir + "nonlinear_coxph" + ".zip")
gomp_model = load_model(model_dir + "gomp_model" + ".zip")

# Read the train / test splits.
data_dir = config["path_ok"]
train = pd.read_csv(data_dir + "data_train.csv")
test = pd.read_csv(data_dir + "data_test.csv")

# Names of the time and event columns.
time_column = 'time'
event_column = 'state'

# Encode both splits the same way: round 'time' up to a whole number and
# map 'therapy' to 0 for 'TACE' and 1 for anything else.
for _split in (train, test):
    _split['time'] = _split['time'].apply(math.ceil)
    _split['therapy'] = _split['therapy'].apply(lambda name: 0 if name == 'TACE' else 1)

# Modeling features: every column except the time, event and ID columns
# (np.setdiff1d returns them sorted and de-duplicated).
non_feature_columns = ['time', 'state', 'ID']
features_all = np.setdiff1d(train.columns, non_feature_columns).tolist()

X_train = train[features_all]
X_test = test[features_all]
T_train = train[time_column]
T_test = test[time_column]
E_train = train[event_column]
E_test = test[event_column]

# Predicted survival curves, one row per sample, for the training set and
# the test set of each model.
prs_csf = csf.predict_survival(X_train)
prs_csf_test = csf.predict_survival(X_test)

prs_rsf = rsf.predict_survival(X_train)
prs_rsf_test = rsf.predict_survival(X_test)

prs_esf = esf.predict_survival(X_train)
prs_esf_test = esf.predict_survival(X_test)

prs_coxph = coxph.predict_survival(X_train)
prs_coxph_test = coxph.predict_survival(X_test)

prs_nonlinear_coxph = nonlinear_coxph.predict_survival(X_train)
prs_nonlinear_coxph_test = nonlinear_coxph.predict_survival(X_test)

prs_gomp_model = gomp_model.predict_survival(X_train)
prs_gomp_model_test = gomp_model.predict_survival(X_test)

# Show every prediction array together with its shape: train first, then test.
_report = [
    ("Train CSF:", prs_csf),
    ("Train RSF:", prs_rsf),
    ("Train ESF:", prs_esf),
    ("Train Coxph:", prs_coxph),
    ("Train Nonlinear Coxph:", prs_nonlinear_coxph),
    ("Train GOMP Model:", prs_gomp_model),
    ("Test CSF:", prs_csf_test),
    ("Test RSF:", prs_rsf_test),
    ("Test ESF:", prs_esf_test),
    ("Test Coxph:", prs_coxph_test),
    ("Test Nonlinear Coxph:", prs_nonlinear_coxph_test),
    ("Test GOMP Model:", prs_gomp_model_test),
]
for _label, _curves in _report:
    print(_label, _curves, _curves.shape)
    # Blank line between the train and test groups, as in the original layout.
    if _label == "Train GOMP Model:":
        print()

Loading the model from ./models/csf_all.zip


  # Remove the CWD from sys.path while we load stuff.


Loading the model from ./models/rsf_all.zip


  # This is added back by InteractiveShellApp.init_path()


Loading the model from ./models/esf_all.zip


  if sys.path[0] == "":


Loading the model from ./models/coxph.zip
Loading the model from ./models/nonlinear_coxph.zip
Loading the model from ./models/gomp_model.zip


  del sys.path[0]
  
  from ipykernel import kernelapp as app


Train CSF: [[0.98231022 0.96252017 0.9429391  ... 0.44922252 0.44922252 0.44922252]
 [0.99688165 0.99321791 0.98279461 ... 0.41046199 0.41046199 0.41046199]
 [0.99556935 0.98794076 0.97147868 ... 0.30758491 0.30758491 0.30758491]
 ...
 [0.98778449 0.96724905 0.92554504 ... 0.1846733  0.1846733  0.1846733 ]
 [0.99175066 0.98041253 0.97019958 ... 0.50879261 0.50879261 0.50879261]
 [0.99600529 0.9921442  0.98021189 ... 0.36825482 0.36825482 0.36825482]] (4192, 112)
Train RSF: [[0.98842975 0.97945156 0.96787224 ... 0.55526038 0.55526038 0.55526038]
 [0.99931063 0.99515527 0.99341429 ... 0.60706881 0.60706881 0.60706881]
 [0.99669572 0.98963131 0.97287771 ... 0.42710449 0.42710449 0.42710449]
 ...
 [0.9907116  0.96925854 0.92529606 ... 0.20261138 0.20261138 0.20261138]
 [0.99151052 0.98187601 0.97327565 ... 0.67541009 0.67541009 0.67541009]
 [0.99737838 0.99737838 0.99120965 ... 0.53281831 0.53281831 0.53281831]] (4192, 112)
Train ESF: [[0.98501426 0.96941583 0.95191595 ... 0.50909554 0.509

In [2]:
# Re-read both splits from disk so `train` / `test` hold the CSV contents again
# (the first cell overwrote their 'time' and 'therapy' columns in place).
train, test = (
    pd.read_csv(config["path_ok"] + file_name)
    for file_name in ("data_train.csv", "data_test.csv")
)

In [3]:
# Append each model's predicted survival curves to the data rows, column-wise.
def _attach_curves(frame, curves):
    """Return `frame` with the columns of `curves` placed to its right (axis=1 concat)."""
    return pd.concat([frame, pd.DataFrame(curves)], axis=1)


# Training split.
train_csf = _attach_curves(train, prs_csf)
train_rsf = _attach_curves(train, prs_rsf)
train_esf = _attach_curves(train, prs_esf)
train_coxph = _attach_curves(train, prs_coxph)
train_nonlinear_coxph = _attach_curves(train, prs_nonlinear_coxph)
train_gomp_model = _attach_curves(train, prs_gomp_model)

# Test split.
test_csf = _attach_curves(test, prs_csf_test)
test_rsf = _attach_curves(test, prs_rsf_test)
test_esf = _attach_curves(test, prs_esf_test)
test_coxph = _attach_curves(test, prs_coxph_test)
test_nonlinear_coxph = _attach_curves(test, prs_nonlinear_coxph_test)
test_gomp_model = _attach_curves(test, prs_gomp_model_test)

In [4]:
# Add a predicted time ('pred_time') and event flag ('pred_state') to every
# prediction frame.
from utils import *

# Survival-probability level whose nearest curve value marks the median
# survival time of a sample.
v = 0.5


def _median_survival_index(curve, level):
    """Locate the median-survival position on one predicted survival curve.

    Parameters
    ----------
    curve : numpy array
        One row of a `predict_survival` result.
    level : float
        Target survival probability.

    Returns
    -------
    int
        Index of the first element of `curve` equal to the value returned by
        `find_nearest(curve, level)`.

    Notes
    -----
    `find_nearest` comes from `utils`; it is presumably the curve value closest
    to `level` -- confirm against its definition.
    NOTE(review): the result is a position along the curve, not a time value;
    verify that downstream consumers expect an index.
    """
    return curve.tolist().index(find_nearest(curve, level))


# Every (frame, curves) pair gets identical treatment, so one loop replaces the
# twelve copy-pasted blocks. Order matches the original: train, then test.
_frames_and_curves = [
    (train_csf, prs_csf),
    (train_rsf, prs_rsf),
    (train_esf, prs_esf),
    (train_coxph, prs_coxph),
    (train_nonlinear_coxph, prs_nonlinear_coxph),
    (train_gomp_model, prs_gomp_model),
    (test_csf, prs_csf_test),
    (test_rsf, prs_rsf_test),
    (test_esf, prs_esf_test),
    (test_coxph, prs_coxph_test),
    (test_nonlinear_coxph, prs_nonlinear_coxph_test),
    (test_gomp_model, prs_gomp_model_test),
]

for _frame, _curves in _frames_and_curves:
    # One median-survival index per sample (row) of the prediction array.
    _frame['pred_time'] = [_median_survival_index(_curve, v) for _curve in _curves]
    # NOTE(review): every sample is flagged as an event, including samples
    # whose predicted curve never falls to `v`; confirm this is intended.
    _frame['pred_state'] = 1

In [7]:
# Write the prediction frames to CSV files (row index omitted).
_exports = [
    # Training split.
    (train_csf, './data/CSF_train_before.csv'),
    (train_rsf, './data/RSF_train_before.csv'),
    (train_esf, './data/ESF_train_before.csv'),
    (train_coxph, './data/CoxPH_train_before.csv'),
    (train_nonlinear_coxph, './data/Nonlinear_CoxPH_train_before.csv'),
    (train_gomp_model, './data/GOMP_train_before.csv'),
    # Test split.
    (test_csf, './data/CSF_test_before.csv'),
    (test_rsf, './data/RSF_test_before.csv'),
    (test_esf, './data/ESF_test_before.csv'),
    (test_coxph, './data/CoxPH_test_before.csv'),
    (test_nonlinear_coxph, './data/Nonlinear_CoxPH_test_before.csv'),
    (test_gomp_model, './data/GOMP_test_before.csv'),
]
for _frame, _csv_path in _exports:
    _frame.to_csv(_csv_path, index=False)