# Contrastive Learning for Predicting Cancer Prognosis Using Gene Expression Values

## Sample Model Prediction

The *PredictThroughCoxModel.ipynb* notebook gives step-by-step instructions for predicting Cox hazard results with the public models for each cancer.
Place this notebook in the same directory as the Cox models, inside the `CL4CaPro_Models` folder.

### Pick Cancer

In [2]:
# Cancer-type code: names the ./<Cancer> model folder and the PredictFeature_<Cancer>.txt file.
Cancer = "BLCA"

### Put Input Patient Info
For example, put your input in *BLCA_predict_input.csv*.

In [None]:
# CSV file holding the patient records to be read.
input_pth = "BLCA_predict_input.csv"

### Read Input and Check

In [None]:
import pandas as pd

# Load the patient table named by `input_pth`; the bare name on the last
# line makes the notebook display it for a visual check.
input_df = pd.read_csv(filepath_or_buffer=input_pth)
input_df

### Generate contrastive learning features based on the public cancer model

#### Get model path

In [None]:
import os

def find_clcp_folder_name(directory):
    """Return the name of the CLCP model folder inside ``directory``.

    Entries are examined in sorted order, so the result is the same on every
    run even when several candidates exist (``os.listdir`` gives no ordering
    guarantee). Only sub-directories count; a plain file whose name happens
    to start with ``CLCP`` is ignored.

    Args:
        directory: Path of the directory to scan.

    Returns:
        The first matching sub-directory name in sorted order, or the
        sentinel string ``'No CLCP folder found.'`` when nothing matches.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``directory`` cannot be
            read (for example, it does not exist).
    """
    for folder_name in sorted(os.listdir(directory)):
        is_folder = os.path.isdir(os.path.join(directory, folder_name))
        if is_folder and folder_name.startswith('CLCP'):
            return folder_name
    return 'No CLCP folder found.'

# The per-cancer models are looked up in a sub-folder named after the cancer code.
directory_to_search = './{}'.format(Cancer)
clcp_folder_name = find_clcp_folder_name(directory_to_search)

# find_clcp_folder_name reports a miss with a sentinel string; stop here
# rather than let that text be used as a folder name and parsed as
# hyper-parameters further down.
if clcp_folder_name == 'No CLCP folder found.':
    raise FileNotFoundError(
        "No folder starting with 'CLCP' in {!r}".format(directory_to_search)
    )

model_pth = './{}/{}'.format(Cancer, clcp_folder_name)

# Last expression of the cell, so the notebook displays the folder that was picked.
clcp_folder_name

#### Generate feature

In [None]:
para = clcp_folder_name.split('_')
input_dim = para[1]
model_n_hidden_1 = para[2]
model_out_dim = para[3]
feat_dim = para[5]
batch_size = para[-3]
l2_rate = para[9]
seed = para[13]
round = para[11]
device = 0
lr = para[7]

In [None]:
! python GenerateFeatures_Predict.py --layer_name feat --model_in_dim {input_dim} --dim_1_list {model_n_hidden_1} \
                                     --dim_2_list {model_out_dim} --dim_3_list {feat_dim} --batch_size {batch_size} \
                                     --l2_rate {l2_rate} --seed {seed} --round {round} --gpu_device {device} \
                                     --learning_rate_list {lr} --task WholeTimeSeq \
                                     --cancer_group {cancer}

#### Predict Results

##### Cox-XGB
Predict Results

In [None]:
import xgboost as xgb

# Restore the stored Cox-XGB booster for the selected cancer.
loaded_cox_model = xgb.Booster()
loaded_cox_model.load_model('./{}/coxxgb_model.json'.format(Cancer))

# Feature table for the prediction cohort (presumably written by
# GenerateFeatures_Predict.py — confirm). Columns from index 6 onward are
# the model inputs.
# NOTE(review): the first six columns are assumed to be identifiers/outcome
# fields — confirm against the feature file layout.
predict_input_df = pd.read_csv('Features/PredictFeature_{}.txt'.format(Cancer))
X = predict_input_df.iloc[:, 6:]

# Booster.predict accepts only a DMatrix, so the feature frame is wrapped first;
# passing the DataFrame directly raises a TypeError.
predictions = loaded_cox_model.predict(xgb.DMatrix(X))

Calculate C-index and IBS

In [None]:
import numpy as np
from sksurv.linear_model.coxph import BreslowEstimator
from sksurv.metrics import concordance_index_censored, integrated_brier_score
from sksurv.util import Surv
from xgbse.converters import convert_data_to_xgb_format

# Observed outcome of the prediction cohort.
# NOTE(review): PFI is taken as the event flag and PFItime as the follow-up
# time — confirm against the feature file.
event = predict_input_df['PFI'].astype(bool).to_numpy()
event_time = predict_input_df['PFItime'].astype(float).to_numpy()

# scikit-survival metrics and the xgbse converter both expect a structured
# (event, time) array rather than a DataFrame.
y = Surv.from_arrays(event=event, time=event_time)
dval = convert_data_to_xgb_format(X, y, 'survival:cox')

# Concordance only depends on the ranking of the risk scores.
# np.round is used instead of the builtin `round`, which notebook cells can
# accidentally rebind.
preds = loaded_cox_model.predict(dval)
scores = concordance_index_censored(event, event_time, preds)
c_index = np.round(scores[0], 10)

# BreslowEstimator works on the linear predictor (log hazard ratio), whereas
# survival:cox predictions default to the hazard-ratio scale, so the raw
# margin is requested here.
# NOTE(review): no training cohort is loaded in this notebook, so the baseline
# hazard and the censoring distribution are both estimated from the
# prediction cohort itself — swap in the training data if it is available.
log_risk = loaded_cox_model.predict(dval, output_margin=True)
baseline_model = BreslowEstimator().fit(log_risk, event, event_time)
survs = baseline_model.get_survival_function(log_risk)

# Evaluation grid kept inside the observed follow-up range, as
# integrated_brier_score requires.
times = np.unique(np.percentile(event_time, np.linspace(10, 90, 50)))
surv_probs = np.asarray([[fn(t) for t in times] for fn in survs])

# integrated_brier_score returns a single float, not a tuple.
ibs = np.round(integrated_brier_score(y, y, surv_probs, times), 6)

print(c_index, ibs)

##### Cox-EN
Predict Results

In [None]:
from joblib import load

# Restore the stored Cox-EN estimator for the selected cancer.
# NOTE: joblib files are pickles and can execute code on load — only open
# model files from a trusted source.
coxen_model_file = './{}/coxen_model.joblib'.format(Cancer)
estimator_loaded = load(coxen_model_file)

# Feature table for the prediction cohort; columns from index 6 onward are
# passed to the estimator.
feature_file = 'Features/PredictFeature_{}.txt'.format(Cancer)
predict_input_df = pd.read_csv(feature_file)
X = predict_input_df.iloc[:, 6:]

# Score every patient with the restored estimator.
predictions = estimator_loaded.predict(X)

Calculate C-index and IBS

In [None]:
import numpy as np
from sksurv.linear_model.coxph import BreslowEstimator
from sksurv.metrics import concordance_index_censored, integrated_brier_score
from sksurv.util import Surv

# Observed outcome of the prediction cohort.
# NOTE(review): PFI is taken as the event flag and PFItime as the follow-up
# time — confirm against the feature file.
event = predict_input_df['PFI'].astype(bool).to_numpy()
event_time = predict_input_df['PFItime'].astype(float).to_numpy()
y = Surv.from_arrays(event=event, time=event_time)

# NOTE(review): estimator_loaded.predict is assumed to return a linear
# predictor (log-risk score), which is what BreslowEstimator expects —
# confirm for the stored model type.
risk = np.asarray(estimator_loaded.predict(X))

# np.round is used instead of the builtin `round`, which notebook cells can
# accidentally rebind.
scores = concordance_index_censored(event, event_time, risk)
c_index = np.round(scores[0], 6)

# NOTE(review): no training cohort is loaded in this notebook, so the baseline
# hazard and the censoring distribution are both estimated from the
# prediction cohort itself — swap in the training data if it is available.
baseline_model = BreslowEstimator().fit(risk, event, event_time)
survs = baseline_model.get_survival_function(risk)

# Evaluation grid kept inside the observed follow-up range, as
# integrated_brier_score requires.
times = np.unique(np.percentile(event_time, np.linspace(10, 90, 50)))
surv_probs = np.asarray([[fn(t) for t in times] for fn in survs])

# integrated_brier_score returns a single float, not a tuple.
ibs = np.round(integrated_brier_score(y, y, surv_probs, times), 6)

print(c_index, ibs)

##### Cox-nnet
Predict Results

In [None]:
# NOTE(review): the star import is what brings `loadModel` into scope here; an
# explicit import would be clearer once the package layout is confirmed.
from cox_nnet import *

# Restore the stored Cox-nnet model for the selected cancer.
coxnn_model_file = './{}/coxnn_model.pkl'.format(Cancer)
coxnnet_loaded = loadModel(coxnn_model_file)

# Feature table for the prediction cohort; columns from index 6 onward are
# passed to the network.
feature_file = 'Features/PredictFeature_{}.txt'.format(Cancer)
predict_input_df = pd.read_csv(feature_file)
X = predict_input_df.iloc[:, 6:]

# Score every patient with the restored network.
predictions = coxnnet_loaded.predictNewData(X)

Calculate C-index and IBS

In [None]:
import numpy as np
from sksurv.linear_model.coxph import BreslowEstimator
from sksurv.metrics import concordance_index_censored, integrated_brier_score
from sksurv.util import Surv

# Observed outcome of the prediction cohort.
# NOTE(review): PFI is taken as the event flag and PFItime as the follow-up
# time — confirm against the feature file.
event = predict_input_df['PFI'].astype(bool).to_numpy()
event_time = predict_input_df['PFItime'].astype(float).to_numpy()
y = Surv.from_arrays(event=event, time=event_time)

# Flattened so the metrics receive one score per patient.
# NOTE(review): predictNewData is assumed to return one log-hazard value per
# row, possibly as a column vector — confirm its output shape and scale.
risk = np.ravel(coxnnet_loaded.predictNewData(X))

# np.round is used instead of the builtin `round`, which notebook cells can
# accidentally rebind.
scores = concordance_index_censored(event, event_time, risk)
c_index = np.round(scores[0], 6)

# NOTE(review): no training cohort is loaded in this notebook, so the baseline
# hazard and the censoring distribution are both estimated from the
# prediction cohort itself — swap in the training data if it is available.
baseline_model = BreslowEstimator().fit(risk, event, event_time)
survs = baseline_model.get_survival_function(risk)

# Evaluation grid kept inside the observed follow-up range, as
# integrated_brier_score requires.
times = np.unique(np.percentile(event_time, np.linspace(10, 90, 50)))
surv_probs = np.asarray([[fn(t) for t in times] for fn in survs])

# integrated_brier_score returns a single float, not a tuple.
ibs = np.round(integrated_brier_score(y, y, surv_probs, times), 6)

print(c_index, ibs)