In [1]:
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
import logging

# Everything below is derived from the notebook's current working directory,
# so read it once.
_cwd = os.getcwd()

# PROJECT_ROOT is the directory two levels above the working directory.
# It is appended to sys.path (only if missing, so re-running this cell does
# not add duplicates) so that the project-local imports further down resolve.
PROJECT_ROOT = os.path.dirname(os.path.dirname(_cwd))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Output locations live next to the notebook; create them if they do not
# exist yet (exist_ok=True keeps the cell re-runnable).
MODEL_DIR = os.path.join(_cwd, 'model')
RESULTS_DIR = os.path.join(_cwd, 'results')
for _out_dir in (MODEL_DIR, RESULTS_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# Imports
from preprocessing.data_container import DataContainer
from utils.evaluation import cindex_score
from models.modelling_process import ModellingProcess
from models.deep_surv_model import DeepSurvModel

# Logging: request INFO-and-above on the root logger, with every record
# rendered as "<timestamp> - <level name> - <message>".
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Logger for messages emitted directly from this notebook.
logger = logging.getLogger(__name__)

In [None]:
# Data configuration: handed to mp.prepare_data() in a later cell.
DATA_CONFIG = dict(
    use_pca=False,
    gene_type='intersection',
    use_imputed=True,
    use_cohorts=False,
)

# Model configuration: handed to mp.do_modelling() in a later cell.
# 'params_cv' is the hyper-parameter grid; only batch_size has more than one
# value, so the grid holds two candidates.
# NOTE(review): the 'model__' prefix looks like scikit-learn's
# "<step>__<param>" convention addressing the 'model' pipeline step below;
# confirm against ModellingProcess.
MODEL_CONFIG = dict(
    params_cv=dict(
        model__hidden_layers=[[16, 16]],
        model__learning_rate=[0.01],
        model__batch_size=[64, 256],
        model__num_epochs=[10],
    ),
    refit=True,
    do_nested_resampling=True,
    path=RESULTS_DIR,      # output location for results (the ./results directory)
    fname_cv='results',
)

# Single-step pipeline: the DeepSurv model, registered under the name 'model'.
ds_pipeline_steps = [
    ('model', DeepSurvModel()),
]

In [3]:
# Create the ModellingProcess instance that the following cells use to
# load the data (prepare_data) and run the modelling (do_modelling).
mp = ModellingProcess()

In [4]:
# Load the data described by DATA_CONFIG into `mp`.
# PROJECT_ROOT is passed along, presumably so the data files can be located
# relative to the project root (TODO confirm in ModellingProcess).
# In the recorded run this step took about three minutes and loaded
# 1091 samples with 13214 features.
mp.prepare_data(DATA_CONFIG, PROJECT_ROOT)

2024-11-22 11:38:07,051 - INFO - Loading data...
2024-11-22 11:41:09,735 - INFO - Loaded data: 1091 samples, 13214 features


In [5]:
# Run the modelling for the DeepSurv pipeline with the settings in MODEL_CONFIG
# and keep the returned result in `nstd_res_result`.
# In the recorded run the log shows nested resampling with one outer fold per
# test cohort (9 groups), an inner grid search over the 2 candidates for each
# outer fold, an aggregated mean score, and finally hyper-parameter tuning on
# the complete data (presumably driven by 'do_nested_resampling' and 'refit'
# in MODEL_CONFIG; confirm in ModellingProcess).
nstd_res_result = mp.do_modelling(ds_pipeline_steps, MODEL_CONFIG)

2024-11-22 11:41:32,022 - INFO - Start model training...
2024-11-22 11:41:32,025 - INFO - Input data shape: X=(1091, 13214)
2024-11-22 11:41:32,027 - INFO - Nested resampling...
2024-11-22 11:41:32,031 - INFO - Starting nested resampling...
2024-11-22 11:41:32,034 - INFO - Data shape: X=(1091, 13214), groups=9 unique
2024-11-22 11:41:32,071 - INFO - 
Outer fold 1
2024-11-22 11:41:32,139 - INFO - Test cohort: Atlanta_2014_Long


Fitting 8 folds for each of 2 candidates, totalling 16 fits
In eval step
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 2.0704, Validation Loss: 0.6108
In eval step
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 1.8716, Validation Loss: 1.2510
In eval step
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.4233, Validation Loss: 0.4620
In eval step
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 1.1413, Validation Loss: 0.4948
In eval step
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 1.0554, Validation Loss: 0.4801
In eval step
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 1.0301, Validation Loss: 0.4702
In eval step
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 1.0074, Validation Loss: 0.4386
In eval step
In eval step
In eval step
In eval step
Epoch 8, Train Loss; 1.0036, Validation Loss: 0.4956


2024-11-22 11:41:58,184 - INFO - Best parameters: {'model__batch_size': 256, 'model__hidden_layers': [16, 16], 'model__learning_rate': 0.01, 'model__num_epochs': 10}
2024-11-22 11:41:58,185 - INFO - Test score: 0.549
2024-11-22 11:41:58,187 - INFO - 
Outer fold 2


In eval step
In eval step
In eval step
In eval step
Epoch 9, Train Loss; 1.0008, Validation Loss: 0.4691
In eval step
In eval step
In eval step
In eval step
Epoch 10, Train Loss; 0.9982, Validation Loss: 0.4400


2024-11-22 11:41:58,251 - INFO - Test cohort: Belfast_2018_Jain


Fitting 8 folds for each of 2 candidates, totalling 16 fits
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 2.8213, Validation Loss: 1.4252
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 1.9784, Validation Loss: 0.9476
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.3796, Validation Loss: 0.8378
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 1.2371, Validation Loss: 0.8322
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 1.1900, Validation Loss: 0.8203
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 1.1432, Validation Loss: 0.8113
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 1.1194, Validation Loss: 0.8124
In eval step
In eval step
In eval step
Epoch 8, Train Loss; 1.1166, Validation Loss: 0.8077


2024-11-22 11:42:04,459 - INFO - Best parameters: {'model__batch_size': 64, 'model__hidden_layers': [16, 16], 'model__learning_rate': 0.01, 'model__num_epochs': 10}
2024-11-22 11:42:04,460 - INFO - Test score: 0.478
2024-11-22 11:42:04,461 - INFO - 
Outer fold 3


In eval step
In eval step
In eval step
Epoch 9, Train Loss; 1.1096, Validation Loss: 0.7983
In eval step
In eval step
In eval step
Epoch 10, Train Loss; 1.1127, Validation Loss: 0.7930


2024-11-22 11:42:04,525 - INFO - Test cohort: CPC_GENE_2017_Fraser


Fitting 8 folds for each of 2 candidates, totalling 16 fits
In eval step
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 2.2697, Validation Loss: 1.1989
In eval step
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 2.1199, Validation Loss: 0.7311
In eval step
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.3089, Validation Loss: 0.6031
In eval step
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 1.1446, Validation Loss: 0.5774
In eval step
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 1.0856, Validation Loss: 0.5453
In eval step
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 1.0493, Validation Loss: 0.5538
In eval step
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 1.0668, Validation Loss: 0.5734
In eval step
In eval step
In eval step
In eval step
Epoch 8, Train Loss; 1.0157, Validation Loss: 0.5974
In eval step
In eval step
In eval step
In eval step
Epoch 9, Train Loss; 1.0201, Validation Loss: 0.

2024-11-22 11:42:11,776 - INFO - Best parameters: {'model__batch_size': 256, 'model__hidden_layers': [16, 16], 'model__learning_rate': 0.01, 'model__num_epochs': 10}
2024-11-22 11:42:11,777 - INFO - Test score: 0.568
2024-11-22 11:42:11,778 - INFO - 
Outer fold 4


In eval step
In eval step
In eval step
In eval step
Epoch 10, Train Loss; 1.0327, Validation Loss: 0.5152


2024-11-22 11:42:11,850 - INFO - Test cohort: CPGEA_2020_Li


Fitting 8 folds for each of 2 candidates, totalling 16 fits
In eval step
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 2.0538, Validation Loss: 0.9879
In eval step
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 1.6648, Validation Loss: 0.7819
In eval step
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.3051, Validation Loss: 0.7621
In eval step
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 1.2177, Validation Loss: 0.7251
In eval step
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 1.0682, Validation Loss: 0.7021
In eval step
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 1.0422, Validation Loss: 0.6915
In eval step
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 1.0198, Validation Loss: 0.7180
In eval step
In eval step
In eval step
In eval step
Epoch 8, Train Loss; 1.0414, Validation Loss: 0.6931


2024-11-22 11:42:18,342 - INFO - Best parameters: {'model__batch_size': 64, 'model__hidden_layers': [16, 16], 'model__learning_rate': 0.01, 'model__num_epochs': 10}
2024-11-22 11:42:18,343 - INFO - Test score: 0.557
2024-11-22 11:42:18,344 - INFO - 
Outer fold 5


In eval step
In eval step
In eval step
In eval step
Epoch 9, Train Loss; 1.0099, Validation Loss: 0.7158
In eval step
In eval step
In eval step
In eval step
Epoch 10, Train Loss; 1.0188, Validation Loss: 0.7398


2024-11-22 11:42:18,411 - INFO - Test cohort: CamCap_2016_Ross_Adams


Fitting 8 folds for each of 2 candidates, totalling 16 fits




In eval step
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 2.8445, Validation Loss: 1.6923
In eval step
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 2.1585, Validation Loss: 0.6962
In eval step
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.3348, Validation Loss: 0.6588
In eval step
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 1.2280, Validation Loss: 0.5834
In eval step
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 1.1469, Validation Loss: 0.6024
In eval step
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 1.1321, Validation Loss: 0.5813
In eval step
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 1.1126, Validation Loss: 0.5726
In eval step
In eval step
In eval step
In eval step
Epoch 8, Train Loss; 1.1043, Validation Loss: 0.6009
In eval step
In eval step
In eval step
In eval step
Epoch 9, Train Loss; 1.1064, Validation Loss: 0.5686


2024-11-22 11:42:25,883 - INFO - Best parameters: {'model__batch_size': 256, 'model__hidden_layers': [16, 16], 'model__learning_rate': 0.01, 'model__num_epochs': 10}
2024-11-22 11:42:25,885 - INFO - Test score: 0.480
2024-11-22 11:42:25,886 - INFO - 
Outer fold 6


In eval step
In eval step
In eval step
In eval step
Epoch 10, Train Loss; 1.1007, Validation Loss: 0.6018


2024-11-22 11:42:25,978 - INFO - Test cohort: CancerMap_2017_Luca


Fitting 8 folds for each of 2 candidates, totalling 16 fits
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 1.7973, Validation Loss: 0.6378
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 1.4126, Validation Loss: 0.6482
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.1965, Validation Loss: 0.5973
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 1.1133, Validation Loss: 0.5886
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 1.0905, Validation Loss: 0.5809
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 1.0767, Validation Loss: 0.5802
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 1.0749, Validation Loss: 0.5798
In eval step
In eval step
In eval step
Epoch 8, Train Loss; 1.0777, Validation Loss: 0.5765


2024-11-22 11:42:33,893 - INFO - Best parameters: {'model__batch_size': 64, 'model__hidden_layers': [16, 16], 'model__learning_rate': 0.01, 'model__num_epochs': 10}
2024-11-22 11:42:33,894 - INFO - Test score: 0.678
2024-11-22 11:42:33,895 - INFO - 
Outer fold 7


In eval step
In eval step
In eval step
Epoch 9, Train Loss; 1.0640, Validation Loss: 0.5777
In eval step
In eval step
In eval step
Epoch 10, Train Loss; 1.0614, Validation Loss: 0.5760


2024-11-22 11:42:33,963 - INFO - Test cohort: DKFZ_2018_Gerhauser


Fitting 8 folds for each of 2 candidates, totalling 16 fits
In eval step
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 2.5248, Validation Loss: 1.8918
In eval step
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 2.7966, Validation Loss: 1.0978
In eval step
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.5093, Validation Loss: 0.7433
In eval step
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 1.1802, Validation Loss: 0.7699
In eval step
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 1.0575, Validation Loss: 0.6687
In eval step
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 1.0131, Validation Loss: 0.7388
In eval step
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 1.0252, Validation Loss: 0.6727
In eval step
In eval step
In eval step
In eval step
Epoch 8, Train Loss; 0.9899, Validation Loss: 0.6986


2024-11-22 11:42:41,344 - INFO - Best parameters: {'model__batch_size': 256, 'model__hidden_layers': [16, 16], 'model__learning_rate': 0.01, 'model__num_epochs': 10}
2024-11-22 11:42:41,345 - INFO - Test score: 0.547
2024-11-22 11:42:41,346 - INFO - 
Outer fold 8
2024-11-22 11:42:41,397 - INFO - Test cohort: MSKCC_2010_Taylor


In eval step
In eval step
In eval step
In eval step
Epoch 9, Train Loss; 1.0167, Validation Loss: 0.7019
In eval step
In eval step
In eval step
In eval step
Epoch 10, Train Loss; 0.9716, Validation Loss: 0.6410
Fitting 8 folds for each of 2 candidates, totalling 16 fits
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 1.9513, Validation Loss: 0.7845
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 1.6163, Validation Loss: 0.5449
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.2627, Validation Loss: 0.5355
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 1.1639, Validation Loss: 0.5100
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 1.1555, Validation Loss: 0.5078
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 1.1364, Validation Loss: 0.5094
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 1.1372, Validation Loss: 0.5070
In eval step
In eval step
In eval step
Epoch 8, Train Loss; 1.1219, Validation Loss: 

2024-11-22 11:42:48,838 - INFO - Best parameters: {'model__batch_size': 64, 'model__hidden_layers': [16, 16], 'model__learning_rate': 0.01, 'model__num_epochs': 10}
2024-11-22 11:42:48,839 - INFO - Test score: 0.575
2024-11-22 11:42:48,840 - INFO - 
Outer fold 9
2024-11-22 11:42:48,903 - INFO - Test cohort: Stockholm_2016_Ross_Adams


Fitting 8 folds for each of 2 candidates, totalling 16 fits
In eval step
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 2.0937, Validation Loss: 0.5666
In eval step
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 1.3849, Validation Loss: 0.4141
In eval step
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.0057, Validation Loss: 0.4019
In eval step
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 0.9278, Validation Loss: 0.4323
In eval step
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 0.9733, Validation Loss: 0.4014
In eval step
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 0.9421, Validation Loss: 0.4494
In eval step
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 0.9380, Validation Loss: 0.4302
In eval step
In eval step
In eval step
In eval step
Epoch 8, Train Loss; 0.9298, Validation Loss: 0.4209
In eval step


2024-11-22 11:42:55,857 - INFO - Best parameters: {'model__batch_size': 64, 'model__hidden_layers': [16, 16], 'model__learning_rate': 0.01, 'model__num_epochs': 10}
2024-11-22 11:42:55,858 - INFO - Test score: 0.619
2024-11-22 11:42:55,859 - INFO - Aggregated results:
2024-11-22 11:42:55,859 - INFO - Mean score: 0.561 ± 0.059
2024-11-22 11:42:55,860 - INFO - Individual scores: [np.float64(0.5492774566473988), np.float64(0.47815142972211033), np.float64(0.568087318087318), np.float64(0.5567853705486044), np.float64(0.479702300405954), np.float64(0.6776859504132231), np.float64(0.5474209650582362), np.float64(0.5753105590062112), np.float64(0.6187985150185623)]
2024-11-22 11:42:55,868 - INFO - Do HP Tuning for complete model; refit + set complete model
2024-11-22 11:42:55,869 - INFO - Do HP Tuning for complete model


In eval step
In eval step
In eval step
Epoch 9, Train Loss; 0.9300, Validation Loss: 0.4623
In eval step
In eval step
In eval step
In eval step
Epoch 10, Train Loss; 0.9207, Validation Loss: 0.4080
Fitting 9 folds for each of 2 candidates, totalling 18 fits
In eval step
In eval step
In eval step
In eval step
Epoch 1, Train Loss; 2.2298, Validation Loss: 0.8125
In eval step
In eval step
In eval step
In eval step
Epoch 2, Train Loss; 1.7607, Validation Loss: 0.9081
In eval step
In eval step
In eval step
In eval step
Epoch 3, Train Loss; 1.5328, Validation Loss: 0.8122
In eval step
In eval step
In eval step
In eval step
Epoch 4, Train Loss; 1.1272, Validation Loss: 0.7730
In eval step
In eval step
In eval step
In eval step
Epoch 5, Train Loss; 1.0919, Validation Loss: 0.6872
In eval step
In eval step
In eval step
In eval step
Epoch 6, Train Loss; 1.0711, Validation Loss: 0.7005
In eval step
In eval step
In eval step
In eval step
Epoch 7, Train Loss; 1.0464, Validation Loss: 0.7338
In eval