# Predicting lung cancer survival time by OWKIN

### Problem

- supervised survival prediction problem
- predict the survival time of a patient (remaining days to live) from one three-dimensional CT scan (grayscale image) and a set of pre-extracted quantitative imaging features, as well as clinical data

### Import

In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import autosklearn.regression
import time

# concordance index (C-index)
from metrics_t9gbvr2 import cindex



### Data

- x_train : data_Q0G7b5t
- y_train : output_VSVxRFU.csv
- x_test : data_9Cbe5hx

In [2]:
data_folder_path = "../data"
training_folder_path = os.path.join(data_folder_path, "data_Q0G7b5t")
test_folder_path = os.path.join(data_folder_path, "data_9Cbe5hx")

training_ct_scan_names = [os.path.join(root,file_name) for root,_,file_names in os.walk(training_folder_path) for file_name in file_names if file_name.endswith('.npz')]
test_ct_scan_names = [os.path.join(root,file_name) for root,_,file_names in os.walk(test_folder_path) for file_name in file_names if file_name.endswith('.npz')]

print("Number of training ct scans : {}".format(len(training_ct_scan_names)))
print("Number of test ct scans : {}".format(len(test_ct_scan_names)))

training_features_path = os.path.join(training_folder_path, "features")
test_features_path = os.path.join(test_folder_path, "features")

submission_file_path = "../random_submission_example"

Number of training ct scans : 300
Number of test ct scans : 125


In [3]:
archive = np.load(training_ct_scan_names[0])
scan = archive['scan']
mask = archive['mask']
# scan.shape equals mask.shape

In [4]:
df_train_output = pd.read_csv(os.path.join(data_folder_path, "output_VSVxRFU.csv"), index_col=0)
p0 = df_train_output.loc[202]
print("p0.Event", p0.Event) # prints 1 or 0
print("p0.SurvivalTime", p0.SurvivalTime)
# prints time to event (time to death or time to last known alive) in days

p0.Event 0
p0.SurvivalTime 1378


In [5]:
df_train_output.sample(5)

Unnamed: 0_level_0,SurvivalTime,Event
PatientID,Unnamed: 1_level_1,Unnamed: 2_level_1
181,433,1
227,1153,1
396,66,0
82,910,0
94,2178,0


### Interpretation

(`1=death observed`, `0=escaped from study`)

### Load training data

In [6]:
file_name = os.path.join(training_features_path, "clinical_data.csv")
df_training_clinical_data = pd.read_csv(file_name, delimiter=',', index_col=0)
print("Nb rows in df_training_clinical_data : {}".format(len(df_training_clinical_data)))

file_name = os.path.join(training_features_path, "radiomics.csv")
df_training_radiomics = pd.read_csv(file_name, delimiter=',', index_col=0, skiprows=[0,2], header=[0])
df_training_radiomics.index.names = ["PatientID"]
print("Nb rows in df_training_radiomics : {}".format(len(df_training_radiomics)))

Nb rows in df_training_clinical_data : 300
Nb rows in df_training_radiomics : 300


### Load test data

In [7]:
file_name = os.path.join(test_features_path, "clinical_data.csv")
df_test_clinical_data = pd.read_csv(file_name, delimiter=',', index_col=0)
print("Nb rows in df_training_clinical_data : {}".format(len(df_test_clinical_data)))

file_name = os.path.join(test_features_path, "radiomics.csv")
df_test_radiomics = pd.read_csv(file_name, delimiter=',', index_col=0 , skiprows=[0,2], header=[0])
df_test_radiomics.index.names = ["PatientID"]
print("Nb rows in df_training_radiomics : {}".format(len(df_test_clinical_data)))

Nb rows in df_training_clinical_data : 125
Nb rows in df_training_radiomics : 125


### clinical_data.csv

In [8]:
df_training_clinical_data.sample(5)

Unnamed: 0_level_0,Histology,Mstage,Nstage,SourceDataset,Tstage,age
PatientID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
184,Adenocarcinoma,0,0,l2,1,62.0
67,,0,3,l1,1,
143,Adenocarcinoma,0,0,l2,4,53.0
373,large cell,0,2,l1,4,63.4689
297,Adenocarcinoma,0,0,l2,1,79.0


#### Are there NaN values in df_training_clinical_data ?

In [9]:
#df_training_clinical_data.info()
df_training_clinical_data.isnull().sum()

Histology        20
Mstage            0
Nstage            0
SourceDataset     0
Tstage            0
age              16
dtype: int64

### Remark

There are NaN values in columns Histology and age. We will not use these in our study so no problem.

### radiomics.csv

In [10]:
df_training_radiomics.sample(5)

Unnamed: 0_level_0,original_shape_Compactness1,original_shape_Compactness2,original_shape_Maximum3DDiameter,original_shape_SphericalDisproportion,original_shape_Sphericity,original_shape_SurfaceArea,original_shape_SurfaceVolumeRatio,original_shape_VoxelVolume,original_firstorder_Energy,original_firstorder_Entropy,...,original_glrlm_LongRunEmphasis,original_glrlm_GrayLevelNonUniformity,original_glrlm_RunLengthNonUniformity,original_glrlm_RunPercentage,original_glrlm_LowGrayLevelRunEmphasis,original_glrlm_HighGrayLevelRunEmphasis,original_glrlm_ShortRunLowGrayLevelEmphasis,original_glrlm_ShortRunHighGrayLevelEmphasis,original_glrlm_LongRunLowGrayLevelEmphasis,original_glrlm_LongRunHighGrayLevelEmphasis
PatientID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
251,0.033073,0.388648,34.496377,1.370297,0.729769,2458.617763,0.344035,7178.0,741441089.0,4.544221,...,1.464765,391.44716,5143.171037,0.884262,0.004032,1136.728463,0.00393,1014.955289,0.004536,1843.339976
373,0.023381,0.194237,108.245092,1.726722,0.579132,26328.766843,0.148712,177167.0,888191920.0,2.372933,...,5.870183,22095.14437,43428.292488,0.560435,0.000636,1772.359661,0.000454,1175.076244,0.003305,10700.521271
327,0.018856,0.126326,69.555733,1.992977,0.501762,3715.073474,0.490903,7627.0,223209648.0,4.152156,...,2.211258,370.44388,4177.921633,0.78558,0.003498,591.812038,0.003176,489.532428,0.005673,1538.74532
390,0.022387,0.178068,23.323808,1.777481,0.562594,1196.345059,0.728627,1673.0,285207606.0,5.418515,...,1.176025,41.896018,1443.835826,0.949193,0.006416,1138.64267,0.006292,1078.60615,0.006947,1434.453086
20,0.024233,0.208641,70.221079,1.686035,0.593108,7996.253747,0.260365,30782.0,707311072.0,3.577254,...,3.401745,2188.170163,11683.560151,0.673348,0.001251,1492.166698,0.001092,1119.089351,0.002799,5526.256831


#### Are there NaN values in df_training_radiomics ?

In [11]:
#df_training_radiomics.info()
df_training_radiomics.isnull().sum().sum()

0

### Remark

There are no NaN values in df_training_radiomics.

### Make sure that PatientID are aligned in df_training_clinical_data and df_training_radiomics

In [12]:
(df_training_clinical_data.index.values==df_training_radiomics.index.values).sum()

300

$300$ means that all PatientIDs are aligned in both training dataframes

### Make sure that PatientID are aligned in df_test_clinical_data and df_test_radiomics

In [13]:
(df_test_clinical_data.index.values==df_test_radiomics.index.values).sum()

125

$125$ means that all PatientIDs are aligned in both test dataframes

### Baseline model for survival regression on NSCLC clinical data : Cox proportional hazard (Cox-PH) model

This baseline is trained on a selection of features from both clinical data file and radiomics file. A Cox-PH model was fitted on

- 1 - Tumor sphericity, a measure of the roundness of the shape of the tumor region relative to a sphere, regardless its dimensions (size).
- 2 - The tumor's surface to volume ratio is a measure of the compactness of the tumor, related to its size.
- 3 - The tumor's maximum 3d diameter The biggest diameter measurable from the tumor volume
- 4 - The dataset of origin
- 5 - The N-tumoral stage grading of the tumor describing nearby (regional) lymph nodes involved
- 6 - The tumor's joint entropy, specifying the randomness in the image pixel values
- 7 - The tumor's inverse different, a measure of the local homogeneity of the tumor
- 8 - The tumor's inverse difference moment is another measurement of the local homogeneity of the tumor

### Name of variables

- 1 - original_shape_Sphericity
- 2 - original_shape_SurfaceVolumeRatio
- 3 - original_shape_Maximum3DDiameter
- 4 - l1 (0) or l2 (1)
- 5 - Nstage
- 6 - original_firstorder_Entropy
- 7 - inverse difference (original_glcm_Id)
- 8 - inverse difference moment (original_glcm_Idm) (according to [here](https://static-content.springer.com/esm/art%3A10.1038%2Fncomms5006/MediaObjects/41467_2014_BFncomms5006_MOESM716_ESM.pdf), ctr+F IDMN and [here](https://github.com/cerr/CERR/wiki/GLCM_global_features))

### Remark

Variables used in the baseline use quantitve and qualitive variables. (dataset of origin (l1 or l2)) makes no sens 

In [14]:
df_training_clinical_data.head(5)

Unnamed: 0_level_0,Histology,Mstage,Nstage,SourceDataset,Tstage,age
PatientID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
202,Adenocarcinoma,0,0,l2,2,66.0
371,large cell,0,2,l1,4,64.5722
246,squamous cell carcinoma,0,3,l1,2,66.0452
240,nos,0,2,l1,3,59.3566
284,squamous cell carcinoma,0,3,l1,4,71.0554


### Encode SourceDataset ("dataset of origin") with value between 0 and n_datasets-1.

In [15]:
encoder = LabelEncoder()
encoder.fit(df_training_clinical_data["SourceDataset"])
df_training_clinical_data["SourceDataset"] = encoder.transform(df_training_clinical_data["SourceDataset"])

In [16]:
df_training_clinical_data.head(5)

Unnamed: 0_level_0,Histology,Mstage,Nstage,SourceDataset,Tstage,age
PatientID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
202,Adenocarcinoma,0,0,1,2,66.0
371,large cell,0,2,0,4,64.5722
246,squamous cell carcinoma,0,3,0,2,66.0452
240,nos,0,2,0,3,59.3566
284,squamous cell carcinoma,0,3,0,4,71.0554


In [17]:
df_X_train = pd.concat([df_training_radiomics[["original_shape_Sphericity", 
                       "original_shape_SurfaceVolumeRatio", 
                       "original_shape_Maximum3DDiameter",
                       "original_firstorder_Entropy",
                       "original_glcm_Id",
                       "original_glcm_Idm"]],
                        df_training_clinical_data[["SourceDataset",
                                                  "Nstage"]]
                       ], sort=False, axis=1)

In [18]:
df_X_train = df_X_train.astype(float)
df_X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 300 entries, 202 to 366
Data columns (total 8 columns):
original_shape_Sphericity            300 non-null float64
original_shape_SurfaceVolumeRatio    300 non-null float64
original_shape_Maximum3DDiameter     300 non-null float64
original_firstorder_Entropy          300 non-null float64
original_glcm_Id                     300 non-null float64
original_glcm_Idm                    300 non-null float64
SourceDataset                        300 non-null float64
Nstage                               300 non-null float64
dtypes: float64(8)
memory usage: 21.1 KB


### AutoML sklearn

In [19]:
data = {"PatientID" :  df_train_output.index.values,
        "SurvivalTime" : df_train_output["SurvivalTime"].values,
        "Event" :  df_train_output["Event"].values
       }
temp = pd.DataFrame(data)
temp = temp.set_index(["PatientID", "Event"])
temp["SurvivalTime"]

PatientID  Event
202        0        1378
371        1         379
246        1         573
240        0         959
284        0        2119
                    ... 
261        0        1540
298        0         946
129        0         559
273        0        1952
366        0         858
Name: SurvivalTime, Length: 300, dtype: int64

In [20]:
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection._split import BaseShuffleSplit, _validate_shuffle_split
from sklearn.utils import check_random_state
from sklearn.utils.validation import _num_samples

In [21]:
#https://github.com/automl/auto-sklearn/pull/442/files ctr+F Unknown CrossValidator.
#https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_split.py ctrl +F

In [22]:
random_state = 42
test_size = 0.33
n_splits = 1

In [23]:
suffle_split = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=random_state)
ind_train, ind_test = [_tuple for _tuple in suffle_split._iter_indices(df_X_train, y=df_train_output["SurvivalTime"], groups=None)][0]

In [24]:
class ShuffleSplit(BaseShuffleSplit):
    """
    """
    
    def __init__(self, n_splits=10, test_size=None, train_size=None,
                 random_state=None):
        n_splits = 1 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!! PROBLEM : n_splits stays at 10 even when changing default value

        super().__init__(
            n_splits=n_splits,
            test_size=test_size,
            train_size=train_size,
            random_state=random_state)
        self._default_test_size = 0.1

    def _iter_indices(self, X, y=None, groups=None):
        n_samples = _num_samples(X)
        n_train, n_test = _validate_shuffle_split(
            n_samples, self.test_size, self.train_size,
            default_test_size=self._default_test_size)

        rng = check_random_state(self.random_state)
        for i in range(self.n_splits):
            # random partition
            permutation = rng.permutation(n_samples)
            ind_test = permutation[:n_test]
            ind_train = permutation[n_test:(n_test + n_train)]
            yield ind_train, ind_test

In [25]:
automl = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=60, # in seconds
                                                     per_run_time_limit=30,
                                                     resampling_strategy=ShuffleSplit,
                                                     resampling_strategy_arguments={'n_splits': 1,
                                                    'test_size': 0.33,
                                                    'random_state': random_state})

def my_cindex(solution, prediction, patient_ids, events, ind_train, ind_test):
    prediction = prediction.ravel()
    
    if len(solution)==201:
        ind = ind_train
    else:
        ind = ind_test
       
    print("solution.shape", solution.shape)
    print("prediction.shape", prediction.shape)
    print("patient_ids.shape", patient_ids.shape)
    print("events.shape", events.shape)
    
    #print(autosklearn.metrics.r2(solution, prediction))
    #return autosklearn.metrics.r2(solution, prediction)

    data_solution = {"PatientID" :  patient_ids[ind],
                     "SurvivalTime" : solution,
                     "Event" : events[ind]
                    }
    df_solution = pd.DataFrame(data_solution)
    df_solution = df_solution.set_index("PatientID")
    
    data_prediction = {"PatientID" :  patient_ids[ind],
                       "SurvivalTime" : prediction,
                       "Event" : events[ind]*np.nan
                      }
    df_prediction = pd.DataFrame(data_prediction)
    df_prediction = df_prediction.set_index("PatientID")
    
    print(cindex(df_solution, df_prediction))
    return cindex(df_solution, df_prediction)

cindex_scorer = autosklearn.metrics.make_scorer(
        name="cindex",
        score_func=my_cindex,
        optimum=1,
        greater_is_better=True,
        needs_proba=False,
        needs_threshold=False,
        patient_ids = df_train_output.index.values,
        events = df_train_output["Event"].values,
        ind_train = ind_train,
        ind_test = ind_test
    )

start_time = time.time()
automl.fit(df_X_train.copy(), df_train_output["SurvivalTime"].copy(), dataset_name='data_Q0G7b5t', metric=cindex_scorer) #, metric=cindex_scorer
# Docs for metric in autoML skearln
# https://automl.github.io/auto-sklearn/master/examples/example_metrics.html
# (Metric must be instance of autosklearn.metrics.Scorer.)

#https://automl.github.io/auto-sklearn/master/examples/example_crossvalidation.html
automl.refit(df_X_train.copy(), df_train_output["SurvivalTime"].copy())

execution_time = time.time()-start_time
print("execution_time", execution_time)

# https://automl.github.io/auto-sklearn/master/manual.html
# Cross validation : https://scikit-learn.org/stable/modules/cross_validation.html

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
events.shape (300,)
patient_ids.shape (300,)
0.4999999999986014
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.4999999999986014


  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6608391608373123
solution.shape (201,)
prediction.shape (201,)
patient_ids.shape (300,)
events.shape (300,)
0.8987276154562688
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6608391608373123
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6608391608373123


  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.666573426571562
solution.shape (201,)
prediction.shape (201,)
patient_ids.shape (300,)
events.shape (300,)
0.7209236569267474


  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.5731468531452499
solution.shape (201,)
prediction.shape (201,)
patient_ids.shape (300,)
events.shape (300,)
0.6318567389249463
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.666573426571562
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.5731468531452499
0.666573426571562
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6608391608373123
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.5731468531452499
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.666573426571562
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6662937062918426
solution.shape (99,)
predicti

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
solution.shape (99,)
prediction.shape (99,)
events.shape (300,)
patient_ids.shape (300,)
events.shape (300,)
0.669930069928196
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6769230769211835
events.shape (300,)
0.6674125874107205
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
solution.shape (201,)
events.shape (300,)
prediction.shape (201,)
patient_ids.shape (300,)
events.shape (300,)
0.666573426571562
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6787935909512923
events.shape (300,)
0.6690909090890376
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.668251748249879
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6679720279701595
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6685314685295985
solution.shape (99,)
predicti

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


prediction.shape (99,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.6679720279701595
0.46503496503366426
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (201,)
prediction.shape (201,)
patient_ids.shape (300,)
0.668811188809318
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.49217719132847104
events.shape (300,)
0.6690909090890376
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.668811188809318
solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
0.668811188809318
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6690909090890376
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6702097902079155
0.669930069928196
solution.shape (99,)
predictio

patient_ids.shape (300,)
events.shape (300,)
0.666013986012123
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.665454545452684
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6676923076904401
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6662937062918426
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.665454545452684
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6676923076904401
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6662937062918426
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6657342657324036
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6668531468512815
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
0.6769230769211835
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6867132867113659
0.6825174825155734
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (201,)
prediction.shape (201,)
0.668251748249879
patient_ids.shape (300,)
solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6741258741239885
0.7008953817147022
patient_ids.shape (300,)
solution.shape (99,)
prediction.shape (99,)
events.shape (300,)
0.6855944055924879
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6741258741239885
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6718881118862325
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
ev

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.6542657342639042
0.6839160839141709
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6861538461519269
solution.shape (99,)
solution.shape (201,)
prediction.shape (201,)
prediction.shape (99,)
patient_ids.shape (300,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.6872727272708049
0.861639962298905
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6811188811169759
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6841958041938904
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6875524475505244
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6867132867113659
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
ev

patient_ids.shape (300,)
events.shape (300,)
0.6881118881099634
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6839160839141709
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6855944055924879
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6864335664316463
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6844755244736099
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6864335664316463
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6867132867113659
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
eve

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
0.6875524475505244
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6867132867113659
0.32559440559349484
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6850349650330488
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (201,)
prediction.shape (201,)
0.6867132867113659
patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.3240810556076116
0.6875524475505244
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6847552447533294
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6861538461519269
soluti

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
0.6878321678302438
prediction.shape (99,)
solution.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6751048951030068
0.6847552447533294
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6867132867113659
solution.shape (99,)
prediction.shape (99,)
solution.shape (201,)
patient_ids.shape (300,)
prediction.shape (201,)
events.shape (300,)
0.6881118881099634
patient_ids.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.7121112158334475
0.6867132867113659
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6850349650330488
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.6867132867113659
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6875524475505244
solutio

0.6923076923057558
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6883916083896828
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6945454545435118
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6811188811169759
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6819580419561344
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6925874125854753
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6917482517463168
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6895104895085609
solution.

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
0.6906293706274388
prediction.shape (99,)
patient_ids.shape (300,)
solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
0.6537062937044652
patient_ids.shape (300,)
events.shape (300,)
0.6900699300679998
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (201,)
prediction.shape (201,)
0.6836363636344513
solution.shape (99,)
patient_ids.shape (300,)
prediction.shape (99,)
events.shape (300,)
patient_ids.shape (300,)
events.shape (300,)
0.6825174825155734
0.7509896324215353
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6897902097882803
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6925874125854753
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6911888111868778
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6895104895085609
solutio

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.6909090909071584
solution.shape (99,)
prediction.shape (99,)
0.6780419580400614
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
solution.shape (201,)
prediction.shape (99,)
patient_ids.shape (300,)
prediction.shape (201,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.6836363636344513
0.6807728557957768
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6917482517463168
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6903496503477193
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6906293706274388
solution.shape (99,)
prediction.shape (99,)
pat

prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6909090909071584
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6864335664316463
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6841958041938904
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6813986013966954
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6909090909071584
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patie

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
solution.shape (99,)
patient_ids.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.6813986013966954
0.6676923076904401
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6909090909071584
solution.shape (201,)
prediction.shape (201,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
0.702167766257585
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6864335664316463
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6841958041938904
solution.shape (99,)
predic

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
0.5198601398586857
patient_ids.shape (300,)
events.shape (300,)
0.6841958041938904
solution.shape (99,)
solution.shape (201,)
prediction.shape (201,)
patient_ids.shape (300,)
prediction.shape (99,)
events.shape (300,)
patient_ids.shape (300,)
events.shape (300,)
0.4311027332700932
0.6813986013966954
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6909090909071584
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6864335664316463
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solutio

0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6864335664316463
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6841958041938904
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6813986013966954
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6909090909071584
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6864335664316463
solution.

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.676083916082025
0.675524475522586
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
solution.shape (201,)
prediction.shape (201,)
events.shape (300,)
patient_ids.shape (300,)
events.shape (300,)
0.677762237760342
solution.shape (99,)
0.677285579641209
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.668811188809318
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6653146853128243
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6653146853128243
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6843356643337501
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6029370629353764
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6923076923057558
solution.sha

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6095104895087846
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6128671328654186
events.shape (300,)
0.6914685314665973
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6897902097882803
solution.shape (99,)
solution.shape (201,)
prediction.shape (99,)
patient_ids.shape (300,)
prediction.shape (201,)
events.shape (300,)
patient_ids.shape (300,)
0.6903496503477193
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.690150801130358
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6867132867113659
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6906293706274388
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.68027

events.shape (300,)
0.6844755244736099
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6909090909071584
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6089510489493456
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6931468531449143
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6928671328651947
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6931468531449143
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6889510489491218
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6928671328651947
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.690909

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
0.6892307692288413
patient_ids.shape (300,)
solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6770629370610431
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (201,)
prediction.shape (201,)
0.6883916083896828
solution.shape (99,)
patient_ids.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.6872727272708049
0.8972667294996255
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6847552447533294
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6819580419561344
solutio

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


patient_ids.shape (300,)
solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
0.6847552447533294
patient_ids.shape (300,)
solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6029370629353764
0.6819580419561344
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6847552447533294
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (201,)
0.6903496503477193
prediction.shape (201,)
solution.shape (99,)
patient_ids.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.8685673892546008
0.6746853146834275
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
prediction.shape (99,)
solution.shape (99,)
prediction.shape (99,)
pat

patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6847552447533294
patient_ids.shape (300,)
solution.shape (99,)
prediction.shape (99,)
events.shape (300,)
0.6819580419561344
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6847552447533294
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6903496503477193
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6746853146834275
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
eve

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
solution.shape (99,)
prediction.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
patient_ids.shape (300,)
events.shape (300,)
0.6883916083896828
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
0.6201398601381255
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (201,)
prediction.shape (201,)
0.6878321678302438
patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.7812912346835237
0.6847552447533294
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6819580419561344
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6847552447533294
solution.shape (99,)
predi

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6746853146834275
solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6598601398582942
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (201,)
prediction.shape (201,)
patient_ids.shape (300,)
0.6878321678302438
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6663053722896641
events.shape (300,)
0.6883916083896828
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6847552447533294
solutio

0.6819580419561344
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6847552447533294
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
events.shape (300,)
0.6903496503477193
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6746853146834275
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6892307692288413
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6883916083896828
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6872727272708049
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6878321678302438
solution.

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6819580419561344
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6573426573408186
events.shape (300,)
0.6847552447533294
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6903496503477193
solution.shape (201,)
prediction.shape (201,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
0.7463713477844048
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6746853146834275
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6128671328654186
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6029370629353764
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.675524475522586
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
ev

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


patient_ids.shape (300,)
solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6201398601381255
events.shape (300,)
solution.shape (99,)
0.6956643356623897
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6128671328654186
solution.shape (99,)
prediction.shape (99,)
solution.shape (201,)
patient_ids.shape (300,)
events.shape (300,)
prediction.shape (201,)
0.6029370629353764
patient_ids.shape (300,)
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
0.6908576814319596
patient_ids.shape (300,)
events.shape (300,)
0.5731468531452499
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.5198601398586857
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6867132867113659
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6914685314665973
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
e

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)


solution.shape (99,)
events.shape (300,)
prediction.shape (99,)
patient_ids.shape (300,)
0.6945454545435118
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
0.7001398601379017
events.shape (300,)
0.6830769230750124
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6928671328651947
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
solution.shape (201,)
prediction.shape (201,)
patient_ids.shape (300,)
0.6934265734246338
events.shape (300,)
solution.shape (99,)
prediction.shape (99,)
0.6915174363801211
patient_ids.shape (300,)
events.shape (300,)
0.6942657342637922
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6914685314665973
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6895104895085609
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6833

events.shape (300,)
0.7040559440539746
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.7076923076903282
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.7006993006973408
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6990209790190237
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.7018181818162187
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6953846153826703
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.7068531468511696
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6965034965015483
patient_ids.shape (300,)
solution.shape (99,)
prediction.shape (99,)
events.shape (300,)
0.6928671328651947
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.697062

events.shape (300,)
0.7029370629350967
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6990209790190237
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.7057342657322917
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6981818181798652
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.7015384615364992
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6981818181798652
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6836363636344513
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6937062937043533
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.6889510489491218
solution.shape (99,)
prediction.shape (99,)
patient_ids.shape (300,)
events.shape (300,)
0.704055

In [26]:
automl.resampling_strategy.__name__

'ShuffleSplit'

In [27]:
print(np.c_[automl.cv_results_["status"], automl.cv_results_["mean_test_score"]])

[['Success' '0.6608391608373123']
 ['Success' '0.666573426571562']
 ['Success' '0.5731468531452499']
 ['Success' '0.6769230769211835']
 ['Success' '0.46503496503366426']
 ['Success' '0.6867132867113659']
 ['Success' '0.6542657342639042']
 ['Success' '0.32559440559349484']
 ['Success' '0.6751048951030068']
 ['Success' '0.6537062937044652']
 ['Success' '0.6780419580400614']
 ['Success' '0.6676923076904401']
 ['Success' '0.5198601398586857']
 ['Success' '0.675524475522586']
 ['Success' '0.6128671328654186']
 ['Success' '0.6770629370610431']
 ['Success' '0.6029370629353764']
 ['Success' '0.6201398601381255']
 ['Success' '0.6598601398582942']
 ['Success' '0.6573426573408186']
 ['Success' '0.6956643356623897']
 ['Success' '0.7001398601379017']]


In [28]:
print("{:0.2f}".format(automl.cv_results_["mean_test_score"].max()))
print()
print(automl.sprint_statistics())
#automl._automl[0]._metric.name

0.70

auto-sklearn results:
  Dataset name: data_Q0G7b5t
  Metric: cindex
  Best validation score: 0.700140
  Number of target algorithm runs: 22
  Number of successful target algorithm runs: 22
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 0
  Number of target algorithms that exceeded the memory limit: 0



In [29]:
all_information = automl.get_models_with_weights()
index_regressor = 5
weights = []
print("Models used with corresponding weights :\n")
for weight, simple_regression_pipeline in all_information:
    print(str(weight)+" : "+simple_regression_pipeline[index_regressor].choice.__class__.__name__)
    weights.append(weight)
print()
print("sum(weights) = ", np.round(sum(weights),2))

Models used with corresponding weights :

0.66 : DecisionTree
0.2 : ExtraTreesRegressor
0.06 : LibSVM_SVR
0.04 : RidgeRegression
0.04 : XGradientBoostingRegressor

sum(weights) =  1.0


### Test

In [30]:
df_test_clinical_data["SourceDataset"] = encoder.transform(df_test_clinical_data["SourceDataset"])

In [31]:
df_X_test = pd.concat([df_test_radiomics[["original_shape_Sphericity", 
                       "original_shape_SurfaceVolumeRatio", 
                       "original_shape_Maximum3DDiameter",
                       "original_firstorder_Entropy",
                       "original_glcm_Id",
                       "original_glcm_Idm"]],
                        df_test_clinical_data[["SourceDataset",
                                                  "Nstage"]]
                       ], sort=False, axis=1)

In [32]:
df_X_test = df_X_test.astype(float)
df_X_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 125 entries, 13 to 274
Data columns (total 8 columns):
original_shape_Sphericity            125 non-null float64
original_shape_SurfaceVolumeRatio    125 non-null float64
original_shape_Maximum3DDiameter     125 non-null float64
original_firstorder_Entropy          125 non-null float64
original_glcm_Id                     125 non-null float64
original_glcm_Idm                    125 non-null float64
SourceDataset                        125 non-null float64
Nstage                               125 non-null float64
dtypes: float64(8)
memory usage: 8.8 KB


In [33]:
y_hat = automl.predict(df_X_test)

In [34]:
df_predicted_survival_time = pd.read_csv(os.path.join(submission_file_path, "random_submission_0vhlEZN.csv"), index_col=0)
df_predicted_survival_time.head(5)

Unnamed: 0_level_0,SurvivalTime,Event
PatientID,Unnamed: 1_level_1,Unnamed: 2_level_1
13,788.417673,
155,427.650092,
404,173.587222,
407,389.877973,
9,1580.767244,


In [35]:
df_predicted_survival_time.index = df_test_clinical_data.index
df_predicted_survival_time["SurvivalTime"] = y_hat

In [36]:
df_predicted_survival_time.head(5)

Unnamed: 0_level_0,SurvivalTime,Event
PatientID,Unnamed: 1_level_1,Unnamed: 2_level_1
13,627.79505,
155,1187.88936,
404,622.049064,
407,646.775876,
9,1144.756418,


## $\color{red}{\text{To be continued}}$

### Annexe (just to check detail on training data)

In [37]:
y_hat = automl.predict(df_X_train)

df_hat = df_train_output.copy()
df_hat["SurvivalTime"] = y_hat
df_hat["Event"] = np.nan
cindex(df_train_output , df_hat)

0.6788787648694384