In [71]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import os
import warnings
import joblib
from pycox.models import CoxPH
from pycox.evaluation import EvalSurv
import torch
from torchtuples.practical import MLPVanilla
from sklearn.linear_model import LassoCV
from torchtuples import optim as ttoptim
from sklearn.model_selection import train_test_split
import csv
from torch.utils.data import DataLoader, TensorDataset

from pycox.models.loss import CoxPHLoss
import torchtuples as tt
import matplotlib.pyplot as plt
from lifelines import CoxPHFitter
import pandas as pd
from sklearn.decomposition import PCA
import joblib
from lifelines.utils import to_long_format


In [72]:

# Suppress all warnings for the remainder of the kernel session.
# NOTE(review): this is a blanket filter, so warnings emitted by any library
# used below are hidden as well; consider narrowing it by category or module.
warnings.filterwarnings('ignore')


In [73]:

# --- Run configuration ---------------------------------------------------------
# Both values are interpolated into the output paths built by the helpers below.
model_used = 'CoxPH'
selection_method = 'lasso'

# Modality combinations to process. Each key is an underscore-joined list of
# modality names; the training loop pairs entry i of the "global" list with
# entry i of the "local" list.
# Alternative keys that were listed here but disabled:
#   "flair_t1ce_t2", "flair", "t2"
Light_GBM_global = ["flair_t1ce"]
Light_GBM_local = ["flair_t1ce"]

In [74]:
# Paths (relative to the working directory) to the survival CSV files inside the
# BraTS 2020 training and validation dataset folders.
# NOTE(review): neither variable is referenced in the cells visible here;
# make_csv() hard-codes the validation path instead of using val_path.
train_path = '../dataset/MICCAI_BraTS2020_TrainingData/survival_info.csv'
val_path = '../dataset/MICCAI_BraTS2020_ValidationData/survival_evaluation.csv'

In [75]:

def ensure_directory_exists(filepath):
    """Create the parent directory of ``filepath`` if it does not exist yet.

    Parameters
    ----------
    filepath : str or os.PathLike
        Path of a file that is about to be written. Only its directory part
        (``os.path.dirname(filepath)``) is created, including any missing
        intermediate directories. A path that ends with a separator is
        therefore treated as the directory itself.

    Notes
    -----
    A bare file name has an empty directory part, and ``os.makedirs('')``
    raises ``FileNotFoundError``. That case is skipped: the file would be
    written to the current working directory, which already exists.
    """
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)


In [76]:
def load_radiomic_features(target_directory, file_name):
    """Load a single ``.npy`` feature file and report where it came from.

    Parameters
    ----------
    target_directory : str
        Directory that contains the file.
    file_name : str
        Name of the ``.npy`` file inside ``target_directory``.

    Returns
    -------
    numpy.ndarray
        The array returned by ``np.load`` for the joined path.
    """
    file_path = os.path.join(target_directory, file_name)
    loaded = np.load(file_path)
    # Echo the resolved path so the notebook output records the source file.
    print(f"Array loaded from '{file_path}'")
    return loaded


In [77]:
def save_model_and_parameters(model, modality_used, mask, params, median_survival_times, expected_survival_times, validation_data, selected_features, pca):
    """Persist a fitted model and its preprocessing artefacts for one modality.

    All files are written to
    ``./models/{selection_method}_feature_selection/CoxPH/{modality_used}/``
    where ``selection_method`` is the notebook-level setting:

    * ``coxph_model.pkl``             - ``model`` dumped with joblib
    * ``params.txt``                  - text representation of ``params``
    * ``{selection_method}_mask.npy`` - ``mask`` (only when a mask is given)
    * ``selected_features.pkl``       - ``selected_features`` dumped with joblib
    * ``pca_model.pkl``               - ``pca`` dumped with joblib

    Parameters
    ----------
    model : object
        Fitted model to serialise.
    modality_used : str
        Name of the modality combination; used as the last directory level.
    mask : array-like or None
        Feature-selection mask. ``None`` means there is no mask and no mask
        file is written.
    params : object
        Hyper-parameters; written via string formatting.
    median_survival_times, expected_survival_times, validation_data : object
        Accepted so existing callers keep working. This function does not
        write them anywhere.
    selected_features : object
        Names or indices of the features kept before PCA.
    pca : object
        Fitted PCA transformer.
    """
    model_dir = f"./models/{selection_method}_feature_selection/CoxPH/{modality_used}/"
    # Create the directory itself instead of "the parent of a file path", so
    # this does not depend on model_dir ending with a path separator.
    os.makedirs(model_dir, exist_ok=True)

    saved_items = []

    # Fitted model.
    model_file = os.path.join(model_dir, 'coxph_model.pkl')
    joblib.dump(model, model_file)
    print(f"Model saved successfully to '{model_file}'.")
    saved_items.append("Model")

    # Hyper-parameters, as plain text.
    params_file = os.path.join(model_dir, 'params.txt')
    with open(params_file, 'w') as file:
        file.write(f"Parameters: {params}\n")
    saved_items.append("parameters")

    # Feature mask. np.save(None) would store a pickled object array, which
    # np.load refuses to read under its default allow_pickle=False, so a
    # missing mask is skipped rather than written.
    if mask is not None:
        mask_file = os.path.join(model_dir, f'{selection_method}_mask.npy')
        np.save(mask_file, mask)
        saved_items.append("feature mask")
    else:
        print("No feature mask supplied; mask file not written.")

    # Feature indices or column names kept before PCA.
    feature_selection_path = os.path.join(model_dir, 'selected_features.pkl')
    joblib.dump(selected_features, feature_selection_path)
    print(f"Selected features saved to '{feature_selection_path}'.")
    saved_items.append("selected features")

    # Fitted PCA transformer, needed to project new data the same way.
    pca_path = os.path.join(model_dir, 'pca_model.pkl')
    joblib.dump(pca, pca_path)
    print(f"PCA model saved to '{pca_path}'.")
    saved_items.append("and PCA model")

    # Report only what was actually written (predictions are not saved here).
    print(f"{', '.join(saved_items)} saved successfully for modality {modality_used}.")

In [78]:
def load_and_combine_features(modality_keys, dataset_type):
    """Load the backbone outputs of several modalities and join them column-wise.

    For every key in ``modality_keys`` the file
    ``../local_spatial_Framework/features/<key>/<dataset_type>/<dataset_type>_backbone_outputs.npy``
    is read; the arrays are then concatenated along axis 1 in the order of
    ``modality_keys``.

    Parameters
    ----------
    modality_keys : iterable of str
        Modality names, one sub-directory each.
    dataset_type : str
        Split name used both as directory and as file-name prefix.

    Returns
    -------
    numpy.ndarray
        Concatenation of all loaded arrays along axis 1.
    """
    per_modality = [
        np.load(f'../local_spatial_Framework/features/{modality}/{dataset_type}/{dataset_type}_backbone_outputs.npy')
        for modality in modality_keys
    ]
    return np.concatenate(per_modality, axis=1)


In [79]:
def make_csv(y_pred_validation, modality_used):
    """Write predicted survival days for the validation cases to a CSV file.

    The case IDs are read from the ``BraTS20ID`` column of the validation
    ``survival_evaluation.csv``; prediction *i* is paired with ID *i*. The
    output has the header ``ID,Days`` and is written to
    ``../radiomics_local_global_predictions/{selection_method}_feature_selection/{model_used}/{modality_used}_{model_used}.csv``
    (``selection_method`` and ``model_used`` are notebook-level settings).

    Parameters
    ----------
    y_pred_validation : iterable of numbers
        One predicted survival time per validation case, in the same order as
        the rows of the validation CSV. Each value is converted with
        ``int()``, i.e. truncated toward zero.
    modality_used : str
        Name of the modality combination; becomes part of the file name.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of validation
        IDs. Without this check ``zip`` would silently drop the surplus and
        produce an incomplete prediction file.
    """
    df = pd.read_csv('../dataset/MICCAI_BraTS2020_ValidationData/survival_evaluation.csv')
    validation_ids = df['BraTS20ID'].values

    # Materialise once so the length can be checked for any iterable input.
    predicted_days = list(y_pred_validation)
    if len(predicted_days) != len(validation_ids):
        raise ValueError(
            f"Got {len(predicted_days)} predictions for "
            f"{len(validation_ids)} validation cases."
        )

    filename = f"../radiomics_local_global_predictions/{selection_method}_feature_selection/{model_used}/{modality_used}_{model_used}.csv"
    ensure_directory_exists(filename)

    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["ID", "Days"])
        writer.writerows(
            [case_id, int(day)] for case_id, day in zip(validation_ids, predicted_days)
        )

    print(f"CSV file '{filename}' created successfully.")

In [80]:

def load_features(modality_used):
    """Load the stored global feature arrays and training labels for a modality.

    Reads ``train_features.npy``, ``validate_features.npy`` and
    ``train_labels.npy`` from ``../Global_extracted_features/<modality_used>``.

    Parameters
    ----------
    modality_used : str
        Sub-directory name under ``../Global_extracted_features``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train_features, validate_features, train_labels)``.
    """
    base_dir = os.path.join('../Global_extracted_features', modality_used)

    def _read(array_file):
        # Every array of a modality lives directly inside base_dir.
        return np.load(os.path.join(base_dir, array_file))

    return _read('train_features.npy'), _read('validate_features.npy'), _read('train_labels.npy')

In [81]:
def train_model(train_features, test_features, train_labels, modality_used,
                n_components=50, penalizer=1.0, corr_threshold=0.9):
    """Fit a penalised Cox PH model on PCA-reduced features and export predictions.

    Steps:

    1. Drop every feature whose absolute correlation with an earlier feature
       column exceeds ``corr_threshold``.
    2. Project the remaining features with PCA (fitted on the training rows).
    3. Fit ``CoxPHFitter`` on the components plus the label columns.
    4. Predict survival curves for the test rows and derive a median and an
       expected survival time per row.
    5. Write the expected times with ``make_csv`` and persist the model and
       preprocessing objects with ``save_model_and_parameters``.

    Parameters
    ----------
    train_features, test_features : pandas.DataFrame
        Feature matrices with identical columns.
    train_labels : pandas.DataFrame
        Must contain ``survival_days``; rows are matched to ``train_features``
        by position. If there is no ``event`` column, every row is treated as
        an observed event. The caller's frame is not modified.
    modality_used : str
        Name of the modality combination, forwarded to the output helpers.
    n_components : int, default 50
        Requested number of principal components. It is reduced when fewer
        rows or columns are available, because PCA cannot produce more.
    penalizer : float, default 1.0
        Penalizer strength passed to ``CoxPHFitter``.
    corr_threshold : float, default 0.9
        Absolute-correlation cut-off for the redundancy filter.

    Returns
    -------
    tuple
        ``(coxph, survival_predictions, selected_features, pca)`` where
        ``survival_predictions`` is the frame returned by
        ``predict_survival_function`` (times on the index, one column per
        test row).

    Raises
    ------
    ValueError
        If the correlation filter removes every feature.
    """
    # Work on a private, positionally indexed copy of the labels: the caller's
    # frame stays untouched and the axis=1 concat below (which aligns on the
    # index) cannot misalign rows when the labels carry a non-default index.
    labels = train_labels.reset_index(drop=True)
    if 'event' not in labels.columns:
        labels['event'] = 1

    # Debug: Print initial feature shapes
    print(f"Initial training features shape: {train_features.shape}")
    print(f"Initial test features shape: {test_features.shape}")

    # Feature selection: remove highly correlated features. Only feature
    # columns take part; the labels have no business in a redundancy filter.
    corr_matrix = train_features.corr().abs()
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    # NaN entries (lower triangle, constant columns) compare as False.
    high_corr_features = upper.columns[(upper > corr_threshold).any(axis=0)].tolist()
    print(f"Highly correlated features to be removed: {high_corr_features}")

    # Drop highly correlated features (set lookup keeps this linear).
    dropped = set(high_corr_features)
    selected_features = [col for col in train_features.columns if col not in dropped]
    if not selected_features:
        raise ValueError("No features left after removing highly correlated features.")
    train_features_selected = train_features[selected_features]
    test_features_selected = test_features[selected_features]

    # Debug: Print feature shapes after feature selection
    print(f"Training features shape after feature selection: {train_features_selected.shape}")
    print(f"Test features shape after feature selection: {test_features_selected.shape}")

    # Dimensionality reduction: PCA cannot return more components than
    # min(n_samples, n_features), so clamp the request instead of crashing.
    n_components_used = min(n_components, *train_features_selected.shape)
    if n_components_used != n_components:
        print(f"Requested {n_components} PCA components; using {n_components_used}.")
    pca = PCA(n_components=n_components_used)
    train_features_pca = pca.fit_transform(train_features_selected)
    test_features_pca = pca.transform(test_features_selected)

    # Debug: Print feature shapes after PCA
    print(f"Training features shape after PCA: {train_features_pca.shape}")
    print(f"Test features shape after PCA: {test_features_pca.shape}")

    train_features_pca_df = pd.DataFrame(train_features_pca)
    test_features_pca_df = pd.DataFrame(test_features_pca)
    train_data_pca = pd.concat([train_features_pca_df, labels], axis=1)

    # Cox proportional hazards model with the requested penalizer.
    coxph = CoxPHFitter(penalizer=penalizer)
    coxph.fit(train_data_pca, duration_col='survival_days', event_col='event')
    coxph.print_summary()

    # Predict survival function with PCA-transformed test features
    survival_predictions = coxph.predict_survival_function(test_features_pca_df)

    # Median survival time: the first time at which the survival probability
    # has dropped to 0.5 or below. (Selecting S(t) >= 0.5 and taking the
    # minimum would return the first grid time for every case.) Curves that
    # never reach 0.5 fall back to the last time on the grid.
    last_time = survival_predictions.index.max()
    median_survival_times = survival_predictions.apply(
        lambda curve: curve[curve <= 0.5].index.min(), axis=0
    ).fillna(last_time)
    # Expected survival time: area under each predicted curve over the grid.
    expected_survival_times = survival_predictions.apply(
        lambda curve: np.trapz(curve, curve.index), axis=0
    )
    make_csv(expected_survival_times, modality_used)

    # Save model, parameters, and all necessary data
    save_model_and_parameters(
        model=coxph,
        modality_used=modality_used,
        mask=None,  # no boolean mask is produced here; selected_features is saved instead
        params={'penalizer': penalizer, 'n_components': n_components_used},
        median_survival_times=median_survival_times,
        expected_survival_times=expected_survival_times,
        validation_data=test_features,
        selected_features=selected_features,
        pca=pca
    )

    return coxph, survival_predictions, selected_features, pca


In [82]:

# Load the stored radiomics matrices for the training and validation cases.
# The generator is consumed in order, so the training file is read first.
radiomic_train_features, radiomic_validation_features = (
    load_radiomic_features("../radiomics features/all", npy_name)
    for npy_name in ("radiomics_train.npy", "radiomics_validate.npy")
)

# Show both shapes so row counts can be compared with the other feature sets.
print("radiomic_train_features", radiomic_train_features.shape)
print("radiomic_validation_features", radiomic_validation_features.shape)


Array loaded from '../radiomics features/all/radiomics_train.npy'
Array loaded from '../radiomics features/all/radiomics_validate.npy'
radiomic_train_features (235, 400)
radiomic_validation_features (29, 400)


In [83]:
# Placeholders for the fitted model and its predicted survival curves; the
# training loop reassigns both on every iteration.
coxph_model = survival_predictions = None

In [84]:
# Row removed from the global/local feature matrices and from the labels before
# they are merged with the radiomics features.
# NOTE(review): the loaded deep-feature arrays have one more training row than
# the radiomics matrix; presumably the radiomics extraction lacks this case -
# confirm that index 98 really is the missing one.
EXCLUDED_TRAIN_ROW = 98

# zip() stops at the shorter list, which would silently skip configurations.
if len(Light_GBM_global) != len(Light_GBM_local):
    raise ValueError(
        f"Light_GBM_global has {len(Light_GBM_global)} entries but "
        f"Light_GBM_local has {len(Light_GBM_local)}; they are paired one-to-one."
    )

for modality_used_global, modality_used_local in zip(Light_GBM_global, Light_GBM_local):
    modality_key_local = modality_used_local.split("_")
    modality_keys_list_global_features = modality_used_global.split("_")
    print(f"\nLoading and combining features... \n local-{modality_used_local}\n global-{modality_used_global}")

    # Local features: one array per modality, joined column-wise.
    local_train_features = load_and_combine_features(modality_key_local, 'train')
    local_validation_features = load_and_combine_features(modality_key_local, 'validation')

    # Global features and training labels are stored per modality combination.
    global_train_features, global_validate_features, train_labels = load_features(modality_used_global)

    print("global_train_features", global_train_features.shape, "local_train_features", local_train_features.shape)
    print("global_validate_features", global_validate_features.shape, "local_validation_features", local_validation_features.shape)

    local_global_training_features = np.concatenate((global_train_features, local_train_features), axis=1)
    local_global_validation_features = np.concatenate((global_validate_features, local_validation_features), axis=1)

    # Drop the same row from labels and features so they stay aligned.
    train_labels = np.delete(train_labels, EXCLUDED_TRAIN_ROW, axis=0)
    local_global_training_features = np.delete(local_global_training_features, EXCLUDED_TRAIN_ROW, axis=0)

    # Fail with a readable message instead of a bare concatenate error.
    if local_global_training_features.shape[0] != radiomic_train_features.shape[0]:
        raise ValueError(
            f"Training rows differ: {local_global_training_features.shape[0]} deep-feature rows "
            f"vs {radiomic_train_features.shape[0]} radiomics rows."
        )
    if local_global_validation_features.shape[0] != radiomic_validation_features.shape[0]:
        raise ValueError(
            f"Validation rows differ: {local_global_validation_features.shape[0]} deep-feature rows "
            f"vs {radiomic_validation_features.shape[0]} radiomics rows."
        )

    training_all_features = np.concatenate((local_global_training_features, radiomic_train_features), axis=1)
    validation_all_features = np.concatenate((local_global_validation_features, radiomic_validation_features), axis=1)

    if len(train_labels) != training_all_features.shape[0]:
        raise ValueError(
            f"{len(train_labels)} training labels for {training_all_features.shape[0]} feature rows."
        )

    print("Combining all")
    print("radiomics_local_global_training", training_all_features.shape)
    print("radiomics_local_global_validation", validation_all_features.shape)

    modality_used = 'global_' + modality_used_global + '___local_' + modality_used_local

    # Convert training and validation features to DataFrame
    training_all_features_df = pd.DataFrame(training_all_features)
    validation_all_features_df = pd.DataFrame(validation_all_features)

    # Labels as a one-column frame; train_model expects 'survival_days'.
    train_labels_df = pd.DataFrame(train_labels, columns=['survival_days'])

    # Standardize the features; the scaler is fitted on the training rows only
    # and then applied unchanged to the validation rows.
    scaler = StandardScaler()

    training_all_features_scaled = scaler.fit_transform(training_all_features_df)
    validation_all_features_scaled = scaler.transform(validation_all_features_df)

    # Convert back to DataFrame
    training_all_features_scaled_df = pd.DataFrame(training_all_features_scaled, columns=training_all_features_df.columns)
    validation_all_features_scaled_df = pd.DataFrame(validation_all_features_scaled, columns=validation_all_features_df.columns)

    # Train the model; results of the last processed combination remain bound
    # to these names after the loop.
    coxph_model, survival_predictions, selected_features, pca = train_model(training_all_features_scaled_df, validation_all_features_scaled_df, train_labels_df, modality_used)


Loading and combining features... 
 local-flair_t1ce
 global-flair_t1ce
global_train_features (236, 744) local_train_features (236, 128)
global_validate_features (29, 744) local_validation_features (29, 128)
Combining all
radiomics_local_global_training (235, 1272)
radiomics_local_global_validation (29, 1272)
Initial training features shape: (235, 1272)
Initial test features shape: (29, 1272)
Highly correlated features to be removed: [2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 135, 136, 137, 138, 139, 140, 142, 143, 1

0,1
model,lifelines.CoxPHFitter
duration col,'survival_days'
event col,'event'
penalizer,1.0
l1 ratio,0.0
baseline estimation,breslow
number of observations,235
number of events observed,235
partial log-likelihood,-1031.23
time fit was run,2024-08-26 06:39:27 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
0,-0.0,1.0,0.01,-0.01,0.01,0.99,1.01,0.0,-0.31,0.76,0.4
1,0.01,1.01,0.01,-0.0,0.03,1.0,1.03,0.0,1.51,0.13,2.93
2,-0.0,1.0,0.01,-0.03,0.02,0.97,1.02,0.0,-0.3,0.77,0.38
3,-0.01,0.99,0.01,-0.03,0.01,0.97,1.02,0.0,-0.77,0.44,1.19
4,0.0,1.0,0.01,-0.03,0.03,0.97,1.03,0.0,0.05,0.96,0.06
5,0.01,1.01,0.01,-0.02,0.03,0.98,1.03,0.0,0.45,0.65,0.61
6,0.02,1.02,0.01,-0.01,0.04,0.99,1.04,0.0,1.08,0.28,1.84
7,0.01,1.01,0.02,-0.02,0.04,0.98,1.04,0.0,0.56,0.57,0.8
8,0.01,1.01,0.02,-0.02,0.04,0.98,1.04,0.0,0.47,0.64,0.65
9,-0.01,0.99,0.02,-0.04,0.02,0.96,1.03,0.0,-0.49,0.62,0.69

0,1
Concordance,0.71
Partial AIC,2162.45
log-likelihood ratio test,40.85 on 50 df
-log2(p) of ll-ratio test,0.29


CSV file '../radiomics_local_global_predictions/lasso_feature_selection/CoxPH/global_flair_t1ce___local_flair_t1ce_CoxPH.csv' created successfully.
Model saved successfully to './models/lasso_feature_selection/CoxPH/global_flair_t1ce___local_flair_t1ce/coxph_model.pkl'.
Selected features saved to './models/lasso_feature_selection/CoxPH/global_flair_t1ce___local_flair_t1ce/selected_features.pkl'.
PCA model saved to './models/lasso_feature_selection/CoxPH/global_flair_t1ce___local_flair_t1ce/pca_model.pkl'.
Model, parameters, feature mask, predictions, selected features, and PCA model saved successfully for modality global_flair_t1ce___local_flair_t1ce.
