In [138]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
import csv
import os
import warnings
from sklearn.model_selection import train_test_split
from sklearn.exceptions import ConvergenceWarning
import joblib
from stg import STG
import stg.utils as utils
import torch
from torch.utils.data import DataLoader, TensorDataset

In [139]:
 
# Suppress all warnings
warnings.filterwarnings('ignore')


In [140]:
# Name of the regressor family; used as a path component for saved models and CSVs.
model_used = 'Light_GBM'

# Feature-selection strategy dispatched on by the main loop.
selection_method = 'Stochastic Gates'
# selection_method = 'lasso'

# Modality combinations, paired by position: entry i of the global list is
# combined with entry i of the local list. Keys inside an entry are joined by "_".
Light_GBM_global = ["flair_t1ce_t2", "flair_t1ce_t2", "flair", "flair_t1ce", "flair_t1ce"]
Light_GBM_local = ["flair_t1ce_t2", "flair_t1ce", "flair", "t2", "flair_t1ce"]


In [141]:
def ensure_directory_exists(filepath):
    """Create the parent directory of ``filepath`` (and any missing ancestors).

    Parameters
    ----------
    filepath : str
        Path to a file, or a directory path ending in a separator. Only the
        part returned by ``os.path.dirname`` is created.

    Notes
    -----
    A bare file name such as ``"out.csv"`` has an empty directory component;
    in that case nothing is created, because ``os.makedirs('')`` would raise
    ``FileNotFoundError``.
    """
    directory = os.path.dirname(filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)


In [142]:
def load_radiomic_fetures(target_directory, file_name):
    """Read the ``.npy`` file ``file_name`` located in ``target_directory``.

    Prints the resolved path once the array has been loaded and returns
    the array produced by ``np.load``.
    """
    npy_location = os.path.join(target_directory, file_name)
    loaded = np.load(npy_location)
    print(f"Array loaded from '{npy_location}'")
    return loaded

In [143]:
def save_model_and_parameters(model, modality_used, mask, params):
    """Persist a fitted model, its hyper-parameters and the feature mask.

    Everything is written below
    ``./models/<selection_method>_feture_selection/<model_used>/<modality_used>/``
    (``selection_method`` and ``model_used`` are notebook-level settings):

    * ``model.joblib``              - ``model`` serialised with ``joblib.dump``
    * ``params.txt``                - one line, ``Best parameters: <params>``
    * ``<selection_method>_mask.npy`` - ``mask`` saved with ``np.save``
    """
    # The trailing slash makes ensure_directory_exists create this very directory.
    output_dir = f"./models/{selection_method}_feture_selection/{model_used}/{modality_used}/"
    ensure_directory_exists(output_dir)

    # Fitted estimator
    joblib.dump(model, os.path.join(output_dir, 'model.joblib'))

    # Hyper-parameters, as human-readable text
    with open(os.path.join(output_dir, 'params.txt'), 'w') as params_handle:
        params_handle.write(f"Best parameters: {params}\n")

    # Mask of the selected feature columns
    np.save(os.path.join(output_dir, f'{selection_method}_mask.npy'), mask)

    print(f"Model, parameters, and {selection_method} mask saved successfully for modality {modality_used}.")

In [144]:
def load_and_combine_features(modality_keys, dataset_type):
    """Load the backbone outputs of every modality and join them column-wise.

    For each key in ``modality_keys`` the file
    ``../local_spatial_Framework/features/<key>/<dataset_type>/<dataset_type>_backbone_outputs.npy``
    is read; the arrays are concatenated along axis 1 in the order given.
    """
    per_modality = [
        np.load(f'../local_spatial_Framework/features/{modality}/{dataset_type}/{dataset_type}_backbone_outputs.npy')
        for modality in modality_keys
    ]
    return np.concatenate(per_modality, axis=1)


In [145]:

def make_csv(y_pred_validation, modality_used):
    """Write predictions next to the BraTS validation IDs as an ``ID,Days`` CSV.

    IDs are read from the ``BraTS20ID`` column of
    ``../dataset/MICCAI_BraTS2020_ValidationData/survival_evaluation.csv`` and
    paired with ``y_pred_validation`` purely by position. The file is written to
    ``../radiomics_local_global_predictions/<selection_method>_feture_selection/<model_used>/<modality_used>_<model_used>.csv``.

    Parameters
    ----------
    y_pred_validation : iterable
        One prediction per validation case, in the same order as the IDs.
    modality_used : str
        Label used in the output file name.

    Notes
    -----
    If the number of predictions differs from the number of IDs, the surplus
    is dropped (as before) but a notice is printed, because positional pairing
    may then attach predictions to the wrong IDs.
    """
    df = pd.read_csv('../dataset/MICCAI_BraTS2020_ValidationData/survival_evaluation.csv')
    validation_ids = df['BraTS20ID'].values
    filename = f"../radiomics_local_global_predictions/{selection_method}_feture_selection/{model_used}/{modality_used}_{model_used}.csv"

    ensure_directory_exists(filename)

    # Materialise once so the length can be checked even for generators.
    predictions = list(y_pred_validation)
    if len(predictions) != len(validation_ids):
        # zip() below stops at the shorter sequence; make that visible.
        print(f"Warning: {len(validation_ids)} validation IDs but {len(predictions)} predictions; "
              "rows are paired by position and the surplus is dropped.")

    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["ID", "Days"])
        writer.writerows(zip(validation_ids, predictions))

    print(f"CSV file '{filename}' created successfully.")


In [146]:
def load_features(modality_used):
    """Load the global feature arrays stored for ``modality_used``.

    Reads ``train_features.npy``, ``validate_features.npy`` and
    ``train_labels.npy`` from ``../Global_extracted_features/<modality_used>``
    and returns them as a tuple in that order.
    """
    feature_dir = os.path.join('../Global_extracted_features', modality_used)

    def _read(npy_name):
        # Every array lives in the same per-modality directory.
        return np.load(os.path.join(feature_dir, npy_name))

    return _read('train_features.npy'), _read('validate_features.npy'), _read('train_labels.npy')


In [147]:

def train_model(train_features, validate_features, train_labels, modality_used):
    """Select features with Lasso, then tune and fit a LightGBM regressor.

    Pipeline:
      1. Standardise both matrices with a scaler fitted on ``train_features``.
      2. Fit ``LassoCV`` and keep the columns whose coefficient is non-zero.
      3. Run a randomized hyper-parameter search over ``LGBMRegressor``.
      4. Predict for ``validate_features`` and write the CSV via ``make_csv``.
      5. Persist the best estimator, its parameters and the mask via
         ``save_model_and_parameters``.

    Returns ``None``. If Lasso keeps no column, a notice is printed and the
    function stops before any model is trained or file is written.
    """
    standardizer = StandardScaler()
    X_train = standardizer.fit_transform(train_features)
    X_validate = standardizer.transform(validate_features)

    # Column count going into the selection step
    print(f"Size of features before Lasso: {X_train.shape}")

    # Cross-validated Lasso over a fixed alpha grid
    alpha_grid = np.logspace(-4, -0.5, 30)
    lasso_cv = LassoCV(cv=5, random_state=42, max_iter=100, alphas=alpha_grid)
    lasso_cv.fit(X_train, train_labels)

    # Columns with a non-zero coefficient survive
    mask = lasso_cv.coef_ != 0
    X_train_kept = X_train[:, mask]
    X_validate_kept = X_validate[:, mask]

    # Nothing to train on if every coefficient was shrunk to zero
    if X_train_kept.shape[1] == 0:
        print(f"No features selected for modality {modality_used}. Skipping this combination.")
        return

    # Column count after the selection step
    print(f"Size of features after Lasso: {X_train_kept.shape}")

    search_space = dict(
        num_leaves=[31, 63, 127],
        max_depth=[-1, 5, 10, 20],
        learning_rate=[0.01, 0.05, 0.1],
        n_estimators=[100, 200, 300],
        min_child_samples=[10, 20, 30],
    )

    tuner = RandomizedSearchCV(
        lgb.LGBMRegressor(random_state=42),
        param_distributions=search_space,
        n_iter=20,
        cv=5,
        random_state=42,
        n_jobs=-1,
    )
    tuner.fit(X_train_kept, train_labels)

    make_csv(tuner.predict(X_validate_kept), modality_used)

    # Keep the fitted estimator together with the Lasso mask it was trained on
    save_model_and_parameters(tuner.best_estimator_, modality_used, mask, tuner.best_params_)

In [148]:
def train_model_with_stochastic_gates(train_features, test_features, train_labels, modality_used, device='cpu', validation_split=0.2):
    """Select features with Stochastic Gates (STG), then tune and fit LightGBM.

    Pipeline:
      1. Hold out ``validation_split`` of the training rows and train an STG
         regressor on the remainder (scaled with a scaler fitted on that
         remainder only), monitoring the hold-out.
      2. Keep the columns whose gate probability exceeds 0.5. If reading the
         gates fails, or no column passes the threshold, all columns are kept.
      3. Re-standardise the *full* training matrix, apply the mask, and run a
         randomized hyper-parameter search over ``LGBMRegressor``.
      4. Predict for ``test_features`` and write the CSV via ``make_csv``.
      5. Persist the best estimator, its parameters and the mask via
         ``save_model_and_parameters``.

    Parameters
    ----------
    train_features, test_features : array-like
        Feature matrices with the same number of columns.
    train_labels : array-like
        Regression targets, one per training row.
    modality_used : str
        Label used in messages and output paths.
    device : str, default 'cpu'
        Forwarded to ``STG``.
    validation_split : float, default 0.2
        Forwarded to ``train_test_split`` as ``test_size``.

    Returns
    -------
    None
        If STG fitting raises, diagnostics are printed and nothing is written.
    """
    # Split training data into train and validation sets for STG
    train_features_stg, val_features_stg, train_labels_stg, val_labels_stg = train_test_split(
        train_features, train_labels, test_size=validation_split, random_state=42)

    # This scaler is fitted on the STG training portion only.
    stg_scaler = StandardScaler()
    train_features_scaled = np.asarray(stg_scaler.fit_transform(train_features_stg))
    val_features_scaled = np.asarray(stg_scaler.transform(val_features_stg))
    train_labels_stg = np.asarray(train_labels_stg)
    val_labels_stg = np.asarray(val_labels_stg)

    print(f"Size of training features before Stochastic Gates: {train_features_scaled.shape}")
    print(f"Size of validation features before Stochastic Gates: {val_features_scaled.shape}")

    # Named stg_model so the imported ``stg`` package is not shadowed locally.
    stg_model = STG(task_type='regression', input_dim=train_features_scaled.shape[1], output_dim=1,
                    hidden_dims=[100, 50], activation='relu', optimizer='Adam', learning_rate=0.01,
                    batch_size=32, feature_selection=True, device=device)

    try:
        # Fit the STG model with train data and validate on the validation set
        stg_model.fit(train_features_scaled, train_labels_stg, nr_epochs=10000, verbose=1, print_interval=5000,
                      valid_X=val_features_scaled, valid_y=val_labels_stg)
    except Exception as e:
        print(f"Error during STG fitting: {str(e)}")
        print(f"Train features shape: {train_features_scaled.shape}")
        print(f"Train labels shape: {train_labels_stg.shape}")
        print(f"Validation features shape: {val_features_scaled.shape}")
        print(f"Validation labels shape: {val_labels_stg.shape}")
        return

    # Gate probabilities act as per-feature importance scores
    try:
        importance_scores = stg_model.get_gates(mode='prob')
    except Exception as e:
        print(f"Error getting feature importance: {str(e)}")
        print("Using all features.")
        importance_scores = np.ones(train_features_scaled.shape[1])

    # Select features with importance scores above a threshold
    mask = np.asarray(importance_scores) > 0.5  # Adjust threshold as necessary

    # The downstream regressor sees the full training set, so the scaler is
    # re-fitted on all training rows (computed once and reused below).
    full_scaler = StandardScaler()
    train_features_full = full_scaler.fit_transform(train_features)
    test_features_full = full_scaler.transform(test_features)

    if np.count_nonzero(mask) == 0:
        print(f"No features selected for modality {modality_used}. Using all features.")
        # Keep the saved mask consistent with the columns the model is trained
        # on; previously an all-False mask was stored next to an all-feature model.
        mask = np.ones_like(mask, dtype=bool)

    train_features_selected = train_features_full[:, mask]
    test_features_selected = test_features_full[:, mask]

    print(f"Size of features after Stochastic Gates: {train_features_selected.shape}")

    param_dist = {
        'num_leaves': [31, 63, 127],
        'max_depth': [-1, 5, 10, 20],
        'learning_rate': [0.01, 0.05, 0.1],
        'n_estimators': [100, 200, 300],
        'min_child_samples': [10, 20, 30]
    }

    lgb_model = lgb.LGBMRegressor(random_state=42)
    random_search = RandomizedSearchCV(lgb_model, param_distributions=param_dist, n_iter=20, cv=5, random_state=42, n_jobs=-1)
    random_search.fit(train_features_selected, train_labels)

    y_pred_test = random_search.predict(test_features_selected)
    make_csv(y_pred_test, modality_used)

    # Save the model, parameters, and Stochastic Gates mask
    save_model_and_parameters(random_search.best_estimator_, modality_used, mask, random_search.best_params_)


In [149]:

# Radiomic feature matrices for the training and validation cohorts; both
# files live in the same directory and are loaded in this order.
radiomic_Train_fetures, radiomic_Validation_fetures = (
    load_radiomic_fetures("../radiomics features/all", npy_name)
    for npy_name in ("radiomics_train.npy", "radiomics_validate.npy")
)

print("radiomic_Train_fetures", radiomic_Train_fetures.shape)
print("radiomic_Validation_fetures", radiomic_Validation_fetures.shape)

Array loaded from '../radiomics features/all/radiomics_train.npy'
Array loaded from '../radiomics features/all/radiomics_validate.npy'
radiomic_Train_fetures (235, 400)
radiomic_Validation_fetures (28, 400)


In [150]:
# Fail fast on a mistyped selection method: the dispatch at the bottom of the
# loop would otherwise load every feature set and then train nothing, silently.
if selection_method not in ("Stochastic Gates", "lasso"):
    raise ValueError(
        f"Unknown selection_method {selection_method!r}; expected 'Stochastic Gates' or 'lasso'."
    )

# Row removed from the local/global matrices (and from the training labels)
# before they are joined with the radiomic matrices, which have one row fewer.
# NOTE(review): presumably these are the cases without radiomic features -
# confirm that rows 98 / 27 are the right ones to drop.
TRAIN_ROW_TO_DROP = 98
VALIDATION_ROW_TO_DROP = 27

for modality_used_global, modality_used_local in zip(Light_GBM_global, Light_GBM_local):
    modality_key_local = modality_used_local.split("_")
    modality_keys_list_global_features = modality_used_global.split("_")
    print(f"\nLoading and combining features... \n local-{modality_used_local}\n global-{modality_used_global}")

    # Local (backbone) features: one block of columns per modality key
    local_train_features = load_and_combine_features(modality_key_local, 'train')
    local_validation_features = load_and_combine_features(modality_key_local, 'validation')

    # Global features and labels are stored per modality combination
    global_train_features, global_validate_features, train_labels = load_features(modality_used_global)

    print("global_train_features",global_train_features.shape, "local_train_features", local_train_features.shape)
    print("global_validate_features", global_validate_features.shape, "local_validation_features",local_validation_features.shape)

    # Global columns first, then local columns
    local_global_training_features = np.concatenate((global_train_features, local_train_features), axis=1)
    local_global_validation_features = np.concatenate((global_validate_features, local_validation_features), axis=1)

    # Labels are reloaded every iteration, so dropping the row here is not cumulative.
    train_labels = np.delete(train_labels, TRAIN_ROW_TO_DROP, axis=0)
    local_global_training_features = np.delete(local_global_training_features, TRAIN_ROW_TO_DROP, axis=0)
    local_global_validation_features = np.delete(local_global_validation_features, VALIDATION_ROW_TO_DROP, axis=0)

    # Append the radiomic columns; row counts must match after the deletions above
    training_all_features = np.concatenate((local_global_training_features, radiomic_Train_fetures), axis=1)
    validation_all_features = np.concatenate((local_global_validation_features,radiomic_Validation_fetures), axis=1)

    print("combining all")
    print("radiomics_local_global_training",training_all_features.shape)
    print("radiomics_local_global_Validation",validation_all_features.shape)

    modality_used = 'global_' + modality_used_global + '___local_' + modality_used_local
    if selection_method == "Stochastic Gates":
        train_model_with_stochastic_gates(training_all_features, validation_all_features, train_labels, modality_used)
    elif selection_method == "lasso":
        train_model(training_all_features, validation_all_features, train_labels, modality_used)
    print("\n-----------------------------------------------------------------------------------------------------------\n")


Loading and combining features... 
 local-flair_t1ce_t2
 global-flair_t1ce_t2
global_train_features (236, 744) local_train_features (236, 192)
global_validate_features (29, 744) local_validation_features (29, 192)
combining all
radiomics_local_global_training (235, 1336)
radiomics_local_global_Validation (28, 1336)
Size of training features before Stochastic Gates: (188, 1336)
Size of validation features before Stochastic Gates: (47, 1336)
Epoch: 5000: loss=123144.934896 valid_loss=159005.156250
Epoch: 10000: loss=123152.944010 valid_loss=160042.921875
Size of features after Stochastic Gates: (235, 493)
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.079639 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 28293
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.052850 seconds.
You can set `force_col_

## For Stochastic Gates

Loading and combining features... 
 local-flair_t1ce_t2
 global-flair_t1ce_t2
global_train_features (236, 744) local_train_features (236, 192)
global_validate_features (29, 744) local_validation_features (29, 192)
combining all
radiomics_local_global_training (235, 1336)
radiomics_local_global_Validation (28, 1336)
Size of training features before Stochastic Gates: (188, 1336)
Size of validation features before Stochastic Gates: (47, 1336)
Size of features after Stochastic Gates: (235, 493)

-----------------------------------------------------------------------------------------------------------

Loading and combining features... 
 local-flair_t1ce
 global-flair_t1ce_t2
global_train_features (236, 744) local_train_features (236, 128)
global_validate_features (29, 744) local_validation_features (29, 128)
combining all
radiomics_local_global_training (235, 1272)
radiomics_local_global_Validation (28, 1272)
Size of training features before Stochastic Gates: (188, 1272)
Size of validation features before Stochastic Gates: (47, 1272)
Size of features after Stochastic Gates: (235, 500)

-----------------------------------------------------------------------------------------------------------

Loading and combining features... 
 local-flair
 global-flair
global_train_features (236, 744) local_train_features (236, 64)
global_validate_features (29, 744) local_validation_features (29, 64)
combining all
radiomics_local_global_training (235, 1208)
radiomics_local_global_Validation (28, 1208)
Size of training features before Stochastic Gates: (188, 1208)
Size of validation features before Stochastic Gates: (47, 1208)
Size of features after Stochastic Gates: (235, 453)

-----------------------------------------------------------------------------------------------------------

Loading and combining features... 
 local-t2
 global-flair_t1ce
global_train_features (236, 744) local_train_features (236, 64)
global_validate_features (29, 744) local_validation_features (29, 64)
combining all
radiomics_local_global_training (235, 1208)
radiomics_local_global_Validation (28, 1208)
Size of training features before Stochastic Gates: (188, 1208)
Size of validation features before Stochastic Gates: (47, 1208)
Size of features after Stochastic Gates: (235, 506)

-----------------------------------------------------------------------------------------------------------

Loading and combining features... 
 local-flair_t1ce
 global-flair_t1ce
global_train_features (236, 744) local_train_features (236, 128)
global_validate_features (29, 744) local_validation_features (29, 128)
combining all
radiomics_local_global_training (235, 1272)
radiomics_local_global_Validation (28, 1272)
Size of training features before Stochastic Gates: (188, 1272)
Size of validation features before Stochastic Gates: (47, 1272)
Size of features after Stochastic Gates: (235, 532)

# for Lasso
Loading and combining features... 
 local-flair_t1ce_t2
 global-flair_t1ce_t2
global_train_features (236, 744) local_train_features (236, 192)
global_validate_features (29, 744) local_validation_features (29, 192)
combining all
radiomics_local_global_training (235, 1336)
radiomics_local_global_Validation (28, 1336)
Size of features before Lasso: (235, 1336)
Size of features after Lasso: (235, 248)


-----------------------------------------------------------------------------------------------------------

Loading and combining features... 
 local-flair_t1ce
 global-flair_t1ce_t2
global_train_features (236, 744) local_train_features (236, 128)
global_validate_features (29, 744) local_validation_features (29, 128)
combining all
radiomics_local_global_training (235, 1272)
radiomics_local_global_Validation (28, 1272)
Size of features before Lasso: (235, 1272)
Size of features after Lasso: (235, 233)

-----------------------------------------------------------------------------------------------------------

Loading and combining features... 
 local-flair
 global-flair
global_train_features (236, 744) local_train_features (236, 64)
global_validate_features (29, 744) local_validation_features (29, 64)
combining all
radiomics_local_global_training (235, 1208)
radiomics_local_global_Validation (28, 1208)
Size of features before Lasso: (235, 1208)
Size of features after Lasso: (235, 234)

-----------------------------------------------------------------------------------------------------------

Loading and combining features... 
 local-t2
 global-flair_t1ce
global_train_features (236, 744) local_train_features (236, 64)
global_validate_features (29, 744) local_validation_features (29, 64)
combining all
radiomics_local_global_training (235, 1208)
radiomics_local_global_Validation (28, 1208)
Size of features before Lasso: (235, 1208)
Size of features after Lasso: (235, 234)

-----------------------------------------------------------------------------------------------------------

Loading and combining features... 
 local-flair_t1ce
 global-flair_t1ce
global_train_features (236, 744) local_train_features (236, 128)
global_validate_features (29, 744) local_validation_features (29, 128)
combining all
radiomics_local_global_training (235, 1272)
radiomics_local_global_Validation (28, 1272)
Size of features before Lasso: (235, 1272)
Size of features after Lasso: (235, 243)