In [13]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
import csv
import os
import warnings
from sklearn.exceptions import ConvergenceWarning

# Suppress ConvergenceWarning
# The filter is registered process-wide, so it stays active for every later
# cell until the kernel is restarted.
warnings.filterwarnings("ignore", category=ConvergenceWarning)


# Label of the downstream model. make_csv() embeds it in both the output
# directory and the file name of the predictions CSV.
model_used = 'Light_GBM'

def ensure_directory_exists(filepath):
    """Create the parent directory of ``filepath`` if it is missing.

    Args:
        filepath: Path of a file that is about to be written. Only its
            directory component is created; the file itself is not touched.

    Returns:
        None.

    Raises:
        OSError: Propagated from ``os.makedirs`` when the directory cannot be
            created (e.g. permission denied, or a path component is a file).
    """
    parent_dir = os.path.dirname(filepath)
    # A bare file name ("out.csv") has an empty directory component, and
    # os.makedirs('') raises FileNotFoundError. The current working directory
    # already exists, so there is nothing to create in that case.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)


In [2]:
def load_and_combine_features(modality_keys, dataset_type):
    """Load the saved backbone outputs of several modalities and join them column-wise.

    Args:
        modality_keys: Iterable of modality names; each one selects a
            sub-directory under ``../local_spatial/features``.
        dataset_type: Split name, used both as a sub-directory and as the
            file-name prefix of the ``*_backbone_outputs.npy`` file.

    Returns:
        A single array made by concatenating the per-modality arrays along
        axis 1, in the order given by ``modality_keys``.
    """
    per_modality_arrays = [
        np.load(f'../local_spatial/features/{modality}/{dataset_type}/{dataset_type}_backbone_outputs.npy')
        for modality in modality_keys
    ]
    # Rows stay aligned across modalities; only the feature axis grows.
    return np.concatenate(per_modality_arrays, axis=1)


In [3]:
def make_csv(y_pred_validation, modality_used):
    """Write validation predictions to a two-column ``ID,Days`` CSV file.

    The subject IDs come from the ``BraTS20ID`` column of the validation
    survival sheet and are paired positionally with ``y_pred_validation``.
    Pairing stops at the shorter of the two sequences.

    Args:
        y_pred_validation: Iterable of predicted values, one per validation
            subject, in the same order as the rows of the survival sheet.
        modality_used: Label for the feature combination; becomes part of the
            output file name.

    Returns:
        None. The file is written under
        ``../LASSO_local_global_predictions/<model_used>/`` and a confirmation
        line is printed.
    """
    survival_sheet = pd.read_csv('../dataset/MICCAI_BraTS2020_ValidationData/survival_evaluation.csv')
    subject_ids = survival_sheet['BraTS20ID'].values
    filename = f"../LASSO_local_global_predictions/{model_used}/{modality_used}_{model_used}.csv"

    ensure_directory_exists(filename)

    with open(filename, mode='w', newline='') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(["ID", "Days"])
        # One row per (subject, prediction) pair.
        csv_out.writerows(zip(subject_ids, y_pred_validation))

    print(f"CSV file '{filename}' created successfully.")


In [4]:
def load_features(modality_used):
    """Load the saved global feature arrays for one modality combination.

    Args:
        modality_used: Name of the sub-directory under
            ``../Global_extracted_features`` to read from.

    Returns:
        Tuple ``(train_features, validate_features, train_labels)`` holding
        the contents of ``train_features.npy``, ``validate_features.npy`` and
        ``train_labels.npy`` from that directory.
    """
    feature_dir = os.path.join('../Global_extracted_features', modality_used)

    def _read(file_name):
        # All three arrays live side by side in the same directory.
        return np.load(os.path.join(feature_dir, file_name))

    return (
        _read('train_features.npy'),
        _read('validate_features.npy'),
        _read('train_labels.npy'),
    )


In [5]:
def train_model(train_features, validate_features, train_labels, modality_used):
    """Standardize the features and run Lasso-based feature selection.

    The scaler is fitted on the training features only and then applied to the
    validation features. A ``LassoCV`` model is fitted on the scaled training
    data, and the columns whose coefficient is non-zero are kept. The feature
    matrix shape is printed before and after selection.

    The LightGBM randomized search and the CSV export that would consume the
    selected features are currently commented out, so this function only
    reports the selection result.

    Args:
        train_features: Training feature matrix passed to ``StandardScaler``.
        validate_features: Validation feature matrix with the same columns.
        train_labels: Regression targets for the training rows.
        modality_used: Label of the feature combination, used in the message
            printed when no feature survives selection.

    Returns:
        None in every case.
    """
    standardizer = StandardScaler()
    X_train = standardizer.fit_transform(train_features)
    X_validate = standardizer.transform(validate_features)

    # Report the dimensionality going into the selector.
    print(f"Size of features before Lasso: {X_train.shape}")

    # Cross-validated Lasso over a fixed log-spaced grid of 30 alphas.
    alpha_grid = np.logspace(-4, -0.5, 30)
    selector = LassoCV(cv=5, random_state=42, max_iter=10000, alphas=alpha_grid)
    selector.fit(X_train, train_labels)

    # Keep only the columns the Lasso did not shrink to exactly zero.
    keep = selector.coef_ != 0
    X_train_kept = X_train[:, keep]
    X_validate_kept = X_validate[:, keep]

    # Nothing to train on if every coefficient was zeroed out.
    if X_train_kept.shape[1] == 0:
        print(f"No features selected for modality {modality_used}. Skipping this combination.")
        return

    # Report the dimensionality coming out of the selector.
    print(f"Size of features after Lasso: {X_train_kept.shape}")

    # Search space for the (currently disabled) LightGBM randomized search below.
    param_dist = {
        'num_leaves': [31, 63, 127],
        'max_depth': [-1, 5, 10, 20],
        'learning_rate': [0.01, 0.05, 0.1],
        'n_estimators': [100, 200, 300],
        'min_child_samples': [10, 20, 30]
    }

    # lgb_model = lgb.LGBMRegressor(random_state=42)
    # random_search = RandomizedSearchCV(lgb_model, param_distributions=param_dist, n_iter=20, cv=5, random_state=42, n_jobs=-1)
    # random_search.fit(train_features_selected, train_labels)

    # y_pred_validation = random_search.predict(validate_features_selected)
    # make_csv(y_pred_validation, modality_used)


In [6]:
# Global-feature modality combinations; each string joins modality names with "_".
# Same length as GBM_local. No code visible in this notebook reads these two
# lists; the equally shaped XGB_* lists are zipped index-by-index, so these are
# presumably meant to be paired the same way — TODO confirm.
GBM_global = [
    "flair_t1ce",
    "flair_t1_t1ce_t2",
    "flair",
    "flair_t1ce",
    "flair_t1ce",
    "flair_t1ce_t2",
    "flair_t1ce_t2",
    "flair_t1ce_t2",
    "flair"
]
# Local-feature modality combinations, positionally matched to GBM_global.
GBM_local = [
    "t2",
    "t1ce",
    "flair",
    "t1",
    "flair_t1ce",
    "flair",
    "flair_t1ce",
    "flair_t1ce_t2",
    "t1"
]

In [7]:
# Global-feature modality combinations; each string joins modality names with "_".
# Same length as RF_local. No code visible in this notebook reads these two
# lists; the equally shaped XGB_* lists are zipped index-by-index, so these are
# presumably meant to be paired the same way — TODO confirm.
RF_global = [
    "flair_t1ce_t2",
    "flair",
    "flair_t1ce_t2",
    "flair_t1ce_t2",
    "flair",
    "flair",
    "flair",
    "flair",
    "flair_t1ce"
]
# Local-feature modality combinations, positionally matched to RF_global.
RF_local = [
    "flair_t1ce",
    "flair",
    "flair",
    "flair_t1ce_t2",
    "t1",
    "flair_t1ce",
    "flair_t1ce_t2",
    "flair_t1_t1ce_t2",
    "flair_t1_t1ce_t2"
]



In [8]:
# Global-feature modality combinations; each string joins modality names with "_".
# Entry i is paired with XGB_local[i] by the zip() in the paired-run loop, and
# the string is passed unchanged to load_features() as a directory name.
XGB_global = [
    "flair",
    "flair",
    "flair_t1ce",
    "flair_t1ce",
    "flair_t1ce_t2",
    "flair_t1ce_t2",
    "flair",
    "flair_t1ce",
    "flair_t1ce"
]
# Local-feature modality combinations, positionally matched to XGB_global.
# The paired-run loop splits each string on "_" to get the individual
# modality keys handed to load_and_combine_features().
XGB_local = [
    "flair",
    "flair_t1ce",
    "t2",
    "flair_t1_t1ce_t2",
    "flair",
    "t2",
    "flair_t1ce_t2",
    "flair",
    "flair_t1ce"
]



In [9]:
# Define modality keys
# Each entry is a list of modality names forming one feature combination.
# Entries that are commented out are disabled combinations.
# Global-feature combinations (outer loop of the commented-out grid cell).
modality_keys_list_global_features = [
    ["flair"],
    # ["t1ce"],
    # ["flair", "t1ce"],
    # ["flair", "t1ce", "t2"],
    # ["flair", "t1", "t1ce", "t2"]
]

# Local-feature combinations (inner loop of the commented-out grid cell).
modality_keys_list_local_features = [
    ["flair"],
    # ["t1ce"],
    # ['t1'],
    # ['t2'],
    # ["flair", "t1ce"],
    # ["flair", "t1ce", "t2"],
    # ["flair", "t1", "t1ce", "t2"]
]

In [16]:
# Run the pipeline once per (global, local) modality pairing, matched by index.
# NOTE(review): the pairings come from the XGB_* lists while model_used is
# 'Light_GBM' — confirm this is the intended combination.
for modality_used_global, modality_used_local in zip(XGB_global, XGB_local):
    # Local features are stored per modality, so the joined name is split back
    # into individual keys. Global features are stored per combination and are
    # looked up by the joined name directly, so no split is needed for them.
    # (The previous version assigned the split global name to
    # modality_keys_list_global_features, which was never read here and
    # overwrote the module-level configuration list of the same name.)
    modality_key_local = modality_used_local.split("_")
    print(f"\nLoading and combining features... \n local-{modality_used_local}\n global-{modality_used_global}")
    local_train_features = load_and_combine_features(modality_key_local, 'train')
    local_validation_features = load_and_combine_features(modality_key_local, 'validation')

    global_train_features, global_validate_features, train_labels = load_features(modality_used_global)

    # Global columns first, then local columns, for both splits.
    combined_training_features = np.concatenate((global_train_features, local_train_features), axis=1)
    combined_validation_features = np.concatenate((global_validate_features, local_validation_features), axis=1)

    modality_used = 'global_' + modality_used_global + '___local_' + modality_used_local
    train_model(combined_training_features, combined_validation_features, train_labels, modality_used)





Loading and combining features... 
 local-flair
 global-flair
Size of features before Lasso: (236, 808)
Size of features after Lasso: (236, 163)

Loading and combining features... 
 local-flair_t1ce
 global-flair
Size of features before Lasso: (236, 872)
Size of features after Lasso: (236, 168)

Loading and combining features... 
 local-t2
 global-flair_t1ce
Size of features before Lasso: (236, 808)
Size of features after Lasso: (236, 157)

Loading and combining features... 
 local-flair_t1_t1ce_t2
 global-flair_t1ce
Size of features before Lasso: (236, 1000)
Size of features after Lasso: (236, 166)

Loading and combining features... 
 local-flair
 global-flair_t1ce_t2
Size of features before Lasso: (236, 808)
Size of features after Lasso: (236, 184)

Loading and combining features... 
 local-t2
 global-flair_t1ce_t2
Size of features before Lasso: (236, 808)
Size of features after Lasso: (236, 186)

Loading and combining features... 
 local-flair_t1ce_t2
 global-flair
Size of features

In [None]:
 

# for modality_key_global in modality_keys_list_global_features:
#     for modality_key_local in modality_keys_list_local_features:
#         # Load the combined features
#         modality_used_local = "_".join(modality_key_local)
#         print("Loading and combining features...")
#         local_train_features = load_and_combine_features(modality_key_local, 'train')
#         local_validation_features = load_and_combine_features(modality_key_local, 'validation')

#         modality_used_global = "_".join(modality_key_global)
#         global_train_features, global_validate_features, train_labels = load_features(modality_used_global)

#         combined_training_features = np.concatenate((global_train_features, local_train_features), axis=1)
#         combined_validation_features = np.concatenate((global_validate_features, local_validation_features), axis=1)

#         modality_used = 'global_' + modality_used_global + '___local_' + modality_used_local
#         train_model(combined_training_features, combined_validation_features, train_labels, modality_used)


Loading and combining features...
Size of features before Lasso: (236, 808)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

Size of features after Lasso: (236, 163)


  model = cd_fast.enet_coordinate_descent(


- Size of features before Lasso: (236, 808)
- Size of features after Lasso: (236, 163)