In [1]:
import yaml
import sys

sys.path.append('../../')
from Utils.Get_adni import post_imputation_adni

from sksurv.datasets import get_x_y
from sksurv.ensemble import RandomSurvivalForest
from loguru import logger
#Grid search with cross-validation
from sklearn.model_selection import GridSearchCV, KFold
from sksurv.ensemble import RandomSurvivalForest


# Random Survival Forest hyperparameter search 

In [5]:
# Inspect which hyperparameters work best, but only on the first dataset
from sksurv.metrics import as_concordance_index_ipcw_scorer, concordance_index_censored

def hyperparameter_search(df_train, name):
    """Grid-search Random Survival Forest hyperparameters and save the winner.

    Runs a 5-fold cross-validated ``GridSearchCV`` over a fixed grid of
    ``RandomSurvivalForest`` settings, logs the best configuration and writes
    it to ``./ADNI_{name}.yaml`` in the current working directory.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training table holding the feature columns together with the survival
        label columns ``"Event"`` (event indicator, ``1`` marks an event) and
        ``"M"`` (time), which are split off via ``sksurv.datasets.get_x_y``.
    name : str
        Label of this run; only used to build the output file name.

    Returns
    -------
    tuple
        ``(best_params, best_score)`` where ``best_params`` is the dict of the
        best grid point and ``best_score`` is its mean cross-validated score
        as a plain ``float``.
    """
    param_grid = {
        'n_estimators': [200, 400, 600, 800],
        'min_samples_split': [3, 5, 10],
        'min_samples_leaf': [3, 5, 10, 15],
        'max_features': ['sqrt', 'log2']
    }

    random_state = 42

    rsf_test = RandomSurvivalForest(n_jobs=-1, random_state=random_state)

    # NOTE(review): the folds are not shuffled, so they are contiguous slices
    # of df_train. Pass shuffle=True, random_state=random_state here if the row
    # order of df_train is not already random.
    cv = KFold(n_splits=5)

    # No `scoring` is given, so candidates are ranked by the estimator's own
    # `score` method (Harrell's concordance index for RandomSurvivalForest).
    # The original code called as_concordance_index_ipcw_scorer(rsf_test) and
    # threw the result away, which had no effect; that dead call is removed.
    # NOTE(review): if IPCW concordance was intended, the wrapped estimator
    # must be the one passed to GridSearchCV and the grid keys need an
    # `estimator__` prefix.
    grid_search = GridSearchCV(rsf_test,
                               param_grid,
                               cv=cv,
                               error_score=0.5,  # a failed fit is scored 0.5 instead of raising
                               n_jobs=12,
                               verbose=3)

    x_train, y_train = get_x_y(df_train, attr_labels=["Event", "M"], pos_label=1)

    grid_search.fit(x_train, y_train)

    best_params = grid_search.best_params_
    # Cast the NumPy scalar to a built-in float so it serialises as plain YAML.
    best_score = float(grid_search.best_score_)

    # loguru formats messages with str.format(); without a `{}` placeholder
    # the positional arguments are silently dropped from the log line.
    logger.info("Best parameters: {}", best_params)
    logger.info("Best score: {}", best_score)

    filepath = f'./ADNI_{name}.yaml'

    best_details = {
        'Best Parameters': best_params,
        'Best Score': best_score
    }

    with open(filepath, 'w') as file:
        yaml.dump(best_details, file, default_flow_style=False)
    return best_params, best_score


In [6]:
# Directory holding the imputed datasets, relative to this notebook.
path = "../../Datasets/Imputed/Merge_BL/"

# Load the training data for each diagnosis selection (only_first=True:
# presumably restricts loading to the first imputed dataset). Return values
# that are not needed here are bound to `_`; `config` is kept for logging.
dfs_train_mci, _, config = post_imputation_adni(
    path, only_first=True, verbose=True, dxs=[1]
)
dfs_train, _, _ = post_imputation_adni(
    path, only_first=True, verbose=True, dxs=[0, 1]
)

# Run labels, paired positionally with the training frames below.
names = ['MCI', 'All']
datasets = [dfs_train_mci[0], dfs_train[0]]

# Best hyperparameters and their cross-validated scores, in `names` order.
params, scores = [], []

for name, df_train in zip(names, datasets):
    logger.info(f"Config: {config}")
    logger.info(f"Finding optimal hyperparameters for method {name}")

    param, score = hyperparameter_search(df_train, name)
    params.append(param)
    scores.append(score)


[32m2024-05-31 15:26:33.463[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m22[0m - [1mConfig: {'cohort': ['ADNI1', 'ADNI2', 'ADNI3', 'ADNIGO'], 'device': 'cpu', 'directory_name': 'Merge_BL', 'drop_empty_dx': True, 'fill_dx': True, 'missing_value_cutoff': 0.6, 'num_datasets': 20, 'num_iterations': 20, 'num_threads': 14, 'prepare_sa': True, 'quality': False, 'random_state': 1991, 'save_all_iterations': True, 'set_mean_match_candidates': 5, 'train_test_split': 0.2}[0m
[32m2024-05-31 15:26:33.465[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m23[0m - [1mFinding optimal hyperparameters for method MCI[0m


Configuration file: ..\..\Datasets\Imputed\Merge_BL\setup.yaml

Preparing dataset 0...

Configuration file: ..\..\Datasets\Imputed\Merge_BL\setup.yaml

Preparing dataset 0...

Fitting 5 folds for each of 96 candidates, totalling 480 fits


[32m2024-05-31 15:28:42.470[0m | [1mINFO    [0m | [36m__main__[0m:[36mhyperparameter_search[0m:[36m27[0m - [1mBest parameters:[0m
[32m2024-05-31 15:28:42.472[0m | [1mINFO    [0m | [36m__main__[0m:[36mhyperparameter_search[0m:[36m28[0m - [1mBest score:[0m
[32m2024-05-31 15:28:42.490[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m22[0m - [1mConfig: {'cohort': ['ADNI1', 'ADNI2', 'ADNI3', 'ADNIGO'], 'device': 'cpu', 'directory_name': 'Merge_BL', 'drop_empty_dx': True, 'fill_dx': True, 'missing_value_cutoff': 0.6, 'num_datasets': 20, 'num_iterations': 20, 'num_threads': 14, 'prepare_sa': True, 'quality': False, 'random_state': 1991, 'save_all_iterations': True, 'set_mean_match_candidates': 5, 'train_test_split': 0.2}[0m
[32m2024-05-31 15:28:42.491[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m23[0m - [1mFinding optimal hyperparameters for method All[0m


Fitting 5 folds for each of 96 candidates, totalling 480 fits


[32m2024-05-31 15:32:58.574[0m | [1mINFO    [0m | [36m__main__[0m:[36mhyperparameter_search[0m:[36m27[0m - [1mBest parameters:[0m
[32m2024-05-31 15:32:58.578[0m | [1mINFO    [0m | [36m__main__[0m:[36mhyperparameter_search[0m:[36m28[0m - [1mBest score:[0m
