In [None]:
# Worker counts for parallel execution. `cpu_1` is handed to the
# hyper-parameter searches in `experiment` as `n_jobs`; `cpu_2` is not read
# anywhere in this cell (NOTE(review): presumably used by another cell).
cpu_1, cpu_2 = 7, 7
@Cache(
    cache_path="results/{experiment_name}.json"
)
def experiment(path, experiment_name, experiment_setup):
    """Run a hyper-parameter search on every dataset and collect the scores.

    For each entry of the module-level ``dataset_list`` a ``Pipeline`` is
    built from ``experiment_setup["pipe"]``, wrapped in the search selected
    by ``experiment_setup["hp_optimizer"]["type"]`` and scored. When
    ``"n_split_inner_cv"`` is configured the search is evaluated with
    ``cross_validate`` (nested cross-validation); otherwise the search is
    fitted once and the scores of its best candidate are reported.

    Args:
        path: Not read inside the function body.
            NOTE(review): presumably consumed by the cache decorator or kept
            for the callers' benefit -- confirm.
        experiment_name: Label copied into every result record (it also
            appears in the decorator's ``cache_path`` template).
        experiment_setup: Configuration dict. Keys read here:
            ``"task"``; ``"dataset_settings"["only_BMI"]``;
            ``"pipe"`` (step name -> Python expression string, must include
            an ``"estimator"`` entry); and ``"hp_optimizer"`` with
            ``"type"``, ``"metrics"``, ``"params"`` (a list of which only the
            first element is used, parameter name -> Python expression
            string), optional ``"n_iter"`` (default 100), and at least one
            of ``"n_split_outer_cv"`` / ``"n_split_inner_cv"``.

    Returns:
        A list with one dict per dataset holding the experiment name, the
        dataset variant, the estimator expression, the task, the optimizer
        type, the CV key in effect, the mean test/train score per metric,
        the best parameters (as a string), the raw CV results (as a string)
        and the full ``experiment_setup``.

    Raises:
        ValueError: If no CV split count is configured, or if the optimizer
            type is not one of the supported searches.
    """
    all_performance = []
    for dataset in dataset_list:
        dataset_variant = dataset["dataset_variant"]
        print(dataset_variant)

        X = dataset["X"].to_numpy()
        y = dataset["y"].to_numpy(dtype='int32').ravel()

        # Type of task; the visible code only copies it into the result.
        task = experiment_setup['task']

        # The mask is required when MaskedPCA or MaskedSVD is used: it is
        # True for every column index above 2 (only_BMI == 1) or above 4.
        # NOTE(review): presumably referenced by name from the evaluated
        # pipeline expressions below -- do not rename or remove it.
        if experiment_setup["dataset_settings"]["only_BMI"] == 1:
            mask = np.arange(X.shape[1]) > 2
        else:
            mask = np.arange(X.shape[1]) > 4

        # Building the pipeline.
        # SECURITY: eval() executes arbitrary code taken from the experiment
        # configuration; only run configurations from a trusted source.
        # The eval calls stay inline in plain for-loops (no comprehension, no
        # helper function) because the expressions are resolved against this
        # function's local names.
        pipe_steps = []
        for key, value in experiment_setup["pipe"].items():
            pipe_steps.append((key, eval(value)))
        pipe = Pipeline(pipe_steps)

        hp_optimizer = experiment_setup["hp_optimizer"]
        metrics = hp_optimizer.get("metrics")

        # Only the first parameter grid/space of the configuration is used.
        parsed_params = {}
        for parameter, values in hp_optimizer.get("params")[0].items():
            parsed_params[parameter] = eval(values)

        # Outer splitter first, inner splitter last: the search always uses
        # the last one, the nested evaluation the first one.
        list_skf = []
        cv_type = None
        for split_key in ("n_split_outer_cv", "n_split_inner_cv"):
            if split_key in hp_optimizer:
                cv_type = split_key
                list_skf.append(
                    StratifiedKFold(
                        n_splits=hp_optimizer[split_key],
                        shuffle=True,
                        random_state=42,
                    )
                )
        if not list_skf:
            raise ValueError(
                "hp_optimizer must define 'n_split_outer_cv' and/or "
                "'n_split_inner_cv'"
            )

        search_kwargs = {
            "cv": list_skf[-1],
            "verbose": 1,
            "scoring": metrics,
            "refit": metrics[0],
            "return_train_score": True,
        }
        optimizer_type = hp_optimizer["type"]
        if optimizer_type == 'GridSearchCV':
            optimizer = GridSearchCV(
                pipe, parsed_params, n_jobs=cpu_1, **search_kwargs
            )
        elif optimizer_type in ('RandomizeSearchCV', 'RandomizedSearchCV'):
            # 'RandomizeSearchCV' is the historical (misspelt) config value;
            # the correctly spelt class name is accepted as well.
            n_iter = hp_optimizer.get("n_iter", 100)
            print(f"n_iter:{n_iter}")
            # n_jobs added so this search is parallelised like the others.
            optimizer = RandomizedSearchCV(
                pipe, parsed_params, n_jobs=cpu_1, n_iter=n_iter,
                **search_kwargs
            )
        elif optimizer_type == 'BayesSearchCV':
            n_iter = hp_optimizer.get("n_iter", 100)
            print(f"n_iter:{n_iter}")
            optimizer = BayesSearchCV(
                pipe, parsed_params, n_jobs=cpu_1, n_iter=n_iter,
                **search_kwargs
            )
        else:
            raise ValueError(
                f"Unsupported hp_optimizer type: {optimizer_type!r}"
            )

        scores_test_dict = {}
        scores_train_dict = {}
        if "n_split_inner_cv" in hp_optimizer:
            # Nested CV: cross_validate clones and fits the search on every
            # outer split, so fitting it on the full data beforehand would
            # only repeat an entire search whose result is never used.
            cv_dic = cross_validate(
                optimizer, X, y, cv=list_skf[0], scoring=metrics,
                return_estimator=True, verbose=2, return_train_score=True,
            )
            best_params_cv = [
                estimator.best_params_ for estimator in cv_dic["estimator"]
            ]

            for metric in metrics:
                scores_test_dict[metric] = np.mean(cv_dic[f"test_{metric}"])
                scores_train_dict[metric] = np.mean(cv_dic[f"train_{metric}"])

            cv_results = str(cv_dic)
        else:
            # Single CV: fit the search once and report its best candidate.
            optimizer.fit(X, y)
            best_params_cv = [optimizer.best_params_]
            best_model = pd.DataFrame(optimizer.cv_results_).iloc[
                optimizer.best_index_
            ]

            for metric in metrics:
                scores_test_dict[metric] = best_model[f"mean_test_{metric}"]
                scores_train_dict[metric] = best_model[f"mean_train_{metric}"]

            cv_results = str(optimizer.cv_results_)

        score = {
            "experiment_name": experiment_name,
            "dataset_variant": dataset_variant,
            "estimator": experiment_setup['pipe']['estimator'],
            "task": task,
            "hp_optimizer": hp_optimizer['type'],
            "cv_type": cv_type,
            "mean_test_score": scores_test_dict,
            "mean_train_score": scores_train_dict,
            "best_params": str(best_params_cv),
            "cv_results": cv_results,
            "experiment_setup": experiment_setup,
        }

        all_performance.append(score)

    return all_performance