## Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import random
from sklearn.metrics import accuracy_score
from deap import base, creator, tools, algorithms
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import os
import timeit
import copy
from sklearn.ensemble import AdaBoostClassifier
from scipy import stats as st
import time
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import recall_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold, cross_val_score
from bayes_opt import BayesianOptimization
import warnings
import multiprocessing
from hyperopt import fmin, tpe, hp, Trials
import smtplib, ssl
warnings.filterwarnings("ignore")

## Global definitions

In [2]:
datasets_path = "../input/150-classification/classification_datasets/"
results_path = "../input/results/result_df.csv"
ada_path = "../input/adaours/ada-ours.csv"

## Side Checking

In [3]:
# df_results = pd.read_csv(ada_path)
# # df_results.head()
# df_results["diff"] = df_results.apply(lambda x: x["HGARF_test_score"] - x["ada_test_score"], axis=1)
# df_results["Status"] = df_results["diff"].apply(lambda x: "Win" if x > 0 else "Loss" if x < 0 else "Tie")
# df_results[df_results.dataset=='cardiotocography-10clases']

## Done side checking

In [4]:
list_of_decision_trees = None
population_with_predictions = None
# pool = multiprocessing.Pool()



def register_stats():
    global stats
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)


def create_forest(total_number_of_trees, num_of_features):
    global population_with_predictions
    
    RF_PREDICTIONS_X_VAL = []
    RF = []
    
    
    X, y = x_training, y_training
    if num_of_features <= 2:
        curr_tree = RandomForestClassifier(n_estimators=int(total_number_of_trees),
                                           max_features=num_of_features)
        curr_tree.fit(X, y)
        RF.append(curr_tree)
    else:
        for i in range(2, num_of_features):
            curr_tree = RandomForestClassifier(n_estimators=int(total_number_of_trees / (num_of_features - 2)),
                                               max_features=i)
            curr_tree.fit(X, y)
            RF.append(curr_tree)

    # Flatten the list to one list of N decition trees
    RF = [item for sublist in RF for item in sublist]
    
    for tree in RF:
        RF_PREDICTIONS_X_VAL.append(tree.predict(x_val))
        
    population_with_predictions = np.array(RF_PREDICTIONS_X_VAL)
    
    return RF

def create_population(list_of_decision_trees, pop_size, chromosome_length):
    population = []
    for _ in range(pop_size):
        individual = random.sample(list_of_decision_trees, chromosome_length)
        population.append(individual)
    return population

In [5]:
def cx_one_point(ind1, ind2):
    cxpoint = random.randint(0, len(ind1))

    old_ind2 = ind2[cxpoint:]
    old_ind1 = ind1[cxpoint:]

    intersection = set(ind1[cxpoint:]).intersection(set(old_ind2))
    while len(intersection) > 0:
        for intersect in intersection:
            old_ind2.remove(intersect)
            old_ind2.append(random.randint(0,len(list_of_decision_trees)-1))
        intersection = set(ind1[cxpoint:]).intersection(set(old_ind2))

    intersection = set(ind2[cxpoint:]).intersection(set(old_ind1))
    while len(intersection) > 0:
        for intersect in intersection:
            old_ind1.remove(intersect)
            old_ind1.append(random.randint(0, len(list_of_decision_trees)-1))
        intersection = set(ind2[cxpoint:]).intersection(set(old_ind1))

    ind1[cxpoint:], ind2[cxpoint:] = old_ind2, old_ind1
    return ind1, ind2

def initialize_evolution_functions(total_number_of_trees, chromosome_length, tour_size):

    global toolbox

#     toolbox.register("map", pool.map)
    
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox.register("attribute", random.randint, 0, total_number_of_trees-1)
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attribute, chromosome_length)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", evaluate_individual)
    toolbox.register("mate", cx_one_point)
    toolbox.register("mutate", tools.mutUniformInt, low=0, up=total_number_of_trees-1, indpb=0.2)
    toolbox.register("select", tools.selTournament, tournsize=tour_size)
    register_stats()

    pop = toolbox.population(n=200)
    fitnesses = toolbox.map(toolbox.evaluate, pop)
    best_ind = tools.HallOfFame(1)
    pop = update_fitnesses_to_population(pop,fitnesses)

    return pop, best_ind

def evaluate_individual(individual, test=False):

    if test:
        X, y = x_test, y_test
    else:
        X, y = x_val, y_val

    prediction_results = []
    for decision_tree_index in individual:
        if test:
            prediction_results.append(list_of_decision_trees[decision_tree_index].predict(X))
        else:
            prediction_results.append(population_with_predictions[decision_tree_index])

    prediction_results = np.array(prediction_results)
    predictions_of_individual = st.mode(prediction_results)[0].squeeze()
    fitness = accuracy_score(predictions_of_individual, y)

    return (float(fitness),)

In [6]:
def create_population_to_deap(pop_not_for_deap):
    pop_to_deap = []
    for ind in pop_not_for_deap:
        pop_to_deap.append(creator.Individual(ind))

    return pop_to_deap

def update_fitnesses_to_population(pop, fitnesses):
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    return pop


   

def label_encode(df):
    le = LabelEncoder()
    target_col = df.columns[-1]
    le.fit(df[target_col])
    df[target_col] = le.transform(df[target_col])
    return df


def convert_df_to_x_y(df):
    target_col = df.columns[-1]
    y = df[target_col].to_numpy()
    del df[target_col]
    X = df.values
    return X,y


def train_val_test_split(X,y, test_size, val_size):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=1)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_size, random_state=1)
    return X_train, X_val, y_train, y_val, X_test, y_test


def handle_categorial(df):
    new_df = copy.deepcopy(df)
    for item_idx, (key, val) in enumerate(df.dtypes.items()):
        if item_idx == df.dtypes.size - 1:
            break
        if val == object:
            dummy = pd.get_dummies(df[key], dtype=float)
            new_df = pd.concat([dummy, new_df], axis=1)
            del new_df[key]
    return new_df


def calculate_legal_trees_number(num_of_features, trees=1000):
    if num_of_features <= 2:
        return trees
    while (True):            
        if trees % (num_of_features - 2) == 0:
            return trees
        else:
            trees -= 1
            
def get_common_val(df,col):
    for key, val in df[col].value_counts().items():
        return key
            
def fill_na(df):
    for item_idx, (key, val) in enumerate(df.dtypes.items()):
        if item_idx == df.dtypes.size - 1:
            break
        if val == object:
            most_common = get_common_val(df,key)
            df[key].fillna(most_common, inplace=True)
        if val == float or val == int:
            med = df[key].median()
            df[key].fillna(med, inplace=True)
    return df

def preprocess(df):
    df = fill_na(df)
    df = handle_categorial(df)
    df = label_encode(df)
    return df

def initialize_data_set(curr_dataset_path, test_size, val_size):
    df = pd.read_csv(curr_dataset_path)
    df = preprocess(df)
    X, y = convert_df_to_x_y(df)
    num_of_features = df.shape[1]
    X_train, X_val, y_train, y_val, X_test, y_test = train_val_test_split(X,y, test_size, val_size)
    return X_train, X_val, y_train, y_val, X_test, y_test, num_of_features

def initialize_x_y(curr_dataset_path):
    df = pd.read_csv(curr_dataset_path)
    df = preprocess(df)
    X, y = convert_df_to_x_y(df)
    num_of_features = df.shape[1]
    return X,y, num_of_features



In [7]:
def HGARF(total_number_of_trees, num_of_features, chromosome_length, number_of_generations, tour_size, crossover_rate,mutation_rate):

    global list_of_decision_trees
    
    total_number_of_trees = calculate_legal_trees_number(num_of_features)
    
    list_of_decision_trees = create_forest(total_number_of_trees=total_number_of_trees, num_of_features=num_of_features)

    pop, best_ind = initialize_evolution_functions(total_number_of_trees, chromosome_length, tour_size)

    pop, logbook = algorithms.eaSimple(pop,
                               toolbox,
                               cxpb=crossover_rate,
                               mutpb=mutation_rate,
                               ngen=number_of_generations,
                               stats=stats,
                               halloffame=best_ind,
                               verbose=False)

    return pop, logbook, best_ind

In [8]:
def evaluate_individual_with_probs(individual, test=False):
    
    global population_with_predictions

    if test:
        X, y = x_test,y_test
    else:
        X, y = x_val,y_val

    prediction_results = []
    for decision_tree_index in individual:
        prediction_results.append(list_of_decision_trees[decision_tree_index].predict_proba(X))

    prediction_results = np.array(prediction_results)
    predictions_of_individual = np.sum(prediction_results, axis=0) / len(individual)

    return predictions_of_individual


def calculate_fpr(y_preds,y_true):
    cnf_matrix = confusion_matrix(y_true, y_preds)
    FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)
    FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
    TP = np.diag(cnf_matrix)

    TN = cnf_matrix.sum() - (FP + FN + TP)
    FP = FP.astype(float)
    TN = TN.astype(float)
    return np.mean(FP/(FP+TN))

def calculate_precision_recall_auc(y_preds,y_test):
    precision_array = []
    for i in np.unique(y_test):
        one_vs_all_for_specific_class_preds = np.where(y_preds==i,1,0)
        one_vs_all_for_specific_class_test = np.where(y_test==i,1,0)
        precision_array.append(average_precision_score(one_vs_all_for_specific_class_test, one_vs_all_for_specific_class_preds))
    return np.mean(precision_array)

In [9]:
def HGARF_minimize_function(params):
    total_number_of_trees, crossover_rate ,mutation_rate = [val for key,val in params.items()]
    pop, logbook, best_ind = HGARF(total_number_of_trees=int(total_number_of_trees),
                            num_of_features=x_training.shape[1],
                            chromosome_length=8,
                            number_of_generations=3,
                            tour_size=4,
                            crossover_rate=crossover_rate,
                            mutation_rate=mutation_rate)
    val_acc_hgarf = evaluate_individual(best_ind[0])[0]
    return 1-val_acc_hgarf

def Ada_minimize_function(params):
    learning_rate, n_estimators = [val for key,val in params.items()]
    clf = AdaBoostClassifier(n_estimators=int(n_estimators), learning_rate=learning_rate)
    score = cross_val_score(clf, x_train , y_train, cv=val_folds)
    return 1 - (sum(score)/len(score))

In [10]:
def cv_results(params, counter, opti_res):
    for idx, row in enumerate(trials):
        if counter == 0:
            params.append(dict())
            params[idx]["params"] = row["misc"]['vals']
            params[idx]['score'] = 0
        params[idx]['score'] += (row['result']['loss']/3)
    return params

def get_best_params(params):
    best_val, best_idx = 100000, -1
    for idx, row in enumerate(params):
        if row["score"] < best_val:
            best_val = row['score']
            best_idx = idx
    return params[best_idx]['params']


def to_one_hot(yi):
    b = np.zeros((yi.size, y.max()+1))
    b[np.arange(yi.size),yi] = 1
    return b


def calulate_measure_HGARF(best_params, fold_num, dataset):
    row = {}
    row["Dataset Name"] = dataset
    row["Algorithm Name"] = "HGARF"
    row["Cross Validation"] = fold_num
    num_tress = int(best_params["total_number_of_trees"][0])
    num_feat = x_training.shape[1]
    cross = best_params["crossover_rate"][0]
    mut = best_params["mutation_rate"][0]
    start = time.time()
    pop, logbook, best_ind = HGARF(total_number_of_trees=num_tress,
                            num_of_features=num_feat,
                            chromosome_length=8,
                            number_of_generations=3,
                            tour_size=4,
                            crossover_rate=cross,
                            mutation_rate=mut)
    time_training_hgarf = time.time() - start
    row["Training Time"] = time_training_hgarf
    start = time.time()
    test_acc_hgarf = evaluate_individual(best_ind[0], test=True)[0]
    time_inference_hgarf = time.time() - start
    row["Accuracy"] = test_acc_hgarf
    row["Inference Time"] = time_inference_hgarf
    y_preds_hgarf = evaluate_individual_with_probs(best_ind[0], test=True)
    row["Hyper-Parameters Values"] = f'{num_tress},{num_feat},8, 3, 4,{cross},{mut}' 
    if len(np.unique(y_test)) == 2:
        #BINARY
        roc_auc_hgarf = roc_auc_score(y_test, y_preds_hgarf[:,1],multi_class="ovo")
    else:
        roc_auc_hgarf = roc_auc_score(y_test, y_preds_hgarf,multi_class="ovo")
    
    row["AUC"] = roc_auc_hgarf
    
    hgarf_preds_classes = np.argmax(evaluate_individual_with_probs(best_ind[0], test=True),axis=1)

    precision_hgarf = precision_score(hgarf_preds_classes,y_test,average='weighted')
    row["Precision"] = precision_hgarf

    trp_hgarf = recall_score(hgarf_preds_classes, y_test, average='weighted')
    row["TPR"] = trp_hgarf

    fpr_hgarf = calculate_fpr(y_test,hgarf_preds_classes)
    row["FPR"] = fpr_hgarf

    pr_auc_hgarf = calculate_precision_recall_auc(hgarf_preds_classes,y_test)
    row["PR-Curve"] = pr_auc_hgarf
    
    return row


def calulate_measure_ada(best_params, fold_num, dataset):
    row = {}
    row["Dataset Name"] = dataset
    row["Algorithm Name"] = "AdaBoost"
    row["Cross Validation"] = fold_num
    learning_rate, n_estimators = [val for key,val in best_params.items()]
    clf = AdaBoostClassifier(n_estimators=int(n_estimators), learning_rate=learning_rate)
    start = time.time()
    clf.fit(x_train, y_train)
    time_training_ada = time.time() - start
    row["Training Time"] = time_training_ada
    start = time.time()
    test_acc_ada = clf.score(x_test, y_test)
    time_inference_ada = time.time() - start
    row["Accuracy"] = test_acc_ada
    row["Inference Time"] = time_inference_ada
    y_preds_adaboost = clf.predict_proba(x_test)
    row["Hyper-Parameters Values"] = f'{n_estimators},{learning_rate}' 
    if len(np.unique(y_test)) == 2:
        #BINARY
        roc_auc_adaboost = roc_auc_score(y_test, y_preds_adaboost[:,1],multi_class="ovo")
    else:
        roc_auc_adaboost = roc_auc_score(y_test, y_preds_adaboost,multi_class="ovo")
    
    row["AUC"] = roc_auc_adaboost
    
    adaboost_preds_classes = clf.predict(x_test)

    precision_adaboost = precision_score(adaboost_preds_classes,y_test,average='weighted')
    row["Precision"] = precision_adaboost

    trp_adaboost = recall_score(adaboost_preds_classes, y_test, average='weighted')
    row["TPR"] = trp_adaboost

    fpr_adaboost = calculate_fpr(y_test,adaboost_preds_classes)
    row["FPR"] = fpr_adaboost

    pr_auc_adaboost = calculate_precision_recall_auc(adaboost_preds_classes,y_test)
    row["PR-Curve"] = pr_auc_adaboost
    
    return row


def get_spaces():
    HGARF_space = {"total_number_of_trees": hp.randint('total_number_of_trees', 50, 100),
               "crossover_rate": hp.uniform('crossover_rate', 0, 1),
               "mutation_rate": hp.uniform('mutation_rate', 0, 1)}

    ada_space = {"n_estimators":  hp.randint('n_estimators', 100, 500),
                 'learning_rate': hp.uniform('learning_rate', 0.5, 1.5)}
    
    return HGARF_space, ada_space


def get_running_properties():
    test_folds,val_folds, dataset_limit, iter_num = 5, 3, 50, 50
    return test_folds,val_folds, dataset_limit, iter_num


def get_result_df():
    columns = ["Dataset Name", "Algorithm Name", "Cross Validation", "Hyper-Parameters Values", "Accuracy", "TPR",
          "FPR","Precision", "AUC", "PR-Curve", "Training Time", "Inference Time"]
    df = pd.DataFrame(columns=columns)
    return df


def reach_dataset_limit(idx, dataset_limit):
    return idx == dataset_limit


def check_for_labels(x_train, y_train, y_test):
    if len(np.unique(y_test)) == len(np.unique(y_train)):
        return x_train, y_train
    else:
        missing_values = list(set(np.unique(y_train)) - set(np.unique(y_test)))
        to_remove = [i for (i,v) in enumerate(y_train) if v in missing_values] 
        x_train, y_train = np.delete(x_train,to_remove,0), np.delete(y_train,to_remove) 
    return x_train, y_train

In [11]:
def send_mail(idx):
    port = 465  # For SSL
    smtp_server = "smtp.gmail.com"
    sender_email = "yarden.experiments@gmail.com"  # Enter your address
    receiver_email = "rotemyar@post.bgu.ac.il"  # Enter receiver address
    password = 'Nadav123!'
    message = f"Subject: Ensemble project: currently in {idx}/50 datasets."

    context = ssl.create_default_context()
    with smtplib.SMTP_SSL(smtp_server, port, context=context) as server:
        server.login(sender_email, password)
        server.sendmail(sender_email, receiver_email, message)

In [12]:
toolbox = base.Toolbox()
stats = tools.Statistics(key=lambda ind: ind.fitness.values)

HGARF_space, ada_space = get_spaces()
test_folds,val_folds, dataset_limit, iter_num = get_running_properties()
df = get_result_df()
start = time.time()
for idx, dataset in enumerate(os.listdir(datasets_path)):
    if reach_dataset_limit(idx, dataset_limit):
        break
    if idx % 5 == 0:
        send_mail(idx)
    time_end = time.time() - start
    print(f"Total time: {time_end}")
    start = time.time()
    # Read the current dataset
    curr_dataset_path = f'{datasets_path}{dataset}'
    X, y, num_of_features = initialize_x_y(curr_dataset_path)

    
    toolbox = base.Toolbox()
    stats = tools.Statistics(key=lambda ind: ind.fitness.values)
    
    # Clearing the .csv at dataset name
    dataset = dataset[:-4]
    
    ## The outer loop - 10 fold CV for train-test split.
    skf_test = StratifiedKFold(n_splits=test_folds, random_state=0)
    for fold_num, (train_index, test_index) in enumerate(skf_test.split(X, y)):
        x_train, x_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        # Removing the rows where label in each train isn't in test
        x_train, y_train = check_for_labels(x_train, y_train, y_test)
        
        ## The inner loop - the 3 CV (can't use cross_val_score, so we use onlu for HGARF)
        skf_val = StratifiedKFold(n_splits=val_folds, random_state=0)
        
        # A list for each outer fold to save in memory the parameters and there score.
        params = []
        
        ## HGARF part
        for counter,(training_index, val_index) in enumerate(skf_val.split(x_train, y_train)):
            x_training, x_val = x_train[training_index], x_train[val_index]
            y_training, y_val = y_train[training_index], y_train[val_index]
            
            # Optimize the problem on this inner fold
            trials = Trials()
            best  = fmin(HGARF_minimize_function,
                        space=HGARF_space,
                        algo=tpe.suggest,
                        max_evals=iter_num,
                        trials=trials)  
            # Add current fold result to param list
            params = cv_results(params, counter, trials)
         
        x_training, y_training = x_train, y_train
        
        # Gives us the best params from the total 50 experiments * 3 folds
        best_params = get_best_params(params)
        
        # Calculate all the measurments for this fold and algorithm
        row = calulate_measure_HGARF(best_params, fold_num, dataset)
        
        # insert to final df table
        df = df.append(row, ignore_index=True)
        
        
        ## Adaboost part (no need for 3 folds cause we use cross val score)
        print("Ada")
        trials = Trials()
        best_params  = fmin(Ada_minimize_function,
                    space=ada_space,
                    algo=tpe.suggest,
                    max_evals=iter_num,
                    trials=trials)

        # Same as before
        row = calulate_measure_ada(best_params, fold_num, dataset)
        df = df.append(row, ignore_index=True)
        print(df.head())
        df.to_csv(f"./results_{idx}_{fold_num}.csv")

SMTPDataError: (421, b'4.3.0 Temporary System Problem.  Try again later (10). n20sm279041vsr.6 - gsmtp')

In [13]:
df.head(100)

Unnamed: 0,Dataset Name,Algorithm Name,Cross Validation,Hyper-Parameters Values,Accuracy,TPR,FPR,Precision,AUC,PR-Curve,Training Time,Inference Time


## Temp!!!

In [14]:


#     start = time.time()
#     pop, logbook, best_ind = HGARF(total_number_of_trees=1000,
#                             num_of_features=num_of_features,
#                             chromosome_length=12,
#                             number_of_generations=10,
#                             tour_size=10,
#                             crossover_rate=0.5,
#                             mutation_rate=0.8)
#     time_training_hgarf = time.time() - start

#     start = time.time()
#     clf = AdaBoostClassifier(n_estimators=100, random_state=0)
#     clf.fit(X_train, y_train)
#     time_training_ada_boost = time.time() - start

#     start = time.time()
#     test_acc_ada_boost = clf.score(x_test, y_test)
#     time_inference_ada_boost = time.time() - start    

#     start = time.time()
#     test_acc_hgarf = evaluate_individual(best_ind[0], test=True)[0]
#     time_inference_hgarf = time.time() - start

#     y_preds_hgarf = evaluate_individual_with_probs(best_ind[0], test=True)
#     y_preds_adaboost = clf.predict_proba(x_test)

#     if len(y_test.unique()) == 2:
#         #BINARY
#         roc_auc_hgarf = roc_auc_score(y_test, y_preds_hgarf[:,1],multi_class="ovo")
#         roc_auc_adaboost = roc_auc_score(y_test, y_preds_adaboost[:,1],multi_class="ovo")
#     else:
#         roc_auc_hgarf = roc_auc_score(y_test, y_preds_hgarf,multi_class="ovo")
#         roc_auc_adaboost = roc_auc_score(y_test, y_preds_adaboost,multi_class="ovo")

#     hgarf_preds_classes = np.argmax(evaluate_individual_with_probs(best_ind[0], test=True),axis=1)
#     adaboost_preds_classes = clf.predict(x_test)

#     precision_hgarf = precision_score(hgarf_preds_classes,y_test,average='weighted')
#     precision_adaboost = precision_score(adaboost_preds_classes,y_test,average='weighted')

#     trp_hgarf = recall_score(hgarf_preds_classes, y_test, average='weighted')
#     trp_adaboost = recall_score(adaboost_preds_classes, y_test, average='weighted')

#     fpr_hgarf = calculate_fpr(y_test,hgarf_preds_classes)
#     fpr_adaboost = calculate_fpr(y_test,adaboost_preds_classes)

#     pr_auc_hgarf = calculate_precision_recall_auc(hgarf_preds_classes,y_test)
#     pr_auc_adaboost = calculate_precision_recall_auc(adaboost_preds_classes,y_test)

In [15]:
# HGARF_params = {"total_number_of_trees": (100, 2000), "crossover_rate": (0,1), "mutation_rate": (0,1)}
# ada_params = {"n_estimators": (100,2000), 'learning_rate': }
# test_folds,val_folds = 10, 3
# NUM_TRIALS = 50
# for i in range(NUM_TRIALS):
#     skf_test = StratifiedKFold(n_splits=test_folds, random_state=i)
#     for train_index, test_index in skf_test.split(X, y):
#         X_train, X_test = X[train_index], X[test_index]
#         y_train, y_test = y[train_index], y[test_index]
#         skf_val = StratifiedKFold(n_splits=val_folds, random_state=i)
#         for j in range(2):
#             params = []
#             counter = -1
#             for training_index, val_index in skf_val.split(X_train, y_train):
#                 counter += 1
#                 x_training, x_val = X_train[training_index], X_train[val_index]
#                 y_training, y_val = y_train[training_index], y_train[val_index]
#                 if j==0:
#                     HGARF_params["num_of_features"] = (x_training.shape[1],x_training.shape[1])

#                     optimizer = BayesianOptimization(
#                                 f=HGARF_maximize_function,
#                                 pbounds=HGARF_params,
#                                 random_state=i)

#                     optimizer.maximize(init_points=2, n_iter=5)
#                     for i, res in enumerate(optimizer.res):
#                         if counter == 0:
#                             params[res["params"]] = 0
#                         params[res["params"]] += (res["target"] / val_folds)
#                     print(params)
# #                         print("Iteration {}: \n\t{}".format(i, res))
#                 else:

#                     AdaBoostClassifier(
#                     DecisionTreeClassifier(max_depth=int(X_train.shape[1]/2)),
#                     n_estimators=1000,
#                     learning_rate=1)
#             bests_params.append(optimizer.max['params'])        
            
#             break
#         break
#     break
    
# print(optimizer.max['params'])