In [None]:
import sys
import os

# Make modules that live one level above the working directory importable.
current_dir = os.getcwd()
one_up_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

# Only register the parent folder once, so re-running this cell does not
# pile up duplicate entries on sys.path.
if sys.path.count(one_up_dir) == 0:
    sys.path.append(one_up_dir)

In [None]:
import os
import sys

# Expected layout, as described by the original author (the folder name is
# written "gloabl_files" throughout this notebook):
#
#   gloabl_files/
#       ensemble_genetic_algorithm/    <- git clone of the repository
#       project_folder/                <- this notebook; run it from here
#
# Setup: navigate to gloabl_files, git clone ensemble_genetic_algorithm, then
# create a project folder inside gloabl_files and put this notebook in it.

# Working directory of the kernel and the folder that contains it.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)

# NOTE(review): the literal is spelled "gloabl_files" (not "global_files");
# confirm that this matches the folder name on disk before renaming it.
# NOTE(review): per the layout above, parent_dir is already the gloabl_files
# folder, so this resolves to <parent_dir>/gloabl_files - confirm that this
# is the intended location.
global_files_dir = os.path.join(parent_dir, "gloabl_files")

# Repository checkout and the two sub-folders imported from below.
ensemble_genetic_algorithm_dir = os.path.join(global_files_dir, "ensemble_genetic_algorithm")
ml_grid_dir = os.path.join(ensemble_genetic_algorithm_dir, "ml_grid")
notebooks_dir = os.path.join(ensemble_genetic_algorithm_dir, "notebooks")

# Register every folder on the import path, skipping entries that are already
# present so that re-running the cell does not grow sys.path with duplicates.
for search_dir in (
    global_files_dir,
    ensemble_genetic_algorithm_dir,
    ml_grid_dir,
    notebooks_dir,
):
    if search_dir not in sys.path:
        sys.path.append(search_dir)


In [None]:
# Show the kernel's current working directory. The bare name relies on
# IPython automagic (equivalent to the %pwd line magic); it is not plain Python.
pwd

In [None]:
import os

# Function to recursively remove a directory
def remove_directory(path):
    """Delete the directory tree at ``path`` and report the outcome on stdout.

    The tree is removed with ``shutil.rmtree``. Symbolic links found inside
    the tree are unlinked rather than followed, so a link to a directory no
    longer aborts the removal half-way with ``NotADirectoryError``.

    Args:
        path: Directory to remove (str or path-like).

    Returns:
        None. A message is printed in both the "removed" and the
        "does not exist" case.

    Raises:
        OSError: If ``path`` exists but cannot be removed, for example when
            it is a regular file, a symbolic link, or permissions are missing.
    """
    # Local import: the surrounding cell only imports ``os``.
    import shutil

    if not os.path.exists(path):
        print(f"Directory '{path}' does not exist.")
        return

    shutil.rmtree(path)
    print(f"Directory '{path}' removed successfully.")

# Delete the 'HFE_GA_experiments' folder (resolved relative to the working
# directory) if it is present; remove_directory prints what happened.
directory_path = 'HFE_GA_experiments'
remove_directory(path=directory_path)


In [None]:
# Enable or disable GPU

# #os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
# os.environ["CUDA_VISIBLE_DEVICES"]="-1"

In [None]:
# import shutil
# import os
# directory = r'HFE_GA_experiments'

# if os.path.exists(directory):
#     shutil.rmtree(directory)
#     print(f"The directory '{directory}' has been successfully removed.")
# else:
#     print(f"The directory '{directory}' does not exist.")


In [None]:
import ipywidgets as ipw
# Output widget that the main run loop clears at the start of every iteration
# (output.clear_output(wait=True)).
# NOTE(review): no visible cell displays this widget or captures output into
# it, so clearing it may have no visible effect - confirm whether it is needed.
output = ipw.Output()

#### Set up logging

In [None]:
from ml_grid.util.logger_setup import setup_logger
import logging

# Raise the threshold of matplotlib's font-manager logger to INFO so that its
# DEBUG records are dropped.
logger = logging.getLogger('matplotlib.font_manager')
logger.setLevel(level=logging.INFO)

In [None]:
#
import ml_grid
import pathlib
import datetime
from tqdm import tqdm
from ml_grid.model_classes_ga.dummy_model import DummyModelGenerator
from ml_grid.util import grid_param_space_ga

import pandas as pd
from ml_grid.model_classes_ga.adaboostClassifier_model import (
    AdaBoostClassifierModelGenerator,
)
from ml_grid.model_classes_ga.extra_trees_model import extraTreesModelGenerator
from ml_grid.model_classes_ga.gradientBoostingClassifier_model import (
    GradientBoostingClassifier_ModelGenerator,
)
from ml_grid.model_classes_ga.logistic_regression_model import logisticRegressionModelGenerator
from ml_grid.model_classes_ga.mlpClassifier_model import MLPClassifier_ModelGenerator
from ml_grid.model_classes_ga.perceptron_model import perceptronModelGenerator
from ml_grid.model_classes_ga.quadraticDiscriminantAnalysis_model import (
    QuadraticDiscriminantAnalysis_ModelGenerator,
)
from ml_grid.model_classes_ga.randomForest_model import randomForestModelGenerator
from ml_grid.model_classes_ga.svc_model import SVC_ModelGenerator
from ml_grid.model_classes_ga.gaussianNB_model import GaussianNB_ModelGenerator
from ml_grid.model_classes_ga.kNearestNeighbors_model import (
    kNearestNeighborsModelGenerator,
)
from ml_grid.model_classes_ga.elasticNeuralNetwork_model import (
    elasticNeuralNetworkModelGenerator,
)
from ml_grid.model_classes_ga.XGBoost_model import XGBoostModelGenerator
from ml_grid.model_classes_ga.decisionTreeClassifier_model import (
    DecisionTreeClassifierModelGenerator,
)
from ml_grid.model_classes_ga.pytorchANNBinaryClassifier_model import (
    Pytorch_binary_class_ModelGenerator,
)


from ml_grid.util.project_score_save import project_score_save_class

# Root folder shared by all experiment runs; it is also handed to
# setup_logger as the log folder.
base_project_dir_global = "HFE_GA_experiments/"
logger = setup_logger(log_folder_path=base_project_dir_global)
pathlib.Path(base_project_dir_global).mkdir(parents=True, exist_ok=True)

# Timestamp of this run (12-hour clock plus AM/PM marker), used as the name of
# the run's own sub-folder.
st_time = datetime.datetime.now().strftime("%Y-%m-%d_%I-%M-%S_%p")

# Per-run folder below the shared root, and the prefix passed to the pipeline
# as additional_naming.
base_project_dir = f"{base_project_dir_global}{st_time}/"
additional_naming = "HFE_GA_Grid_"

print(base_project_dir)
pathlib.Path(base_project_dir).mkdir(parents=True, exist_ok=True)

# Input data set for the run, resolved relative to the working directory.
#
# Alternatives used previously; both are absolute paths on one specific
# machine, so they are kept only as commented-out references instead of being
# assigned and immediately overwritten:
# input_csv_path = '/home/aliencat/samora/HFE/HFE/v20/30163_to_16408_imputed_outcome_grid.csv'
# input_csv_path = "/home/aliencat/samora/HFE/HFE/v22/hfe_TC_merge_T_Im_10k_1yr_mean_imputed.csv"
input_csv_path = "synthetic_sample_100_features_4.csv"

# Initialise the CSV that stores the results of each local project.
project_score_save_class(base_project_dir)

# n_iter is passed to the settings grid as sample_n and is also the number of
# iterations of the run loop below.
n_iter = 3
grid_iter_obj = grid_param_space_ga.Grid(sample_n=n_iter).settings_list_iterator

# Model generators made available to the run, in their original order.
# The dummy model generator is not listed here because it needs an
# ml_grid_object; it is appended inside the run loop instead.
# KerasClassifier_ModelGen is currently disabled.
modelFuncList = [
    logisticRegressionModelGenerator,
    perceptronModelGenerator,
    extraTreesModelGenerator,
    randomForestModelGenerator,
    kNearestNeighborsModelGenerator,
    XGBoostModelGenerator,
    DecisionTreeClassifierModelGenerator,
    AdaBoostClassifierModelGenerator,
    elasticNeuralNetworkModelGenerator,
    GaussianNB_ModelGenerator,
    QuadraticDiscriminantAnalysis_ModelGenerator,
    SVC_ModelGenerator,
    GradientBoostingClassifier_ModelGenerator,
    MLPClassifier_ModelGenerator,
    Pytorch_binary_class_ModelGenerator,
]

# Configuration handed to the pipeline; it references the list above directly
# (no copy is made here).
config_dict = dict(
    use_stored_base_learners=False,
    modelFuncList=modelFuncList,
)


# Imported once ahead of the loop instead of being re-executed per iteration.
from ml_grid.pipeline import main_ga

# Snapshot of the configured model generators. config_dict["modelFuncList"] is
# this very list object, so it is restored in place at the start of every
# iteration. Without the reset, each pass appended one more dummy generator,
# leaving generators bound to the ml_grid_object of earlier iterations in the
# list.
base_model_func_list = list(modelFuncList)

for i in tqdm(range(n_iter)):
    output.clear_output(wait=True)

    # Slice assignment keeps the list identity that config_dict refers to.
    modelFuncList[:] = base_model_func_list

    # Get the next settings dict from the iterator over the settings grid.
    local_param_dict = next(grid_iter_obj)

    # Create the pipeline object from these settings.
    ml_grid_object = ml_grid.pipeline.data.pipe(
        input_csv_path,
        drop_term_list=["chrom", "hfe", "phlebo"],
        local_param_dict=local_param_dict,
        base_project_dir=base_project_dir,
        additional_naming=additional_naming,
        test_sample_n=0,
        column_sample_n=0,
        param_space_index=i,
        config_dict=config_dict,
        testing=True,  # use test grid
        multiprocessing_ensemble=False,
    )

    ml_grid_object.verbose = 0

    # The dummy generator is bound to this iteration's pipeline object, so it
    # is created fresh and added exactly once per iteration.
    dummy_generator = DummyModelGenerator(ml_grid_object, local_param_dict)
    modelFuncList.append(dummy_generator.dummy_model_gen)

    # Not read again in the visible cells; kept as notebook-level names in
    # case later cells inspect them.
    use_stored_base_learners = local_param_dict.get("use_stored_base_learners")
    store_base_learners = local_param_dict.get("store_base_learners")

    # Pass the object to be evaluated and write the results to CSV.
    res = main_ga.run(ml_grid_object, local_param_dict=local_param_dict).execute()

In [None]:
# Load the score log written below the project directory of the last
# pipeline object and display it as the cell result.
final_grid_score_log_path = ml_grid_object.base_project_dir + "final_grid_score_log.csv"
df = pd.read_csv(final_grid_score_log_path)

df