# Machine Learning

In [None]:
from ml_tools.ensemble_learning import run_ensemble_pipeline, RegressionTreeModels
from ml_tools import custom_logger
from ml_tools.utilities import train_dataset_orchestrator
from paths import PM
from helpers.constants import FINAL_TARGETS

## 1 Make Training Datasets

In [None]:
# Source directories for the training data, looked up by key in PM.
dataset_dirs = [
    PM[key]
    for key in ("feature engineering clip", "mice datasets", "vif datasets")
]

# Build the training datasets from every source directory and write them
# to the "model datasets" location.
train_dataset_orchestrator(
    list_of_dirs=dataset_dirs,  # type: ignore
    target_columns=FINAL_TARGETS,
    save_dir=PM["model datasets"],
)

## 2 Set Hyperparameters

In [None]:
# Hyperparameters passed as keyword arguments to RegressionTreeModels.
# Key order is kept as-is because this dict is also merged into the
# training log further down.
hyperparams = {
    "learning_rate": 0.005,
    "L1_regularization": 1,
    "L2_regularization": 1,
    "n_estimators": 1000,
    "max_depth": 8,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "min_child_weight": 3,
    "gamma": 1,
    "num_leaves": 31,
    "min_data_in_leaf": 40,
}

# Every entry above becomes one keyword argument of the constructor.
model_factory = RegressionTreeModels(**hyperparams)

## 3 Run Training

In [None]:
# Run the pipeline on the datasets in "model datasets", using the
# configured model factory; outputs go to the "model metrics" location and
# save_model=True is passed through to the pipeline.
run_ensemble_pipeline(
    datasets_dir=PM["model datasets"],
    save_dir=PM["model metrics"],
    target_columns=FINAL_TARGETS,
    model_object=model_factory,
    save_model=True,
)

## 4 Log Results

In [None]:
# Summary of the training run that gets written to the log.
# NOTE(review): the three "" keys below are identical, so Python keeps only
# the last one and the dict ends up with just two entries ("best model" and
# ""). Presumably these are placeholders to be filled in by hand with metric
# names and values -- give each a distinct key before running this cell,
# otherwise the earlier values are silently overwritten.
train_results = {
    "best model": "",
    "": 0,
    "": 0,
    "": 0,
}

# Merge the results with the hyperparameters (dict union, Python 3.9+; on a
# key clash the right-hand operand, hyperparams, wins) and pass the combined
# record to custom_logger under the log name "training result".
custom_logger(data=train_results | hyperparams,
              save_directory=PM["logs"],
              log_name="training result")