# Tuning Script

In [4]:
# Script-specific identifiers: edit these for each tuning script.
# Both values are used when naming the saved tuning artefacts.

TUNER = 'yangzhoub'
MODEL = 'eb'

---

In [5]:
# Detect whether this notebook is running on Google Colab.
# COLAB_ENVIRONMENT is True only when google.colab can be imported AND
# Google Drive mounts successfully; otherwise it stays False.

COLAB_ENVIRONMENT = False

try:
    from google.colab import drive
except ImportError:
    # google.colab is unavailable, so this is not a Colab runtime.
    pass
else:
    try:
        drive.mount('/content/drive')
        COLAB_ENVIRONMENT = True
    except Exception as mount_error:
        # Still best-effort (the flag stays False), but say why instead of
        # failing silently. Catching Exception rather than using a bare
        # except lets KeyboardInterrupt/SystemExit propagate.
        print(f'Google Drive mount failed: {mount_error!r}')

Mounted at /content/drive


In [6]:
import sys
import os
import pickle

if COLAB_ENVIRONMENT:
    home_directory = './drive/MyDrive/LAB/AFLBrownlowPredictor2024/' # my home directory is stored in ./LAB of google drive
    if MODEL == 'eb':
        !pip install interpret==0.5.0
    elif MODEL == 'cbr':
        !pip install catboost
    !pip install JXAutoML
else:
    home_directory = '../../'

from JXAutoML.YangZhouB import YangZhouB as Tuner

import pandas as pd

Collecting interpret==0.5.0
  Downloading interpret-0.5.0-py3-none-any.whl.metadata (1.1 kB)
Collecting interpret-core==0.5.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.0->interpret==0.5.0)
  Downloading interpret_core-0.5.0-py3-none-any.whl.metadata (2.7 kB)
Collecting dash>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.0->interpret==0.5.0)
  Downloading dash-2.17.1-py3-none-any.whl.metadata (10 kB)
Collecting dash-core-components>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.0->interpret==0.5.0)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-html-components>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.0->interpret==0.5.0)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-table>=4.1.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity

In [7]:
# Settings that are the same for every tuning script.

# Feature selection is tuned alongside the model hyperparameters.
TUNE_FEATURES_AS_HYPERPARAMETERS = True

# The target is modelled as a regression problem.
TASK_TYPE = 'Regression'

# Seed constant.
SEED = 42

---

In [8]:
# model to use to tune
from interpret.glassbox import ExplainableBoostingRegressor as clf

# Search space: each hyperparameter name maps to the tuple of values to try.
parameter_choices = dict(
    max_bins=(4, 6, 64, 256, 1024, 4096),
    min_samples_leaf=(2, 4, 8, 16, 32),
    interactions=(2, 4, 8),
    max_leaves=(100, 1000, 10000),
    learning_rate=(0.001, 0.01, 0.1, 1),
    max_rounds=(25, 50, 100, 200),
)

# Fixed settings: overridden from the model defaults but never tuned.
non_tunable_hyperparameters_dict = dict(random_state=19260817, n_jobs=-1)

---

## Read in and Prepare Data

In [9]:
# Load the train / validation / test splits from the curated modelling folder.
train_data, val_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        f'{home_directory}data/curated/modelling/train.csv',
        f'{home_directory}data/curated/modelling/val.csv',
        f'{home_directory}data/curated/modelling/test.csv',
    )
)

# For each split, separate the 'target' column (y) from the remaining columns (X).
train_X, train_y = train_data.drop(columns=['target']), train_data['target']
val_X, val_y = val_data.drop(columns=['target']), val_data['target']
test_X, test_y = test_data.drop(columns=['target']), test_data['target']

In [10]:
# Load the pickled feature importance ordering from the models folder.

ordering_path = f'{home_directory}models/feature_importance_ordering.pickle'
with open(ordering_path, 'rb') as ordering_file:
    feature_importance_ordering = pickle.load(ordering_file)

## Set parameters

In [11]:
# Create the tuner and configure it step by step. A '---' separator is
# printed between steps so each step's log message is easy to tell apart.
tuner = Tuner()

print('---')

# define what model we are tuning and which metric to optimise
tuner.read_in_model(clf, TASK_TYPE, pytorch_model=False, optimised_metric='r2')

print('---')

# read in the data for training, validation and testing
tuner.read_in_data(train_X, train_y, val_X, val_y, test_X, test_y)

print('---')

# set what hp values to tune
tuner.set_hyperparameters(parameter_choices)
# WARNING: this may take a while if the number of tuneable hyperparameters is large

print('---')

# set up hp values that need to be changed from default but NOT to be tuned
tuner.set_non_tuneable_hyperparameters(non_tunable_hyperparameters_dict)

print('---')

# set up feature importance ordering

if TUNE_FEATURES_AS_HYPERPARAMETERS:
    tuner.set_features(feature_importance_ordering)
    # WARNING: this may take a while if the number of tuneable hyperparameters is large

print('---')

# One CSV path is used both to resume from earlier results and to save new
# ones, so build it once to keep the two uses from drifting apart.
tuning_result_path = f'{home_directory}models/tuning/{TUNER}_{MODEL}.csv'

try:  # try to read in previous results to continue tuning ...
    tuner.read_in_tuning_result_df(tuning_result_path)
except Exception as read_error:
    # Still best-effort: start from scratch if nothing could be loaded, but
    # show the reason so a real error is not mistaken for a missing file.
    # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
    print('No previous tuning data read in')
    print(f'    reason: {read_error!r}')

print('---')

# set up where to save the tuning result csv
tuner.set_tuning_result_saving_address(tuning_result_path)

print('---')

# set up where to save the current best model
tuner.set_best_model_saving_address(
    f'{home_directory}models/tmp_models/{TUNER}_{MODEL}')

YangZhouB Initialised
---
Successfully read in model <class 'interpret.glassbox._ebm._ebm.ExplainableBoostingRegressor'>, which is a Regression model optimising for r2
---
Read in Train X data
Read in Train y data
Read in Val X data
Read in Val y data
Read in Test X data
Read in Test y data
---
Successfully recorded hyperparameter choices
---
Successfully recorded non_tuneable_hyperparameter choices
---
Successfully recorded tuneable feature combination choices and updated relevant internal structures
---
No previous tuning data read in
---
Successfully set tuning output address
---
Successfully set best model output address


In [12]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Begin tuning (or continue from previously loaded tuning results, if any).
# NOTE: this is a long-running cell. The captured output reports 159840
# combinations, with the first few taking roughly 13-111 seconds each.

tuner.tune()

BEGIN TUNING


STAGE ZERO: Tune all Cruise combinations


Trained and Tested combination 1 of 159840: (0, 0, 2, 2, 3, 3, 15), taking 13.09 seconds to get val score of 0.8306
        Current best combo: (0, 0, 2, 2, 3, 3, 15) with val score 0.8306
Trained and Tested combination 2 of 159840: (0, 4, 2, 0, 0, 3, 24), taking 17.73 seconds to get val score of 0.7243
        Current best combo: (0, 0, 2, 2, 3, 3, 15) with val score 0.8306
Trained and Tested combination 3 of 159840: (0, 4, 2, 0, 0, 3, 20), taking 15.75 seconds to get val score of 0.7199
        Current best combo: (0, 0, 2, 2, 3, 3, 15) with val score 0.8306
Trained and Tested combination 4 of 159840: (5, 0, 2, 2, 0, 3, 36), taking 111.38 seconds to get val score of 0.7256
        Current best combo: (0, 0, 2, 2, 3, 3, 15) with val score 0.8306
Trained and Tested combination 5 of 159840: (0, 0, 2, 0, 0, 3, 32), taking 20.71 seconds to get val score of 0.7246
        Current best combo: (0, 0, 2, 2, 3, 3, 15) with val score 0.8