# JiaoCheng Sample Usage

In [None]:
import warnings;
warnings.simplefilter('ignore')

In [None]:
import os
import sys

# Folder that holds the JiaoCheng source, relative to the working directory.
py_file_location = '../../Source Code'

# Put its absolute path on the module search path so the `import JiaoCheng`
# in the next cell can be resolved.
sys.path.extend([os.path.abspath(py_file_location)])

In [None]:
import JiaoCheng

import pandas as pd
import pickle

In [None]:
# Create the tuner object; every later cell configures or runs this one instance.
jiaocheng = JiaoCheng.JiaoCheng()

JiaoCheng Initialised


### Import, prepare and read in the data

In [None]:
# Load the train / validation / test splits, in that order, one CSV each.
train_data, val_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        './drive/MyDrive/Brownlow/data/curated/modelling/N_Train_1.csv',
        './drive/MyDrive/Brownlow/data/curated/modelling/N_Validate_1.csv',
        './drive/MyDrive/Brownlow/data/curated/modelling/N_Test_1.csv',
    )
)

In [None]:
# The column labelled '1' is used as the target; all remaining columns are the features.
train_x, train_y = train_data.drop(columns=['1']), train_data['1']
val_x, val_y = val_data.drop(columns=['1']), val_data['1']
test_x, test_y = test_data.drop(columns=['1']), test_data['1']

In [None]:
# Hand all six pieces to the tuner, ordered train -> validation -> test,
# with the feature frame before the target for each split.
jiaocheng.read_in_data(train_x, train_y, val_x, val_y, test_x, test_y)

Read in Train X data
Read in Train y data
Read in Val X data
Read in Val y data
Read in Test X data
Read in Test y data


### Initialise model and set hyperparameters

In [None]:
from sklearn.ensemble import RandomForestRegressor as RFR

In [None]:
# Register the estimator class itself (not an instance) together with its task type.
# NOTE(review): the saved output under this cell names ExplainableBoostingRegressor,
# so it appears to come from an earlier run with a different model - re-run to refresh it.
jiaocheng.read_in_model(RFR, 'Regression')

Successfully read in model <class 'interpret.glassbox.ebm.ebm.ExplainableBoostingRegressor'>, which is a Regression model


In [None]:
# Candidate values for each tunable hyperparameter, keyed by the argument name
# the model is given; every entry is a tuple of the values to try.
parameter_choices = dict(
    n_estimators=(25, 50, 100, 200, 400),
    max_depth=(3, 6, 12, 24, 48, 96, 192),
    max_samples=(0.4, 0.55, 0.7, 0.85),
    max_features=(0.4, 0.55, 0.7, 0.85),
    ccp_alpha=(1e-8, 1e-6, 1e-4, 1e-2, 0.1),
)

In [None]:
# Record the candidate values defined in parameter_choices with the tuner.
jiaocheng.set_hyperparameters(parameter_choices)

Successfully recorded hyperparameter choices


In [None]:
# Settings that stay fixed for every combination: a fixed seed and n_jobs=-1.
# Presumably forwarded unchanged to the model constructor - confirm in JiaoCheng.
jiaocheng.set_non_tuneable_hyperparameters({'random_state': 19260817, 'n_jobs': -1})

In [None]:
# Order in which the hyperparameters are visited; each name is a key of parameter_choices.
# NOTE: set_tuning_order is called again further down with 'features' added, which
# presumably replaces this order - confirm.
jiaocheng.set_tuning_order(['max_depth', 'max_features', 'max_samples', 'n_estimators', 'ccp_alpha'])

In [None]:
# Default value for each tunable hyperparameter; each default is one of the
# candidate values listed in parameter_choices.
jiaocheng.set_hyperparameter_default_values(dict(
    n_estimators=50,
    max_depth=12,
    max_samples=0.7,
    max_features=0.7,
    ccp_alpha=0.01,
))

### Input feature combinations for tuning via NingXiang

In [None]:
# Load the saved NingXiang output that is passed to set_features below.
# NOTE: unpickling can execute arbitrary code - only load a file you produced yourself.
with open('./drive/MyDrive/Brownlow/models/NingXiang_output_1.pickle', mode='rb') as pickle_file:
    ningxiang_output = pickle.load(pickle_file)

In [None]:
# Give the NingXiang feature combinations to the tuner; presumably this is what makes
# 'features' available as an entry in the tuning order and defaults below - confirm.
jiaocheng.set_features(ningxiang_output)

In [None]:
# Same order as before, now with 'features' placed first.
jiaocheng.set_tuning_order(['features', 'max_depth', 'max_features', 'max_samples', 'n_estimators', 'ccp_alpha'])

In [None]:
# Defaults again, now including 'features'.
# NOTE(review): 0 presumably selects the first feature combination - confirm in JiaoCheng.
jiaocheng.set_hyperparameter_default_values(dict(
    features=0,
    n_estimators=50,
    max_depth=12,
    max_samples=0.7,
    max_features=0.7,
    ccp_alpha=0.01,
))

### Set output directory

In [None]:
# Resume support: if an earlier tuning run did not complete in one go, read its saved
# results back in so the tuner can pick up where it left off instead of starting again.
# This step is best-effort - a failure here must not stop the notebook - but the reason
# is reported rather than silently swallowed.
try:
    jiaocheng.read_in_tuning_result_df('./drive/MyDrive/Brownlow/models/tuning/N_rfr_1.csv')
except FileNotFoundError:
    # Nothing was saved by a previous run, so there is nothing to resume.
    print('No previous tuning results found; tuning will start from scratch')
except Exception as error:
    # Catch Exception rather than using a bare except, so that KeyboardInterrupt and
    # SystemExit still propagate and the cell can be interrupted.
    print('Could not read previous tuning results; tuning will start from scratch:', repr(error))

In [None]:
# Where the tuning results are written. The resume cell above reads this same path with
# a '.csv' suffix, so the suffix is presumably appended by JiaoCheng - confirm.
jiaocheng.set_tuning_result_saving_address('./drive/MyDrive/Brownlow/models/tuning/N_rfr_1')

Successfully set tuning output address


In [None]:
# Where the best model is saved (file extension, if any, presumably added by JiaoCheng - confirm).
jiaocheng.set_best_model_saving_address('./drive/MyDrive/Brownlow/models/tmp_models/N_rfr_1')

### Begin Tuning

In [None]:
# Run the tuning with everything configured above; progress is printed per combination.
# NOTE(review): the saved log under this cell lists 'interactions' and 'max_bins', which
# are not in this notebook's parameter_choices, so it appears to come from an earlier run
# with a different model - re-run to refresh it.
jiaocheng.tune()


Default combo: [4, 2, 3, 1, 2, 1] 


ROUND 1

Hyperparameter: interactions (index: 1) 

Trained and Tested combination 1 of 26880: (4, 0, 3, 1, 2, 1), taking 5.11 seconds to get val score of 0.3657
        Current best combo: (4, 0, 3, 1, 2, 1) with val score 0.3657
Trained and Tested combination 2 of 26880: (4, 1, 3, 1, 2, 1), taking 4.89 seconds to get val score of 0.367
        Current best combo: (4, 1, 3, 1, 2, 1) with val score 0.367
Trained and Tested combination 3 of 26880: (4, 2, 3, 1, 2, 1), taking 6.09 seconds to get val score of 0.3702
        Current best combo: (4, 2, 3, 1, 2, 1) with val score 0.3702
Trained and Tested combination 4 of 26880: (4, 3, 3, 1, 2, 1), taking 5.46 seconds to get val score of 0.373
        Current best combo: (4, 3, 3, 1, 2, 1) with val score 0.373

Best combo after this round: (4, 3, 3, 1, 2, 1) 


Hyperparameter: max_bins (index: 0) 

Trained and Tested combination 5 of 26880: (0, 3, 3, 1, 2, 1), taking 6.68 seconds to get val score of 0.359


### View Best Tuning Output

In [None]:
# Print the best combination found so far together with its validation score.
jiaocheng.view_best_combo_and_score()

(Current) Best combo: (2, 3, 3, 1, 2, 1) with val score 0.37419687271799396
