# Imports

In [14]:
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston 
from sklearn.metrics import mean_absolute_error as mae_score
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor as SkBoosting
import os, sys
import time

## Import JIT trees module

In [3]:
# The JITtrees module is created in the directory above this notebook, so that
# directory must be on the import path before the import below can succeed.
module_dir = '..'
if module_dir not in sys.path:  # avoid stacking duplicate entries when the cell is re-run
    sys.path.append(module_dir)
import JITtrees

# Get dataset

In [7]:
# Load the Boston housing data as a (features, target) pair of arrays.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the environment pins a scikit-learn version that still has it.
x_all, y_all = load_boston(return_X_y=True)
print(
    f"Dataset size: {y_all.shape[0]}",
    f"Feature count: {x_all.shape[1]}",
    sep="\n",
)

Dataset size: 506
Feature count: 13


## Split dataset into train and test

In [8]:
# Hold out 20% of the rows as the test split; the fixed seed makes the split
# reproducible across runs.
split_params = {'test_size': 0.2, 'random_state': 12}
x_train, x_test, y_train, y_test = train_test_split(x_all, y_all, **split_params)
print(f"Train length: {y_train.shape[0]}, test length: {y_test.shape[0]}")

Train length: 404, test length: 102


# Fit model

In [19]:
# Hyper-parameters handed to JITtrees below. 'learning_rate', 'tree_count' and
# 'tree_depth' are also reused for the scikit-learn baseline later on.
options = {
    'bins': 256,
    'patience': 4,
    'tree_count': 200,
    'tree_depth': 4,
    'feature_fold_size': 0.8,
    'learning_rate': 0.2,
    'es_delta': 1e-5,
    'batch_part': 0.8,
    'use_jit': False,
    'jit_type': 0
}

# 'feature_fold_size' is a fraction of the feature columns; fit() receives it
# as a whole number of columns, rounded up.
feature_fold_count = int(np.ceil(x_train.shape[1] * options['feature_fold_size']))

model = JITtrees.Boosting(options['bins'], options['patience'])

# NOTE(review): the test split is passed into fit() and is also used for the
# reported MAE. If fit() uses it for early stopping ('patience' / 'es_delta'
# suggest so -- confirm against JITtrees), the reported score is optimistic and
# a separate validation split should be used instead.

# perf_counter() is the monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
history = model.fit(
    x_train, y_train, x_test, y_test,
    options['tree_count'], options['tree_depth'], feature_fold_count,
    options['learning_rate'], options['es_delta'],
    options['batch_part'], options['use_jit'], options['jit_type'],
)
exec_time = time.perf_counter() - start_time
print(f"Fit time: {exec_time} seconds")

Fit time: 0.2519867420196533 seconds


# Evaluate
## Get predictions

In [20]:
# Score the JITtrees model on the test split.
preds = model.predict(x_test)
mae = mae_score(y_test, preds)

# Baseline: scikit-learn's histogram gradient boosting configured with the same
# learning rate, number of boosting iterations and tree depth.
sk_model = SkBoosting(
    learning_rate=options['learning_rate'],
    max_iter=options['tree_count'],
    max_depth=options['tree_depth'],
)

# Time only the fit call. perf_counter() is the monotonic, high-resolution
# clock intended for measuring elapsed time, unlike the wall-clock time.time().
start_time = time.perf_counter()
sk_model.fit(x_train, y_train)
sk_fit_time = time.perf_counter() - start_time

sk_preds = sk_model.predict(x_test)
sk_mae = mae_score(y_test, sk_preds)

print(f"MAE score: {mae}, Sklearn model MAE: {sk_mae}")
print(f"Sklearn model fit time: {sk_fit_time} seconds")

MAE score: 2.3510144863302136, Sklearn model MAE: 2.527354851153334
Sklearn model fit time: 0.3945498466491699 seconds
