In [None]:
import InsurAutoML
from InsurAutoML import load_data, AutoTabularRegressor
import numpy as np
from sklearn.metrics import r2_score

# --- Experiment configuration -------------------------------------------
# Random seed, reused for the library-wide seed below and for the model.
seed = 42

# Search budget: number of hyperparameter-search evaluations.
n_trials = 64
# Value passed as `n_estimators` to the AutoML regressor.
N_ESTIMATORS = 5
# Time budget grows with the trial count: 450 per group of 4 trials.
# NOTE(review): units are presumably seconds -- confirm against InsurAutoML docs.
TIMEOUT = n_trials / 4 * 450

# Seed InsurAutoML before any data loading or model fitting happens.
InsurAutoML.set_seed(seed)

In [None]:
# Load every .rdata table found under the given path (empty string here).
data_loader = load_data(data_type = ".rdata")
database = data_loader.load(path = "")

# Show what was loaded; iterating `database` yields its entries
# (presumably the table names -- it is indexed by name in later cells).
database_names = list(database)
database_names

In [None]:
# Preview the first five rows of the table later used for training.
train_preview = database["data"].head(5)
train_preview

In [None]:
response = ["yAvgBC"]
features = [
    'TypeCity', 'TypeCounty', 'TypeMisc', 'TypeSchool', 'TypeTown', 'TypeVillage', 'IsRC', 'CoverageBC', 'lnDeductBC', 
    'NoClaimCreditBC', 'CoverageIM', 'lnDeductIM', 'NoClaimCreditIM', 'CoveragePN', 'NoClaimCreditPN', 'CoveragePO', 
    'NoClaimCreditPO','CoverageCN', 'NoClaimCreditCN', 'CoverageCO', 'NoClaimCreditCO'
]
# Coverage features that get the same log(1 + x) transform as the response.
coverage_features = ["CoverageBC", "CoverageIM", "CoveragePN", "CoveragePO", "CoverageCN", "CoverageCO"]
log_columns = response + coverage_features


def _log1p_columns(frame, columns):
    """Return a copy of ``frame`` with log(1 + x) applied to ``columns``.

    The input frame is never modified, so re-running this cell cannot apply
    the transform twice (the previous in-place version double-logged the data
    on every re-run).

    Parameters
    ----------
    frame : table supporting ``.copy()`` and column-list indexing
        (assumed to be a pandas DataFrame -- it is used that way elsewhere
        in this notebook).
    columns : list of str
        Names of the columns to transform.

    Returns
    -------
    A new frame; columns not listed in ``columns`` are carried over unchanged.
    """
    transformed = frame.copy()
    # np.log1p(x) computes log(1 + x) and stays accurate for x close to zero.
    transformed[columns] = np.log1p(transformed[columns])
    return transformed


# Transformed copies; `database` itself keeps the raw values.
train_data = _log1p_columns(database["data"], log_columns)
test_data = _log1p_columns(database["dataout"], log_columns)

train_X, train_y = train_data[features], train_data[response]
test_X, test_y = test_data[features], test_data[response]

In [None]:
# Configure and fit the AutoML regressor.
# All settings live in one mapping so the search configuration can be read
# (or displayed) in a single place before it is handed to the model.
automl_settings = dict(
    model_name="LGPIF_{}".format(n_trials),  # name carries the trial budget
    n_estimators=N_ESTIMATORS,
    max_evals=n_trials,
    timeout=TIMEOUT,
    validation="KFold",
    valid_size=0.2,
    search_algo="HyperOpt",
    objective="R2",
    cpu_threads=12,
    seed=seed,
)
mol = AutoTabularRegressor(**automl_settings)
mol.fit(train_X, train_y)

In [None]:
# Predict on both splits with the fitted model, then report R^2 as
# (train, test). Targets were fitted in log space, so the scores are too.
train_pred, test_pred = mol.predict(train_X), mol.predict(test_X)
train_r2 = r2_score(train_y, train_pred)
test_r2 = r2_score(test_y, test_pred)
train_r2, test_r2