In [None]:
import InsurAutoML
from InsurAutoML import load_data, AutoTabularRegressor
import numpy as np
import pandas as pd
from sklearn.metrics import mean_poisson_deviance

# ---- experiment configuration ----
seed = 42  # shared random seed (also handed to the regressor further down)
n_trials = 64  # search budget; reused in the model name and as max_evals
N_ESTIMATORS = 5
# time budget grows with the trial count: 450 per block of 4 trials
# NOTE(review): unit is presumably seconds -- confirm against InsurAutoML docs
TIMEOUT = 450 * (n_trials / 4)

# seed the library before anything stochastic runs
InsurAutoML.set_seed(seed)

In [None]:
# Load the .csv data through InsurAutoML's loader; an empty path is passed,
# which presumably means the current working directory -- TODO confirm.
database = load_data(data_type=".csv").load(path="")

# Iterating the loaded container yields the dataset names; show them.
database_names = list(database)
database_names

In [None]:
# preview the first five rows (pandas' default for head()) of the frequency table
database["freMTPL2freq"].head()

In [None]:
# Target column to be modelled.
response = "ClaimNb"

# Every remaining column except the policy id and the response is a feature;
# the names are sorted so the column order is deterministic.
features = np.sort(
    list(set(database["freMTPL2freq"].columns).difference(["IDpol", "ClaimNb"]))
)

#### Train index generation R code

Case Study: French Motor Third-Party Liability Claims
```{R}
RNGversion("3.5.0")
set.seed (100)
ll <- sample (c (1: nrow ( freMTPL2freq )) , round (0.9* nrow ( freMTPL2freq )) , replace = FALSE )
write.csv(ll, "train_index.csv") # train_index.csv holds the 1-based R row numbers of the training sample (write.csv also writes a row-name column by default); it is used for the AutoML train/test split
```

In [None]:
# read train index & get test index
# NOTE: R's write.csv() writes the row names as an extra first column unless
# row.names = FALSE is given, so flattening the whole file would mix the row
# names 1..n into the sampled indices. Only the last column holds the sampled
# row numbers (this also works for a file written without row names).
sampled_rows = pd.read_csv("train_index.csv").iloc[:, -1].to_numpy()
# python dataframe index starts from 0, but R starts from 1
train_index = np.sort(sampled_rows) - 1
test_index = np.sort(
    list(set(database["freMTPL2freq"].index) - set(train_index))
)
# sanity check: train and test indices must partition the data (no duplicates,
# no out-of-range rows), otherwise the split below would be silently wrong
assert len(train_index) + len(test_index) == len(database["freMTPL2freq"]), (
    "train/test indices do not partition freMTPL2freq"
)
# train/test split
train_X, test_X, train_y, test_y = (
    database["freMTPL2freq"].loc[train_index, features],
    database["freMTPL2freq"].loc[test_index, features],
    database["freMTPL2freq"].loc[train_index, response],
    database["freMTPL2freq"].loc[test_index, response],
)

In [None]:
# Configure and fit the AutoML regressor on the training split.
# The settings are collected in one mapping so they can be read at a glance.
regressor_kwargs = {
    "model_name": "freMTPL2freq_{}".format(n_trials),  # name carries the trial budget
    "n_estimators": N_ESTIMATORS,
    "max_evals": n_trials,
    "timeout": TIMEOUT,
    "validation": False,
    "search_algo": "HyperOpt",
    # same metric that is reported on the train/test predictions later on
    "objective": mean_poisson_deviance,
    "cpu_threads": 12,
    "balancing": ["SimpleRandomOverSampling", "SimpleRandomUnderSampling"],
    "seed": seed,
}
mol = AutoTabularRegressor(**regressor_kwargs)
mol.fit(train_X, train_y)

In [None]:
# predictions of the fitted model on both splits
train_pred, test_pred = mol.predict(train_X), mol.predict(test_X)

# cell output: (train, test) mean Poisson deviance
(
    mean_poisson_deviance(train_y, train_pred),
    mean_poisson_deviance(test_y, test_pred),
)