# AutoML con FLAML

In [1]:
!pip install flaml

Collecting flaml
  Downloading FLAML-1.0.11-py3-none-any.whl (205 kB)
[K     |████████████████████████████████| 205 kB 2.5 MB/s eta 0:00:01
Collecting lightgbm>=2.3.1
  Using cached lightgbm-3.3.2-py3-none-macosx_10_14_x86_64.macosx_10_15_x86_64.macosx_11_0_x86_64.whl (1.2 MB)
Collecting xgboost>=0.90
  Downloading xgboost-1.6.2-py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 19.3 MB/s eta 0:00:01
Installing collected packages: xgboost, lightgbm, flaml
Successfully installed flaml-1.0.11 lightgbm-3.3.2 xgboost-1.6.2


In [4]:
import pandas as pd

# Read the insurance dataset from its CSV file into a DataFrame
insurance = pd.read_csv(
    'data/insurance.csv',
)

In [5]:
# Preview the first five rows of the dataset
insurance.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,region,expenses
0,19,female,27.9,0,yes,southwest,16884.92
1,18,male,33.8,1,no,southeast,1725.55
2,28,male,33.0,3,no,southeast,4449.46
3,33,male,22.7,0,no,northwest,21984.47
4,32,male,28.9,0,no,northwest,3866.86


In [7]:
# Split the data into features (X) and target (y).
# Besides the target, also exclude 'Unnamed: 0': judging by the insurance.head()
# output it appears to be a row index that was saved into the CSV, so it would
# only act as a spurious feature for the model.
# errors='ignore' keeps this cell working if that column is not present.
X = insurance.drop(columns=['expenses', 'Unnamed: 0'], errors='ignore')
y = insurance['expenses']

In [8]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable between runs
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Create the FLAML AutoML object used for the model search below
from flaml import AutoML

automl = AutoML()

In [10]:
# Keyword arguments forwarded to automl.fit().
# "metric" can be: 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',
# 'roc_auc_ovo', 'log_loss', 'mape', 'f1', 'ap', 'ndcg', 'micro_f1', 'macro_f1'
settings = dict(
    time_budget=90,                  # total running time in seconds
    metric='mae',                    # error metric to optimise
    task='regression',               # task type
    log_file_name='insurance.log',   # flaml log file
    seed=7654321,                    # random seed
)

In [11]:
# Run the AutoML search on the training split with the options from `settings`
automl.fit(
    X_train=X_train,
    y_train=y_train,
    **settings,
)

[flaml.automl: 08-29 08:15:38] {2565} INFO - task = regression
[flaml.automl: 08-29 08:15:38] {2567} INFO - Data split method: uniform
[flaml.automl: 08-29 08:15:38] {2570} INFO - Evaluation method: cv
[flaml.automl: 08-29 08:15:39] {2689} INFO - Minimizing error metric: mae
[flaml.automl: 08-29 08:15:39] {2831} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 08-29 08:15:39] {3133} INFO - iteration 0, current learner lgbm
[flaml.automl: 08-29 08:15:39] {3266} INFO - Estimated sufficient time budget=903s. Estimated necessary time budget=6s.
[flaml.automl: 08-29 08:15:39] {3313} INFO -  at 0.1s,	estimator lgbm's best error=6712.0448,	best estimator lgbm's best error=6712.0448
[flaml.automl: 08-29 08:15:39] {3133} INFO - iteration 1, current learner lgbm
[flaml.automl: 08-29 08:15:39] {3313} INFO -  at 0.2s,	estimator lgbm's best error=6712.0448,	best estimator lgbm's best error=6712.0448
[flaml.automl: 08-29 08:15:39] {31

In [12]:
# Display the estimator attribute of the model selected by AutoML
# NOTE(review): presumably the underlying fitted learner object - confirm against the FLAML docs
automl.model.estimator

In [14]:
# Summarise the best run found by the AutoML search: the winning learner, its
# hyperparameters, its validation loss and its training time.
# The search minimises MAE (settings["metric"] == 'mae'), so best_loss is an
# error value (lower is better), not an accuracy; the label says so explicitly.
print(f'Best ML learner: {automl.best_estimator}')
print(f'Best hyperparameter config: {automl.best_config}')
print(f'Best MAE on validation data: {automl.best_loss:.4g}')
print(f'Training duration of best run: {automl.best_config_train_time:.4g} s')

Best ML leaner: xgboost
Best hyperparmeter config: {'n_estimators': 45, 'max_leaves': 11, 'min_child_weight': 22.083196219068817, 'learning_rate': 0.06643837015042209, 'subsample': 0.9071165289712618, 'colsample_bylevel': 1.0, 'colsample_bytree': 1.0, 'reg_alpha': 0.07357797254765185, 'reg_lambda': 0.4343467403646227}
Best accuracy on validation data: 2368
Training duration of best run: 0.2772 s
