# AutoML con FLAML

In [None]:
!pip install flaml

## Carga y preparación de datos

In [None]:
import pandas as pd

# Read the insurance dataset from its CSV file into a DataFrame
insurance = pd.read_csv(filepath_or_buffer='../data/insurance.csv')

In [None]:
# Preview the first five rows of the dataset
insurance.head(n=5)

In [None]:
# 'expenses' is the column to predict; every other column is used as a feature
y = insurance['expenses']
X = insurance.drop(columns='expenses')

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Modelo de AutoML con FLAML

In [None]:
# Import FLAML's AutoML class and create the instance that the following
# cells fit on the training data and then query for results.
from flaml import AutoML
automl = AutoML()

In [None]:
# Keyword arguments that are unpacked into automl.fit()
settings = dict(
    time_budget=90,     # total running time in seconds
    # metric can be: 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',
    # 'roc_auc_ovo', 'log_loss', 'mape', 'f1', 'ap', 'ndcg', 'micro_f1', 'macro_f1'
    metric='mae',
    task='regression',  # task type
    seed=7654321,       # random seed
)

In [None]:
# Run the AutoML search on the training split, configured by `settings`
automl.fit(
    X_train=X_train,
    y_train=y_train,
    **settings,
)

In [None]:
# Display the estimator object held by the model that AutoML selected
# (presumably the underlying trained regressor — confirm against the FLAML API docs).
automl.model.estimator

In [None]:
# Report the outcome of the search: the winning learner, its hyperparameter
# configuration, its loss on validation data (MAE, the metric chosen in
# `settings`) and the training time of that best configuration.
print('Best ML learner:', automl.best_estimator)
print('Best hyperparameter config:', automl.best_config)
print(f'Best MAE on validation data: {automl.best_loss:.4g}')
print(f'Training duration of best run: {automl.best_config_train_time:.4g} s')

In [None]:
# Predict the target for the held-out test features with the fitted AutoML
# object; as the last expression in the cell, the result is shown as its output.
automl.predict(X_test)

In [None]:
# Display the feature importances reported by the selected model's estimator.
# NOTE(review): this relies on the chosen learner exposing `feature_importances_`
# — confirm for the estimator that the search actually picked.
automl.model.estimator.feature_importances_

In [None]:
import matplotlib.pyplot as plt

# Horizontal bar chart: one bar per input feature, bar length is its importance
plt.barh(y=automl.feature_names_in_, width=automl.feature_importances_)