# Zindi

In [10]:
from flaml import AutoML
import pandas as pd
import numpy as np
import warnings
from IPython.display import display

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)

train_data = pd.read_csv('train_cleaned_nonBool.csv', low_memory=False)
test_data = pd.read_csv('test_cleaned_nonBool.csv', low_memory=False)
prophet_preds = pd.read_csv('prophet_prediction.csv', low_memory=False)
train_data = train_data.merge(prophet_preds, on='date')
test_data = test_data.merge(prophet_preds, on='date')

# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
    "time_budget": 5400,  # in seconds
    "metric": 'mae',
    "task": 'regression',
    "log_file_name": "zindi.log",
}
# Train with labeled input data
automl.fit(X_train=train_data.drop(columns=['ID','device','date','pm2_5']), y_train=train_data['pm2_5'], **automl_settings)
# Predict
preds = automl.predict(test_data.drop(columns=['ID','device','date']))
# Print the best model
print(automl.model.estimator)

preds = pd.DataFrame(preds).rename(columns={0:'pm2_5'})
test_data = test_data.join(preds)
test_data[['ID', 'pm2_5']].to_csv('zindi_flaml.csv', index=False)

[flaml.automl: 10-06 05:53:44] {2600} INFO - task = regression
[flaml.automl: 10-06 05:53:44] {2602} INFO - Data split method: uniform
[flaml.automl: 10-06 05:53:44] {2605} INFO - Evaluation method: cv
[flaml.automl: 10-06 05:53:44] {2727} INFO - Minimizing error metric: mae
[flaml.automl: 10-06 05:53:44] {2869} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 10-06 05:53:44] {3174} INFO - iteration 0, current learner lgbm
[flaml.automl: 10-06 05:53:45] {3307} INFO - Estimated sufficient time budget=3005s. Estimated necessary time budget=26s.
[flaml.automl: 10-06 05:53:45] {3354} INFO -  at 0.6s,	estimator lgbm's best error=19.4621,	best estimator lgbm's best error=19.4621
[flaml.automl: 10-06 05:53:45] {3174} INFO - iteration 1, current learner lgbm
[flaml.automl: 10-06 05:53:45] {3354} INFO -  at 0.8s,	estimator lgbm's best error=19.4621,	best estimator lgbm's best error=19.4621
[flaml.automl: 10-06 05:53:4

[flaml.automl: 10-06 05:54:10] {3174} INFO - iteration 36, current learner rf
[flaml.automl: 10-06 05:54:11] {3354} INFO -  at 27.0s,	estimator rf's best error=14.5923,	best estimator lgbm's best error=9.2969
[flaml.automl: 10-06 05:54:11] {3174} INFO - iteration 37, current learner extra_tree
[flaml.automl: 10-06 05:54:12] {3354} INFO -  at 27.4s,	estimator extra_tree's best error=13.3429,	best estimator lgbm's best error=9.2969
[flaml.automl: 10-06 05:54:12] {3174} INFO - iteration 38, current learner rf
[flaml.automl: 10-06 05:54:13] {3354} INFO -  at 28.4s,	estimator rf's best error=12.9896,	best estimator lgbm's best error=9.2969
[flaml.automl: 10-06 05:54:13] {3174} INFO - iteration 39, current learner catboost
[flaml.automl: 10-06 05:54:30] {3354} INFO -  at 45.4s,	estimator catboost's best error=9.3343,	best estimator lgbm's best error=9.2969
[flaml.automl: 10-06 05:54:30] {3174} INFO - iteration 40, current learner rf
[flaml.automl: 10-06 05:54:31] {3354} INFO -  at 46.6s,	est

[flaml.automl: 10-06 05:56:12] {3354} INFO -  at 147.7s,	estimator extra_tree's best error=12.9164,	best estimator xgboost's best error=8.8427
[flaml.automl: 10-06 05:56:12] {3174} INFO - iteration 74, current learner xgboost
[flaml.automl: 10-06 05:56:20] {3354} INFO -  at 155.5s,	estimator xgboost's best error=8.8427,	best estimator xgboost's best error=8.8427
[flaml.automl: 10-06 05:56:20] {3174} INFO - iteration 75, current learner xgb_limitdepth
[flaml.automl: 10-06 05:56:20] {3354} INFO -  at 156.1s,	estimator xgb_limitdepth's best error=12.0709,	best estimator xgboost's best error=8.8427
[flaml.automl: 10-06 05:56:20] {3174} INFO - iteration 76, current learner xgb_limitdepth
[flaml.automl: 10-06 05:56:21] {3354} INFO -  at 156.6s,	estimator xgb_limitdepth's best error=12.0709,	best estimator xgboost's best error=8.8427
[flaml.automl: 10-06 05:56:21] {3174} INFO - iteration 77, current learner xgb_limitdepth
[flaml.automl: 10-06 05:56:21] {3354} INFO -  at 157.2s,	estimator xgb_

[flaml.automl: 10-06 06:05:36] {3174} INFO - iteration 110, current learner extra_tree
[flaml.automl: 10-06 06:05:38] {3354} INFO -  at 714.2s,	estimator extra_tree's best error=12.1033,	best estimator xgboost's best error=8.1496
[flaml.automl: 10-06 06:05:38] {3174} INFO - iteration 111, current learner rf
[flaml.automl: 10-06 06:05:48] {3354} INFO -  at 724.1s,	estimator rf's best error=10.9200,	best estimator xgboost's best error=8.1496
[flaml.automl: 10-06 06:05:48] {3174} INFO - iteration 112, current learner lgbm
[flaml.automl: 10-06 06:05:52] {3354} INFO -  at 727.9s,	estimator lgbm's best error=9.1393,	best estimator xgboost's best error=8.1496
[flaml.automl: 10-06 06:05:52] {3174} INFO - iteration 113, current learner extra_tree
[flaml.automl: 10-06 06:05:56] {3354} INFO -  at 731.8s,	estimator extra_tree's best error=12.1033,	best estimator xgboost's best error=8.1496
[flaml.automl: 10-06 06:05:56] {3174} INFO - iteration 114, current learner xgboost
[flaml.automl: 10-06 06:0

[flaml.automl: 10-06 06:15:36] {3174} INFO - iteration 147, current learner xgboost
[flaml.automl: 10-06 06:16:38] {3354} INFO -  at 1374.0s,	estimator xgboost's best error=8.0159,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 06:16:38] {3174} INFO - iteration 148, current learner lgbm
[flaml.automl: 10-06 06:16:40] {3354} INFO -  at 1376.1s,	estimator lgbm's best error=8.4040,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 06:16:40] {3174} INFO - iteration 149, current learner xgboost
[flaml.automl: 10-06 06:18:35] {3354} INFO -  at 1490.8s,	estimator xgboost's best error=8.0159,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 06:18:35] {3174} INFO - iteration 150, current learner lgbm
[flaml.automl: 10-06 06:18:38] {3354} INFO -  at 1493.9s,	estimator lgbm's best error=8.4040,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 06:18:38] {3174} INFO - iteration 151, current learner xgboost
[flaml.automl: 10-06 06:19:28] {

[flaml.automl: 10-06 06:41:01] {3174} INFO - iteration 183, current learner catboost
[flaml.automl: 10-06 06:41:41] {3354} INFO -  at 2876.9s,	estimator catboost's best error=9.1667,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 06:41:41] {3174} INFO - iteration 184, current learner rf
[flaml.automl: 10-06 06:41:47] {3354} INFO -  at 2882.6s,	estimator rf's best error=10.9200,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 06:41:47] {3174} INFO - iteration 185, current learner xgboost
[flaml.automl: 10-06 06:43:27] {3354} INFO -  at 2982.6s,	estimator xgboost's best error=8.0159,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 06:43:27] {3174} INFO - iteration 186, current learner xgboost
[flaml.automl: 10-06 06:45:34] {3354} INFO -  at 3110.2s,	estimator xgboost's best error=8.0159,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 06:45:34] {3174} INFO - iteration 187, current learner catboost
[flaml.automl: 10-06 06:45

[flaml.automl: 10-06 07:23:14] {3354} INFO -  at 5370.0s,	estimator rf's best error=10.7440,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 07:23:14] {3174} INFO - iteration 220, current learner rf
[flaml.automl: 10-06 07:23:29] {3354} INFO -  at 5384.6s,	estimator rf's best error=10.7440,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 07:23:29] {3174} INFO - iteration 221, current learner rf
[flaml.automl: 10-06 07:23:36] {3354} INFO -  at 5392.0s,	estimator rf's best error=10.7440,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 07:23:36] {3174} INFO - iteration 222, current learner rf
[flaml.automl: 10-06 07:23:46] {3354} INFO -  at 5401.5s,	estimator rf's best error=10.7329,	best estimator xgboost's best error=8.0159
[flaml.automl: 10-06 07:24:06] {3618} INFO - retrain xgboost for 20.6s
[flaml.automl: 10-06 07:24:06] {3625} INFO - retrained model: XGBRegressor(base_score=0.5, booster='gbtree', callbacks=[],
             colsample_by

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=[],
             colsample_bylevel=0.8858736793180497, colsample_bynode=1,
             colsample_bytree=0.7122813722286, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, gamma=0, gpu_id=-1,
             grow_policy='lossguide', importance_type=None,
             interaction_constraints='', learning_rate=0.013538946319743906,
             max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=0,
             max_leaves=119, min_child_weight=5.8994376921285845, missing=nan,
             monotone_constraints='()', n_estimators=2101, n_jobs=-1,
             num_parallel_tree=1, predictor='auto', random_state=0,
             reg_alpha=0.023952071094261465, reg_lambda=0.0728957296239715, ...)


In [11]:
del train_data
del test_data
del preds
del automl

# Sber

In [12]:
from flaml import AutoML
import pandas as pd
import numpy as np
import warnings
from IPython.display import display

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)

train_data = pd.read_csv('train_sber.csv', low_memory=False)
test_data = pd.read_csv('test_sber.csv', low_memory=False)

# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
    "time_budget": 5400,  # in seconds
    "metric": 'rmse',
    "task": 'regression',
    "log_file_name": "sber.log",
}
# Train with labeled input data
automl.fit(X_train=train_data.drop(columns=['id','price_doc']), y_train=train_data['price_doc'], **automl_settings)
# Predict
preds = automl.predict(test_data.drop(columns=['id']))
# Print the best model
print(automl.model.estimator)

preds = pd.DataFrame(preds).rename(columns={0:'price_doc'})
test_data = test_data.join(preds)
test_data[['id', 'price_doc']].to_csv('sber_flaml.csv', index=False)

[flaml.automl: 10-06 07:24:12] {2600} INFO - task = regression
[flaml.automl: 10-06 07:24:12] {2602} INFO - Data split method: uniform
[flaml.automl: 10-06 07:24:12] {2605} INFO - Evaluation method: cv
[flaml.automl: 10-06 07:24:12] {2727} INFO - Minimizing error metric: rmse
[flaml.automl: 10-06 07:24:12] {2869} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 10-06 07:24:12] {3174} INFO - iteration 0, current learner lgbm
[flaml.automl: 10-06 07:24:14] {3307} INFO - Estimated sufficient time budget=24332s. Estimated necessary time budget=208s.
[flaml.automl: 10-06 07:24:14] {3354} INFO -  at 6.3s,	estimator lgbm's best error=4165317.6672,	best estimator lgbm's best error=4165317.6672
[flaml.automl: 10-06 07:24:14] {3174} INFO - iteration 1, current learner lgbm
[flaml.automl: 10-06 07:24:17] {3354} INFO -  at 8.8s,	estimator lgbm's best error=4165317.6672,	best estimator lgbm's best error=4165317.6672
[flam

[flaml.automl: 10-06 07:26:21] {3174} INFO - iteration 34, current learner lgbm
[flaml.automl: 10-06 07:26:44] {3354} INFO -  at 155.7s,	estimator lgbm's best error=2692984.5603,	best estimator lgbm's best error=2692984.5603
[flaml.automl: 10-06 07:26:44] {3174} INFO - iteration 35, current learner rf
[flaml.automl: 10-06 07:26:52] {3354} INFO -  at 164.3s,	estimator rf's best error=2925822.5093,	best estimator lgbm's best error=2692984.5603
[flaml.automl: 10-06 07:26:52] {3174} INFO - iteration 36, current learner extra_tree
[flaml.automl: 10-06 07:26:56] {3354} INFO -  at 168.3s,	estimator extra_tree's best error=2748447.6450,	best estimator lgbm's best error=2692984.5603
[flaml.automl: 10-06 07:26:56] {3174} INFO - iteration 37, current learner xgboost
[flaml.automl: 10-06 07:26:59] {3354} INFO -  at 171.3s,	estimator xgboost's best error=2992874.9800,	best estimator lgbm's best error=2692984.5603
[flaml.automl: 10-06 07:26:59] {3174} INFO - iteration 38, current learner xgboost
[fl

[flaml.automl: 10-06 07:36:38] {3354} INFO -  at 749.9s,	estimator xgb_limitdepth's best error=2779731.1356,	best estimator xgboost's best error=2586121.4348
[flaml.automl: 10-06 07:36:38] {3174} INFO - iteration 70, current learner rf
[flaml.automl: 10-06 07:36:48] {3354} INFO -  at 759.8s,	estimator rf's best error=2757057.7487,	best estimator xgboost's best error=2586121.4348
[flaml.automl: 10-06 07:36:48] {3174} INFO - iteration 71, current learner xgb_limitdepth
[flaml.automl: 10-06 07:36:51] {3354} INFO -  at 762.8s,	estimator xgb_limitdepth's best error=2779731.1356,	best estimator xgboost's best error=2586121.4348
[flaml.automl: 10-06 07:36:51] {3174} INFO - iteration 72, current learner xgb_limitdepth
[flaml.automl: 10-06 07:37:04] {3354} INFO -  at 775.8s,	estimator xgb_limitdepth's best error=2779731.1356,	best estimator xgboost's best error=2586121.4348
[flaml.automl: 10-06 07:37:04] {3174} INFO - iteration 73, current learner xgb_limitdepth
[flaml.automl: 10-06 07:37:12] {

[flaml.automl: 10-06 07:57:35] {3354} INFO -  at 2007.6s,	estimator xgb_limitdepth's best error=2598313.6721,	best estimator xgboost's best error=2586121.4348
[flaml.automl: 10-06 07:57:35] {3174} INFO - iteration 104, current learner lgbm
[flaml.automl: 10-06 07:57:42] {3354} INFO -  at 2014.1s,	estimator lgbm's best error=2680501.0183,	best estimator xgboost's best error=2586121.4348
[flaml.automl: 10-06 07:57:42] {3174} INFO - iteration 105, current learner xgb_limitdepth
[flaml.automl: 10-06 07:58:23] {3354} INFO -  at 2055.0s,	estimator xgb_limitdepth's best error=2598313.6721,	best estimator xgboost's best error=2586121.4348
[flaml.automl: 10-06 07:58:23] {3174} INFO - iteration 106, current learner xgboost
[flaml.automl: 10-06 08:02:57] {3354} INFO -  at 2328.8s,	estimator xgboost's best error=2586121.4348,	best estimator xgboost's best error=2586121.4348
[flaml.automl: 10-06 08:02:57] {3174} INFO - iteration 107, current learner xgb_limitdepth
[flaml.automl: 10-06 08:04:05] {33

[flaml.automl: 10-06 08:54:47] {3618} INFO - retrain xgboost for 38.5s
[flaml.automl: 10-06 08:54:47] {3625} INFO - retrained model: XGBRegressor(base_score=0.5, booster='gbtree', callbacks=[],
             colsample_bylevel=0.58431544777044, colsample_bynode=1,
             colsample_bytree=0.8048306437727928, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, gamma=0, gpu_id=-1,
             grow_policy='lossguide', importance_type=None,
             interaction_constraints='', learning_rate=0.02761001315275789,
             max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=0,
             max_leaves=49, min_child_weight=10.544588447116023, missing=nan,
             monotone_constraints='()', n_estimators=1313, n_jobs=-1,
             num_parallel_tree=1, predictor='auto', random_state=0,
             reg_alpha=0.002943920200427394, reg_lambda=0.03685133435603548, ...)
[flaml.automl: 10-06 08:54:47] {2900} INFO - fit succeeded
[flaml.auto

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=[],
             colsample_bylevel=0.58431544777044, colsample_bynode=1,
             colsample_bytree=0.8048306437727928, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, gamma=0, gpu_id=-1,
             grow_policy='lossguide', importance_type=None,
             interaction_constraints='', learning_rate=0.02761001315275789,
             max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=0,
             max_leaves=49, min_child_weight=10.544588447116023, missing=nan,
             monotone_constraints='()', n_estimators=1313, n_jobs=-1,
             num_parallel_tree=1, predictor='auto', random_state=0,
             reg_alpha=0.002943920200427394, reg_lambda=0.03685133435603548, ...)


In [13]:
del train_data
del test_data
del preds
del automl

# Santander

In [14]:
from flaml import AutoML
import pandas as pd
import numpy as np
import warnings
from IPython.display import display

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)

train_data = pd.read_csv('train_santander.csv', low_memory=False)
test_data = pd.read_csv('test_santander.csv', low_memory=False)

# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
    "time_budget": 5400,  # in seconds
    "metric": 'roc_auc',
    "task": 'classification',
    "log_file_name": "santander.log",
}
# Train with labeled input data
automl.fit(X_train=train_data.drop(columns=['ID','TARGET']), y_train=train_data['TARGET'], **automl_settings)
# Predict
preds = automl.predict(test_data.drop(columns=['ID']))
# Print the best model
print(automl.model.estimator)

preds = pd.DataFrame(preds).rename(columns={0:'TARGET'})
test_data = test_data.join(preds)
test_data[['ID', 'TARGET']].to_csv('santander_flaml.csv', index=False)

[flaml.automl: 10-06 08:55:09] {2600} INFO - task = classification
[flaml.automl: 10-06 08:55:09] {2602} INFO - Data split method: stratified
[flaml.automl: 10-06 08:55:09] {2605} INFO - Evaluation method: holdout
[flaml.automl: 10-06 08:55:09] {2727} INFO - Minimizing error metric: 1-roc_auc
[flaml.automl: 10-06 08:55:09] {2869} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl: 10-06 08:55:09] {3174} INFO - iteration 0, current learner lgbm
[flaml.automl: 10-06 08:55:09] {3307} INFO - Estimated sufficient time budget=12207s. Estimated necessary time budget=300s.
[flaml.automl: 10-06 08:55:09] {3354} INFO -  at 15.5s,	estimator lgbm's best error=0.1886,	best estimator lgbm's best error=0.1886
[flaml.automl: 10-06 08:55:09] {3174} INFO - iteration 1, current learner lgbm
[flaml.automl: 10-06 08:55:10] {3354} INFO -  at 15.6s,	estimator lgbm's best error=0.1886,	best estimator lgbm's best error=0.1886
[f

[flaml.automl: 10-06 08:55:16] {3174} INFO - iteration 36, current learner extra_tree
[flaml.automl: 10-06 08:55:17] {3354} INFO -  at 22.7s,	estimator extra_tree's best error=0.2272,	best estimator lgbm's best error=0.1523
[flaml.automl: 10-06 08:55:17] {3174} INFO - iteration 37, current learner lgbm
[flaml.automl: 10-06 08:55:17] {3354} INFO -  at 23.2s,	estimator lgbm's best error=0.1523,	best estimator lgbm's best error=0.1523
[flaml.automl: 10-06 08:55:17] {3174} INFO - iteration 38, current learner lgbm
[flaml.automl: 10-06 08:55:18] {3354} INFO -  at 24.2s,	estimator lgbm's best error=0.1523,	best estimator lgbm's best error=0.1523
[flaml.automl: 10-06 08:55:18] {3174} INFO - iteration 39, current learner lgbm
[flaml.automl: 10-06 08:55:19] {3354} INFO -  at 25.0s,	estimator lgbm's best error=0.1508,	best estimator lgbm's best error=0.1508
[flaml.automl: 10-06 08:55:19] {3174} INFO - iteration 40, current learner catboost
[flaml.automl: 10-06 08:55:21] {3354} INFO -  at 26.8s,	

[flaml.automl: 10-06 08:55:46] {3174} INFO - iteration 75, current learner extra_tree
[flaml.automl: 10-06 08:55:47] {3354} INFO -  at 52.9s,	estimator extra_tree's best error=0.2272,	best estimator lgbm's best error=0.1425
[flaml.automl: 10-06 08:55:47] {3174} INFO - iteration 76, current learner lgbm
[flaml.automl: 10-06 08:55:47] {3354} INFO -  at 53.5s,	estimator lgbm's best error=0.1425,	best estimator lgbm's best error=0.1425
[flaml.automl: 10-06 08:55:47] {3174} INFO - iteration 77, current learner lgbm
[flaml.automl: 10-06 08:55:48] {3354} INFO -  at 54.0s,	estimator lgbm's best error=0.1425,	best estimator lgbm's best error=0.1425
[flaml.automl: 10-06 08:55:48] {3174} INFO - iteration 78, current learner extra_tree
[flaml.automl: 10-06 08:55:48] {3354} INFO -  at 54.3s,	estimator extra_tree's best error=0.2272,	best estimator lgbm's best error=0.1425
[flaml.automl: 10-06 08:55:48] {3174} INFO - iteration 79, current learner lgbm
[flaml.automl: 10-06 08:55:50] {3354} INFO -  at

[flaml.automl: 10-06 08:56:23] {3354} INFO -  at 88.6s,	estimator lgbm's best error=0.1423,	best estimator lgbm's best error=0.1423
[flaml.automl: 10-06 08:56:23] {3174} INFO - iteration 113, current learner extra_tree
[flaml.automl: 10-06 08:56:23] {3354} INFO -  at 89.2s,	estimator extra_tree's best error=0.2151,	best estimator lgbm's best error=0.1423
[flaml.automl: 10-06 08:56:23] {3174} INFO - iteration 114, current learner lgbm
[flaml.automl: 10-06 08:56:24] {3354} INFO -  at 89.7s,	estimator lgbm's best error=0.1423,	best estimator lgbm's best error=0.1423
[flaml.automl: 10-06 08:56:24] {3174} INFO - iteration 115, current learner xgb_limitdepth
[flaml.automl: 10-06 08:56:26] {3354} INFO -  at 91.7s,	estimator xgb_limitdepth's best error=0.1467,	best estimator lgbm's best error=0.1423
[flaml.automl: 10-06 08:56:26] {3174} INFO - iteration 116, current learner lgbm
[flaml.automl: 10-06 08:56:27] {3354} INFO -  at 92.7s,	estimator lgbm's best error=0.1423,	best estimator lgbm's be

[flaml.automl: 10-06 08:59:14] {3174} INFO - iteration 148, current learner xgboost
[flaml.automl: 10-06 08:59:15] {3354} INFO -  at 260.6s,	estimator xgboost's best error=0.1588,	best estimator xgb_limitdepth's best error=0.1416
[flaml.automl: 10-06 08:59:15] {3174} INFO - iteration 149, current learner xgboost
[flaml.automl: 10-06 08:59:15] {3354} INFO -  at 261.1s,	estimator xgboost's best error=0.1563,	best estimator xgb_limitdepth's best error=0.1416
[flaml.automl: 10-06 08:59:15] {3174} INFO - iteration 150, current learner xgboost
[flaml.automl: 10-06 08:59:15] {3354} INFO -  at 261.4s,	estimator xgboost's best error=0.1563,	best estimator xgb_limitdepth's best error=0.1416
[flaml.automl: 10-06 08:59:15] {3174} INFO - iteration 151, current learner catboost
[flaml.automl: 10-06 08:59:17] {3354} INFO -  at 263.2s,	estimator catboost's best error=0.1558,	best estimator xgb_limitdepth's best error=0.1416
[flaml.automl: 10-06 08:59:17] {3174} INFO - iteration 152, current learner xg

[flaml.automl: 10-06 09:03:39] {3354} INFO -  at 525.2s,	estimator catboost's best error=0.1553,	best estimator xgb_limitdepth's best error=0.1416
[flaml.automl: 10-06 09:03:39] {3174} INFO - iteration 185, current learner lgbm
[flaml.automl: 10-06 09:03:40] {3354} INFO -  at 526.0s,	estimator lgbm's best error=0.1423,	best estimator xgb_limitdepth's best error=0.1416
[flaml.automl: 10-06 09:03:40] {3174} INFO - iteration 186, current learner extra_tree
[flaml.automl: 10-06 09:03:41] {3354} INFO -  at 526.7s,	estimator extra_tree's best error=0.2151,	best estimator xgb_limitdepth's best error=0.1416
[flaml.automl: 10-06 09:03:41] {3174} INFO - iteration 187, current learner extra_tree
[flaml.automl: 10-06 09:03:41] {3354} INFO -  at 527.2s,	estimator extra_tree's best error=0.2151,	best estimator xgb_limitdepth's best error=0.1416
[flaml.automl: 10-06 09:03:41] {3174} INFO - iteration 188, current learner rf
[flaml.automl: 10-06 09:03:42] {3354} INFO -  at 528.3s,	estimator rf's best e

[flaml.automl: 10-06 09:12:34] {3174} INFO - iteration 220, current learner rf
[flaml.automl: 10-06 09:12:36] {3354} INFO -  at 1062.1s,	estimator rf's best error=0.1570,	best estimator xgb_limitdepth's best error=0.1412
[flaml.automl: 10-06 09:12:36] {3174} INFO - iteration 221, current learner lgbm
[flaml.automl: 10-06 09:12:38] {3354} INFO -  at 1063.9s,	estimator lgbm's best error=0.1423,	best estimator xgb_limitdepth's best error=0.1412
[flaml.automl: 10-06 09:12:38] {3174} INFO - iteration 222, current learner extra_tree
[flaml.automl: 10-06 09:12:39] {3354} INFO -  at 1064.6s,	estimator extra_tree's best error=0.2151,	best estimator xgb_limitdepth's best error=0.1412
[flaml.automl: 10-06 09:12:39] {3174} INFO - iteration 223, current learner lgbm
[flaml.automl: 10-06 09:12:39] {3354} INFO -  at 1065.3s,	estimator lgbm's best error=0.1423,	best estimator xgb_limitdepth's best error=0.1412
[flaml.automl: 10-06 09:12:39] {3174} INFO - iteration 224, current learner xgb_limitdepth
[

[flaml.automl: 10-06 09:25:01] {3174} INFO - iteration 255, current learner extra_tree
[flaml.automl: 10-06 09:25:01] {3354} INFO -  at 1807.3s,	estimator extra_tree's best error=0.2151,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:25:01] {3174} INFO - iteration 256, current learner xgb_limitdepth
[flaml.automl: 10-06 09:25:27] {3354} INFO -  at 1833.0s,	estimator xgb_limitdepth's best error=0.1400,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:25:27] {3174} INFO - iteration 257, current learner lgbm
[flaml.automl: 10-06 09:25:30] {3354} INFO -  at 1835.8s,	estimator lgbm's best error=0.1423,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:25:30] {3174} INFO - iteration 258, current learner xgb_limitdepth
[flaml.automl: 10-06 09:27:45] {3354} INFO -  at 1971.4s,	estimator xgb_limitdepth's best error=0.1400,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:27:45] {3174} INFO - ite

[flaml.automl: 10-06 09:43:18] {3174} INFO - iteration 290, current learner rf
[flaml.automl: 10-06 09:43:19] {3354} INFO -  at 2905.1s,	estimator rf's best error=0.1556,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:43:19] {3174} INFO - iteration 291, current learner rf
[flaml.automl: 10-06 09:43:21] {3354} INFO -  at 2907.2s,	estimator rf's best error=0.1556,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:43:21] {3174} INFO - iteration 292, current learner xgb_limitdepth
[flaml.automl: 10-06 09:44:17] {3354} INFO -  at 2962.8s,	estimator xgb_limitdepth's best error=0.1400,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:44:17] {3174} INFO - iteration 293, current learner rf
[flaml.automl: 10-06 09:44:18] {3354} INFO -  at 2963.9s,	estimator rf's best error=0.1556,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:44:18] {3174} INFO - iteration 294, current learner xgb_limitdepth
[

[flaml.automl: 10-06 09:50:11] {3354} INFO -  at 3317.4s,	estimator xgboost's best error=0.1458,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:50:11] {3174} INFO - iteration 327, current learner xgboost
[flaml.automl: 10-06 09:50:15] {3354} INFO -  at 3320.9s,	estimator xgboost's best error=0.1441,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:50:15] {3174} INFO - iteration 328, current learner xgboost
[flaml.automl: 10-06 09:50:20] {3354} INFO -  at 3325.8s,	estimator xgboost's best error=0.1441,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:50:20] {3174} INFO - iteration 329, current learner rf
[flaml.automl: 10-06 09:50:22] {3354} INFO -  at 3328.3s,	estimator rf's best error=0.1480,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:50:22] {3174} INFO - iteration 330, current learner rf
[flaml.automl: 10-06 09:50:25] {3354} INFO -  at 3331.1s,	estimator rf's best error=0.1480,

[flaml.automl: 10-06 09:54:10] {3354} INFO -  at 3556.1s,	estimator rf's best error=0.1468,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:54:10] {3174} INFO - iteration 363, current learner xgboost
[flaml.automl: 10-06 09:54:17] {3354} INFO -  at 3562.6s,	estimator xgboost's best error=0.1433,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:54:17] {3174} INFO - iteration 364, current learner rf
[flaml.automl: 10-06 09:54:19] {3354} INFO -  at 3565.5s,	estimator rf's best error=0.1468,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:54:19] {3174} INFO - iteration 365, current learner extra_tree
[flaml.automl: 10-06 09:54:21] {3354} INFO -  at 3566.5s,	estimator extra_tree's best error=0.2148,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 09:54:21] {3174} INFO - iteration 366, current learner xgb_limitdepth
[flaml.automl: 10-06 09:55:03] {3354} INFO -  at 3608.8s,	estimator xgb_limitd

[flaml.automl: 10-06 10:02:55] {3174} INFO - iteration 398, current learner extra_tree
[flaml.automl: 10-06 10:02:56] {3354} INFO -  at 4081.7s,	estimator extra_tree's best error=0.2075,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:02:56] {3174} INFO - iteration 399, current learner xgboost
[flaml.automl: 10-06 10:03:05] {3354} INFO -  at 4090.6s,	estimator xgboost's best error=0.1433,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:03:05] {3174} INFO - iteration 400, current learner lgbm
[flaml.automl: 10-06 10:03:06] {3354} INFO -  at 4092.3s,	estimator lgbm's best error=0.1423,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:03:06] {3174} INFO - iteration 401, current learner extra_tree
[flaml.automl: 10-06 10:03:09] {3354} INFO -  at 4094.8s,	estimator extra_tree's best error=0.2075,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:03:09] {3174} INFO - iteration 402, current le

[flaml.automl: 10-06 10:09:52] {3354} INFO -  at 4498.3s,	estimator lgbm's best error=0.1423,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:09:52] {3174} INFO - iteration 434, current learner extra_tree
[flaml.automl: 10-06 10:09:53] {3354} INFO -  at 4499.0s,	estimator extra_tree's best error=0.2063,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:09:53] {3174} INFO - iteration 435, current learner catboost
[flaml.automl: 10-06 10:10:17] {3354} INFO -  at 4522.8s,	estimator catboost's best error=0.1483,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:10:17] {3174} INFO - iteration 436, current learner extra_tree
[flaml.automl: 10-06 10:10:18] {3354} INFO -  at 4523.7s,	estimator extra_tree's best error=0.2063,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:10:18] {3174} INFO - iteration 437, current learner catboost
[flaml.automl: 10-06 10:10:26] {3354} INFO -  at 4532.4s,	estima

[flaml.automl: 10-06 10:24:36] {3354} INFO -  at 5382.4s,	estimator extra_tree's best error=0.2063,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:24:36] {3174} INFO - iteration 469, current learner lgbm
[flaml.automl: 10-06 10:24:37] {3354} INFO -  at 5383.1s,	estimator lgbm's best error=0.1423,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:24:37] {3174} INFO - iteration 470, current learner lgbm
[flaml.automl: 10-06 10:24:38] {3354} INFO -  at 5384.1s,	estimator lgbm's best error=0.1423,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:24:38] {3174} INFO - iteration 471, current learner extra_tree
[flaml.automl: 10-06 10:24:39] {3354} INFO -  at 5384.8s,	estimator extra_tree's best error=0.2063,	best estimator xgb_limitdepth's best error=0.1400
[flaml.automl: 10-06 10:24:39] {3174} INFO - iteration 472, current learner lgbm
[flaml.automl: 10-06 10:24:40] {3354} INFO -  at 5385.7s,	estimator lgbm's best er

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=[],
              colsample_bylevel=0.8595694319668747, colsample_bynode=1,
              colsample_bytree=0.685157112794835, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, gamma=0, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.01769185656006684,
              max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=7,
              max_leaves=0, min_child_weight=2.720236092565601, missing=nan,
              monotone_constraints='()', n_estimators=457, n_jobs=-1,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0.004424495522396838, reg_lambda=1.7552851047800666, ...)


In [15]:
del train_data
del test_data
del preds
del automl

# Liberty

In [1]:
from flaml import AutoML
import pandas as pd
import numpy as np
import warnings
from IPython.display import display

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)

train_data = pd.read_csv('train_liberty.csv', low_memory=False)
test_data = pd.read_csv('test_liberty.csv', low_memory=False)

# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
    "time_budget": 5400,  # in seconds
    "metric": 'rmse',
    "task": 'regression',
    "log_file_name": "liberty.log",
}
# Train with labeled input data
automl.fit(X_train=train_data.drop(columns=['id','target']), y_train=train_data['target'], **automl_settings)
# Predict
preds = automl.predict(test_data.drop(columns=['id']))
# Print the best model
print(automl.model.estimator)

preds = pd.DataFrame(preds).rename(columns={0:'target'})
test_data = test_data.join(preds)
test_data[['id', 'target']].to_csv('liberty_flaml.csv', index=False)

[flaml.automl: 10-06 12:00:41] {2600} INFO - task = regression
[flaml.automl: 10-06 12:00:41] {2602} INFO - Data split method: uniform
[flaml.automl: 10-06 12:00:41] {2605} INFO - Evaluation method: holdout
[flaml.automl: 10-06 12:00:43] {2727} INFO - Minimizing error metric: rmse
[flaml.automl: 10-06 12:00:44] {2869} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 10-06 12:00:44] {3174} INFO - iteration 0, current learner lgbm
[flaml.automl: 10-06 12:00:45] {3307} INFO - Estimated sufficient time budget=235678s. Estimated necessary time budget=2015s.
[flaml.automl: 10-06 12:00:45] {3354} INFO -  at 85.7s,	estimator lgbm's best error=0.2417,	best estimator lgbm's best error=0.2417
[flaml.automl: 10-06 12:00:45] {3174} INFO - iteration 1, current learner lgbm
[flaml.automl: 10-06 12:00:45] {3354} INFO -  at 86.2s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06

[flaml.automl: 10-06 12:01:05] {3174} INFO - iteration 36, current learner xgboost
[flaml.automl: 10-06 12:01:05] {3354} INFO -  at 106.3s,	estimator xgboost's best error=0.2436,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:01:05] {3174} INFO - iteration 37, current learner lgbm
[flaml.automl: 10-06 12:01:06] {3354} INFO -  at 107.2s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:01:06] {3174} INFO - iteration 38, current learner rf
[flaml.automl: 10-06 12:01:11] {3354} INFO -  at 111.4s,	estimator rf's best error=0.2427,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:01:11] {3174} INFO - iteration 39, current learner lgbm
[flaml.automl: 10-06 12:01:16] {3354} INFO -  at 116.4s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:01:16] {3174} INFO - iteration 40, current learner xgboost
[flaml.automl: 10-06 12:01:16] {3354} INFO -  at 117.1s,	estima

[flaml.automl: 10-06 12:03:26] {3174} INFO - iteration 74, current learner xgboost
[flaml.automl: 10-06 12:03:27] {3354} INFO -  at 248.0s,	estimator xgboost's best error=0.2435,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:03:27] {3174} INFO - iteration 75, current learner extra_tree
[flaml.automl: 10-06 12:03:33] {3354} INFO -  at 254.1s,	estimator extra_tree's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:03:33] {3174} INFO - iteration 76, current learner lgbm
[flaml.automl: 10-06 12:03:38] {3354} INFO -  at 259.2s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:03:38] {3174} INFO - iteration 77, current learner lgbm
[flaml.automl: 10-06 12:03:44] {3354} INFO -  at 264.9s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:03:44] {3174} INFO - iteration 78, current learner extra_tree
[flaml.automl: 10-06 12:03:59] {3354} INFO -

[flaml.automl: 10-06 12:06:02] {3174} INFO - iteration 111, current learner xgboost
[flaml.automl: 10-06 12:06:07] {3354} INFO -  at 407.4s,	estimator xgboost's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:06:07] {3174} INFO - iteration 112, current learner lgbm
[flaml.automl: 10-06 12:06:11] {3354} INFO -  at 412.2s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:06:11] {3174} INFO - iteration 113, current learner xgb_limitdepth
[flaml.automl: 10-06 12:06:13] {3354} INFO -  at 413.4s,	estimator xgb_limitdepth's best error=0.2428,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:06:13] {3174} INFO - iteration 114, current learner xgboost
[flaml.automl: 10-06 12:06:19] {3354} INFO -  at 420.2s,	estimator xgboost's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:06:19] {3174} INFO - iteration 115, current learner xgb_limitdepth
[flaml.automl: 10-06 

[flaml.automl: 10-06 12:10:39] {3174} INFO - iteration 148, current learner lgbm
[flaml.automl: 10-06 12:10:45] {3354} INFO -  at 685.6s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:10:45] {3174} INFO - iteration 149, current learner lgbm
[flaml.automl: 10-06 12:10:51] {3354} INFO -  at 691.3s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:10:51] {3174} INFO - iteration 150, current learner lgbm
[flaml.automl: 10-06 12:10:57] {3354} INFO -  at 697.3s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:10:57] {3174} INFO - iteration 151, current learner lgbm
[flaml.automl: 10-06 12:11:01] {3354} INFO -  at 702.2s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:11:01] {3174} INFO - iteration 152, current learner rf
[flaml.automl: 10-06 12:11:52] {3354} INFO -  at 753.0s,	estimato

[flaml.automl: 10-06 12:16:33] {3174} INFO - iteration 185, current learner catboost
[flaml.automl: 10-06 12:16:51] {3354} INFO -  at 1051.9s,	estimator catboost's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:16:51] {3174} INFO - iteration 186, current learner xgboost
[flaml.automl: 10-06 12:16:58] {3354} INFO -  at 1058.5s,	estimator xgboost's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:16:58] {3174} INFO - iteration 187, current learner rf
[flaml.automl: 10-06 12:17:24] {3354} INFO -  at 1085.0s,	estimator rf's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:17:24] {3174} INFO - iteration 188, current learner xgb_limitdepth
[flaml.automl: 10-06 12:17:29] {3354} INFO -  at 1089.5s,	estimator xgb_limitdepth's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:17:29] {3174} INFO - iteration 189, current learner lgbm
[flaml.automl: 10-06 12:17:35

[flaml.automl: 10-06 12:21:25] {3174} INFO - iteration 222, current learner lgbm
[flaml.automl: 10-06 12:21:34] {3354} INFO -  at 1334.7s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:21:34] {3174} INFO - iteration 223, current learner xgb_limitdepth
[flaml.automl: 10-06 12:21:40] {3354} INFO -  at 1340.5s,	estimator xgb_limitdepth's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:21:40] {3174} INFO - iteration 224, current learner xgb_limitdepth
[flaml.automl: 10-06 12:21:48] {3354} INFO -  at 1349.2s,	estimator xgb_limitdepth's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:21:48] {3174} INFO - iteration 225, current learner lgbm
[flaml.automl: 10-06 12:21:54] {3354} INFO -  at 1355.2s,	estimator lgbm's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:21:54] {3174} INFO - iteration 226, current learner lgbm
[flaml.automl: 10-0

[flaml.automl: 10-06 12:26:14] {3174} INFO - iteration 259, current learner xgb_limitdepth
[flaml.automl: 10-06 12:26:19] {3354} INFO -  at 1619.9s,	estimator xgb_limitdepth's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:26:19] {3174} INFO - iteration 260, current learner xgboost
[flaml.automl: 10-06 12:26:26] {3354} INFO -  at 1626.6s,	estimator xgboost's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:26:26] {3174} INFO - iteration 261, current learner extra_tree
[flaml.automl: 10-06 12:26:42] {3354} INFO -  at 1642.8s,	estimator extra_tree's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:26:42] {3174} INFO - iteration 262, current learner xgboost
[flaml.automl: 10-06 12:26:49] {3354} INFO -  at 1649.7s,	estimator xgboost's best error=0.2415,	best estimator lgbm's best error=0.2415
[flaml.automl: 10-06 12:26:49] {3174} INFO - iteration 263, current learner xgboost
[flaml.autom

[flaml.automl: 10-06 12:38:22] {3174} INFO - iteration 297, current learner xgboost
[flaml.automl: 10-06 12:38:30] {3354} INFO -  at 2350.7s,	estimator xgboost's best error=0.2415,	best estimator rf's best error=0.2415
[flaml.automl: 10-06 12:38:30] {3174} INFO - iteration 298, current learner catboost
[flaml.automl: 10-06 12:38:33] {3354} INFO -  at 2353.6s,	estimator catboost's best error=0.2415,	best estimator rf's best error=0.2415
[flaml.automl: 10-06 12:38:33] {3174} INFO - iteration 299, current learner rf
[flaml.automl: 10-06 12:39:55] {3354} INFO -  at 2436.1s,	estimator rf's best error=0.2415,	best estimator rf's best error=0.2415
[flaml.automl: 10-06 12:39:55] {3174} INFO - iteration 300, current learner rf
[flaml.automl: 10-06 12:41:29] {3354} INFO -  at 2530.1s,	estimator rf's best error=0.2415,	best estimator rf's best error=0.2415
[flaml.automl: 10-06 12:41:29] {3174} INFO - iteration 301, current learner lgbm
[flaml.automl: 10-06 12:41:36] {3354} INFO -  at 2536.5s,	est

[flaml.automl: 10-06 12:48:53] {3174} INFO - iteration 335, current learner xgboost
[flaml.automl: 10-06 12:49:02] {3354} INFO -  at 2982.3s,	estimator xgboost's best error=0.2415,	best estimator rf's best error=0.2415
[flaml.automl: 10-06 12:49:02] {3174} INFO - iteration 336, current learner lgbm
[flaml.automl: 10-06 12:49:09] {3354} INFO -  at 2989.6s,	estimator lgbm's best error=0.2415,	best estimator rf's best error=0.2415
[flaml.automl: 10-06 12:49:09] {3174} INFO - iteration 337, current learner xgboost
[flaml.automl: 10-06 12:49:16] {3354} INFO -  at 2996.7s,	estimator xgboost's best error=0.2415,	best estimator rf's best error=0.2415
[flaml.automl: 10-06 12:49:16] {3174} INFO - iteration 338, current learner lgbm
[flaml.automl: 10-06 12:49:23] {3354} INFO -  at 3004.0s,	estimator lgbm's best error=0.2415,	best estimator rf's best error=0.2415
[flaml.automl: 10-06 12:49:23] {3174} INFO - iteration 339, current learner catboost
[flaml.automl: 10-06 12:49:25] {3354} INFO -  at 30

[flaml.automl: 10-06 12:55:03] {3174} INFO - iteration 372, current learner xgb_limitdepth
[flaml.automl: 10-06 12:55:11] {3354} INFO -  at 3351.9s,	estimator xgb_limitdepth's best error=0.2415,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 12:55:11] {3174} INFO - iteration 373, current learner xgboost
[flaml.automl: 10-06 12:55:18] {3354} INFO -  at 3358.6s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 12:55:18] {3174} INFO - iteration 374, current learner xgboost
[flaml.automl: 10-06 12:55:25] {3354} INFO -  at 3365.5s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 12:55:25] {3174} INFO - iteration 375, current learner xgboost
[flaml.automl: 10-06 12:55:32] {3354} INFO -  at 3373.3s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 12:55:32] {3174} INFO - iteration 376, current learner xgboost
[flaml

[flaml.automl: 10-06 13:02:27] {3174} INFO - iteration 409, current learner xgboost
[flaml.automl: 10-06 13:02:35] {3354} INFO -  at 3796.1s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:02:35] {3174} INFO - iteration 410, current learner xgboost
[flaml.automl: 10-06 13:02:42] {3354} INFO -  at 3802.5s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:02:42] {3174} INFO - iteration 411, current learner xgboost
[flaml.automl: 10-06 13:02:50] {3354} INFO -  at 3810.8s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:02:50] {3174} INFO - iteration 412, current learner xgboost
[flaml.automl: 10-06 13:02:58] {3354} INFO -  at 3818.6s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:02:58] {3174} INFO - iteration 413, current learner xgboost
[flaml.automl: 10-06

[flaml.automl: 10-06 13:07:25] {3174} INFO - iteration 446, current learner xgboost
[flaml.automl: 10-06 13:07:31] {3354} INFO -  at 4092.3s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:07:31] {3174} INFO - iteration 447, current learner lgbm
[flaml.automl: 10-06 13:07:39] {3354} INFO -  at 4100.2s,	estimator lgbm's best error=0.2415,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:07:39] {3174} INFO - iteration 448, current learner xgboost
[flaml.automl: 10-06 13:07:46] {3354} INFO -  at 4107.1s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:07:46] {3174} INFO - iteration 449, current learner lgbm
[flaml.automl: 10-06 13:07:53] {3354} INFO -  at 4113.7s,	estimator lgbm's best error=0.2415,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:07:53] {3174} INFO - iteration 450, current learner xgboost
[flaml.automl: 10-06 13:07:59] {

[flaml.automl: 10-06 13:12:46] {3174} INFO - iteration 483, current learner xgboost
[flaml.automl: 10-06 13:12:53] {3354} INFO -  at 4413.6s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:12:53] {3174} INFO - iteration 484, current learner lgbm
[flaml.automl: 10-06 13:13:00] {3354} INFO -  at 4420.8s,	estimator lgbm's best error=0.2415,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:13:00] {3174} INFO - iteration 485, current learner xgboost
[flaml.automl: 10-06 13:13:07] {3354} INFO -  at 4427.4s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:13:07] {3174} INFO - iteration 486, current learner xgboost
[flaml.automl: 10-06 13:13:14] {3354} INFO -  at 4434.8s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:13:14] {3174} INFO - iteration 487, current learner xgboost
[flaml.automl: 10-06 13:13

[flaml.automl: 10-06 13:17:05] {3174} INFO - iteration 520, current learner xgboost
[flaml.automl: 10-06 13:17:11] {3354} INFO -  at 4671.5s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:17:11] {3174} INFO - iteration 521, current learner xgboost
[flaml.automl: 10-06 13:17:17] {3354} INFO -  at 4677.4s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:17:17] {3174} INFO - iteration 522, current learner xgboost
[flaml.automl: 10-06 13:17:22] {3354} INFO -  at 4683.2s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:17:22] {3174} INFO - iteration 523, current learner xgboost
[flaml.automl: 10-06 13:17:29] {3354} INFO -  at 4689.8s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:17:29] {3174} INFO - iteration 524, current learner xgboost
[flaml.automl: 10-06

[flaml.automl: 10-06 13:20:40] {3354} INFO -  at 4880.5s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:20:40] {3174} INFO - iteration 557, current learner xgboost
[flaml.automl: 10-06 13:20:46] {3354} INFO -  at 4887.0s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:20:46] {3174} INFO - iteration 558, current learner xgboost
[flaml.automl: 10-06 13:20:53] {3354} INFO -  at 4893.5s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:20:53] {3174} INFO - iteration 559, current learner xgboost
[flaml.automl: 10-06 13:21:00] {3354} INFO -  at 4901.1s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:21:00] {3174} INFO - iteration 560, current learner xgboost
[flaml.automl: 10-06 13:21:07] {3354} INFO -  at 4907.6s,	estimator xgboost's best error=0.2411,	best es

[flaml.automl: 10-06 13:24:29] {3354} INFO -  at 5109.4s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:24:29] {3174} INFO - iteration 594, current learner xgboost
[flaml.automl: 10-06 13:24:34] {3354} INFO -  at 5115.2s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:24:34] {3174} INFO - iteration 595, current learner xgboost
[flaml.automl: 10-06 13:24:46] {3354} INFO -  at 5126.5s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:24:46] {3174} INFO - iteration 596, current learner xgboost
[flaml.automl: 10-06 13:24:52] {3354} INFO -  at 5133.0s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:24:52] {3174} INFO - iteration 597, current learner xgboost
[flaml.automl: 10-06 13:25:03] {3354} INFO -  at 5143.9s,	estimator xgboost's best error=0.2411,	best es

[flaml.automl: 10-06 13:28:17] {3174} INFO - iteration 630, current learner xgboost
[flaml.automl: 10-06 13:28:23] {3354} INFO -  at 5344.0s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:28:23] {3174} INFO - iteration 631, current learner xgboost
[flaml.automl: 10-06 13:28:29] {3354} INFO -  at 5349.8s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:28:29] {3174} INFO - iteration 632, current learner xgboost
[flaml.automl: 10-06 13:28:34] {3354} INFO -  at 5355.2s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:28:34] {3174} INFO - iteration 633, current learner xgboost
[flaml.automl: 10-06 13:28:42] {3354} INFO -  at 5362.4s,	estimator xgboost's best error=0.2411,	best estimator xgboost's best error=0.2411
[flaml.automl: 10-06 13:28:42] {3174} INFO - iteration 634, current learner xgboost
[flaml.automl: 10-06

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=[],
             colsample_bylevel=0.7483926047595993, colsample_bynode=1,
             colsample_bytree=0.8305896976473908, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, gamma=0, gpu_id=-1,
             grow_policy='lossguide', importance_type=None,
             interaction_constraints='', learning_rate=0.8554453100853391,
             max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=0,
             max_leaves=6, min_child_weight=6.624730534266604, missing=nan,
             monotone_constraints='()', n_estimators=1, n_jobs=-1,
             num_parallel_tree=1, predictor='auto', random_state=0,
             reg_alpha=0.18780383273386508, reg_lambda=0.1298892698165144, ...)


In [None]:
del train_data
del test_data
del preds
del automl

# Loan

In [1]:
from flaml import AutoML
import pandas as pd
import numpy as np
import warnings
from IPython.display import display

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)

train_data = pd.read_csv('train_loan.csv')
train_data = train_data.replace('NA', np.nan)
test_data = pd.read_csv('test_loan.csv')
test_data = test_data.replace('NA', np.nan)

# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
    "time_budget": 5400,  # in seconds
    "metric": 'mae',
    "task": 'regression',
    "log_file_name": "loan2.log",
}
# Train with labeled input data
automl.fit(X_train=train_data.drop(columns=['id','loss']), y_train=train_data['loss'], **automl_settings)
# Predict
preds = automl.predict(test_data.drop(columns=['id']))
# Print the best model
print(automl.model.estimator)

preds = pd.DataFrame(preds).rename(columns={0:'loss'})
test_data = test_data.join(preds)
test_data[['id', 'loss']].to_csv('loan_flaml.csv', index=False)

[flaml.automl: 10-06 15:26:24] {2600} INFO - task = regression
[flaml.automl: 10-06 15:26:24] {2602} INFO - Data split method: uniform
[flaml.automl: 10-06 15:26:24] {2605} INFO - Evaluation method: holdout
[flaml.automl: 10-06 15:26:25] {2727} INFO - Minimizing error metric: mae
[flaml.automl: 10-06 15:26:27] {2869} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 10-06 15:26:27] {3174} INFO - iteration 0, current learner lgbm
[flaml.automl: 10-06 15:26:28] {3307} INFO - Estimated sufficient time budget=147673s. Estimated necessary time budget=1263s.
[flaml.automl: 10-06 15:26:28] {3354} INFO -  at 107.0s,	estimator lgbm's best error=1.4341,	best estimator lgbm's best error=1.4341
[flaml.automl: 10-06 15:26:28] {3174} INFO - iteration 1, current learner lgbm
[flaml.automl: 10-06 15:26:30] {3354} INFO -  at 108.7s,	estimator lgbm's best error=1.4336,	best estimator lgbm's best error=1.4336
[flaml.automl: 10-0

OverflowError: int too big to convert

In [None]:
del train_data
del test_data
del preds
del automl

In [1]:
from flaml import AutoML
import pandas as pd
import numpy as np
import warnings
from IPython.display import display

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)

col_to_drop = ['Valor dólar informal semestral', 'identificador', 'Año', 'Semestre', 'target']

train_data = pd.read_csv('train_data.csv', low_memory=False)
train_data['target'] = train_data['Valor dólar informal semestral']*train_data['Salario mensual (en tu moneda local)']
test_data = pd.read_csv('features.csv', low_memory=False)

# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
    "time_budget": 300,  # in seconds
    "metric": 'rmse',
    "task": 'regression',
    "log_file_name": "rga.log",
}
# Train with labeled input data
automl.fit(X_train=train_data.drop(columns=['Valor dólar informal semestral', 'identificador', 'Año', 'Semestre', 'target']), y_train=train_data['target'], **automl_settings)
# Predict
preds = automl.predict(test_data.drop(columns=['Valor dólar informal semestral', 'identificador', 'Año', 'Semestre']))
# Print the best model
print(automl.model.estimator)

preds = pd.DataFrame(preds).rename(columns={0:'target'})
test_data = test_data.join(preds)
test_data[['id', 'target']].to_csv('rga_flaml.csv', index=False)

[flaml.automl: 10-18 20:50:07] {2600} INFO - task = regression
[flaml.automl: 10-18 20:50:07] {2602} INFO - Data split method: uniform
[flaml.automl: 10-18 20:50:07] {2605} INFO - Evaluation method: holdout
[flaml.automl: 10-18 20:50:07] {2727} INFO - Minimizing error metric: rmse
[flaml.automl: 10-18 20:50:08] {2869} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 10-18 20:50:08] {3174} INFO - iteration 0, current learner lgbm


LightGBMError: Do not support special JSON characters in feature name.

In [2]:
list(train_data.columns)

['identificador',
 'Me identifico',
 'Tengo',
 'Dónde estás trabajando',
 'Años de experiencia',
 'Años en el puesto actual',
 'Trabajo de',
 'Plataformas',
 'Lenguajes de programación o tecnologías.',
 'Automation o funcional?',
 'QA / Testing',
 'Bases de datos',
 '¿Tenés guardias?',
 'Cuánto cobrás por guardia',
 '¿Porcentaje, bruto o neto?',
 'Tipo de contrato',
 'Salario mensual (en tu moneda local)',
 '¿Qué tan conforme estás con tu sueldo?',
 'Recibís algún tipo de bono',
 'A qué está atado el bono',
 'Cantidad de personas en tu organización',
 'Año',
 'Semestre',
 '¿Gente a cargo?',
 'Frameworks, herramientas y librerías',
 'Nivel de estudios alcanzado',
 'Estado',
 'Carrera',
 'Realizaste cursos de especialización',
 'Años en la empresa actual',
 'Universidad',
 '¿Contribuís a proyectos open source?',
 '¿Programás como hobbie?',
 'Lenguajes de programación',
 'IDEs',
 '¿Qué SO usás en tu laptop/PC para trabajar?',
 '¿Sufriste o presenciaste situaciones de violencia laboral?',
