<a href="https://colab.research.google.com/github/SubramanyamSahoo/ML-Libraries/blob/main/FLAML_AutoML_Library.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## FLAML: A Fast and Lightweight AutoML Library

In [1]:
!pip install flaml

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting flaml
  Downloading FLAML-1.1.1-py3-none-any.whl (216 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m216.1/216.1 KB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lightgbm>=2.3.1
  Downloading lightgbm-3.3.4-py3-none-manylinux1_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m38.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: lightgbm, flaml
  Attempting uninstall: lightgbm
    Found existing installation: lightgbm 2.2.3
    Uninstalling lightgbm-2.2.3:
      Successfully uninstalled lightgbm-2.2.3
Successfully installed flaml-1.1.1 lightgbm-3.3.4


# Classification Problem

In [2]:
from flaml import AutoML
from sklearn.datasets import load_iris
# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
    "time_budget": 10,  # in seconds
    "metric": 'accuracy',
    "task": 'classification',
    "log_file_name": "iris.log",
}
X_train, y_train = load_iris(return_X_y=True)
# Train with labeled input data
automl.fit(X_train=X_train, y_train=y_train,
           **automl_settings)


[flaml.automl.automl: 01-10 14:20:02] {2715} INFO - task = classification
[flaml.automl.automl: 01-10 14:20:02] {2717} INFO - Data split method: stratified
[flaml.automl.automl: 01-10 14:20:02] {2720} INFO - Evaluation method: cv
[flaml.automl.automl: 01-10 14:20:02] {2847} INFO - Minimizing error metric: 1-accuracy
[flaml.automl.automl: 01-10 14:20:02] {2993} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl.automl: 01-10 14:20:02] {3322} INFO - iteration 0, current learner lgbm
[flaml.automl.automl: 01-10 14:20:02] {3460} INFO - Estimated sufficient time budget=651s. Estimated necessary time budget=15s.
[flaml.automl.automl: 01-10 14:20:02] {3507} INFO -  at 0.1s,	estimator lgbm's best error=0.0733,	best estimator lgbm's best error=0.0733
[flaml.automl.automl: 01-10 14:20:02] {3322} INFO - iteration 1, current learner lgbm
[flaml.automl.automl: 01-10 14:20:02] {3507} INFO -  at 0.1s,	estimator lgbm's best error=0

In [3]:
# Predict
print(automl.predict_proba(X_train))
# Export the best model
print(automl.model)

[[0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.81927853 0.15011003 0.03061144]
 [0.7725578  0.20229926 0.02514294]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02228459]
 [0.95482095 0.02289446 0.02

# Regression Problem

In [4]:
## Regression Problem
from flaml import AutoML
from sklearn.datasets import load_boston
# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
    "time_budget": 10,  # in seconds
    "metric": 'r2',
    "task": 'regression',
    "log_file_name": "boston.log",
}
X_train, y_train = load_boston(return_X_y=True)
# Train with labeled input data
automl.fit(X_train=X_train, y_train=y_train,
           **automl_settings)
# Predict
print(automl.predict(X_train))
# Export the best model
print(automl.model)

[flaml.automl.automl: 01-10 14:21:11] {2715} INFO - task = regression
[flaml.automl.automl: 01-10 14:21:11] {2717} INFO - Data split method: uniform
[flaml.automl.automl: 01-10 14:21:11] {2720} INFO - Evaluation method: cv
[flaml.automl.automl: 01-10 14:21:11] {2847} INFO - Minimizing error metric: 1-r2
[flaml.automl.automl: 01-10 14:21:11] {2993} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl.automl: 01-10 14:21:11] {3322} INFO - iteration 0, current learner lgbm
[flaml.automl.automl: 01-10 14:21:11] {3460} INFO - Estimated sufficient time budget=306s. Estimated necessary time budget=2s.
[flaml.automl.automl: 01-10 14:21:11] {3507} INFO -  at 0.1s,	estimator lgbm's best error=0.6237,	best estimator lgbm's best error=0.6237
[flaml.automl.automl: 01-10 14:21:11] {3322} INFO - iteration 1, current learner lgbm
[flaml.automl.automl: 01-10 14:21:11] {3507} INFO -  at 0.1s,	estimator lgbm's best error=0.6237,	best estimator 


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

[flaml.automl.automl: 01-10 14:21:11] {3507} INFO -  at 0.2s,	estimator xgboost's best error=3.1963,	best estimator lgbm's best error=0.6237
[flaml.automl.automl: 01-10 14:21:11] {3322} INFO - iteration 4, current learner lgbm
[flaml.automl.automl: 01-10 14:21:11] {3507} INFO -  at 0.3s,	estimator lgbm's best error=0.3431,	best estimator lgbm's best error=0.3431
[flaml.automl.automl: 01-10 14:21:11] {3322} INFO - iteration 5, current learner extra_tree
[flaml.automl.automl: 01-10 14:21:11] {3507} INFO -  at 0.4s,	estimator extra_tree's best error=0.3741,	best estimator lgbm's best error=0.3431
[flaml.automl.automl: 01-10 14:21:11] {3322} INFO - iteration 6, current learner rf
[flaml.automl.automl: 01-10 14:21:11] {3507} INFO -  at 0.5s,	estimator rf's best error=0.3202,	best estimator rf's best error=0.3202
[flaml.automl.automl: 01-10 14:21:11] {3322} INFO - iteration 7, current learner rf
[flaml.automl.automl: 01-10 14:21:11] {3507} INFO -  at 0.6s,	estimator rf's best error=0.2425,	b

In [5]:
# Predict
print(automl.predict(X_train))
# Export the best model
print(automl.model)

[25.72840321 23.21403184 33.98584105 31.9401929  33.08595679 25.89485237
 21.18908043 20.24317025 16.56840372 19.56276983 19.28868307 20.35284969
 19.89167097 19.93256457 19.29235516 19.93256457 21.12339596 18.36112017
 20.3664458  19.57380258 15.41315635 17.80385523 16.58129047 15.52884899
 17.72772856 17.43665576 17.72772856 16.58129047 19.84007801 20.81804605
 14.31605561 17.30563119 13.90291459 16.58129047 15.52884899 21.03886784
 20.77981444 21.35190333 21.0672076  29.53868176 33.82102623 29.18069903
 23.90528684 22.93799195 22.5155001  21.63215987 21.3447667  20.2257857
 16.56840372 20.55468684 20.86739543 22.46062068 25.52235102 21.78022025
 19.40007729 33.61426367 24.02564045 32.24558973 22.70250744 21.35190333
 20.77981444 19.19727389 23.61719112 26.54652656 31.94150794 24.02395954
 20.99910232 21.26182092 20.18240243 21.26182092 23.78620898 21.35111546
 22.63534261 22.54205754 22.17695571 22.02724986 20.62455813 21.31376838
 21.18473885 21.51670819 28.21384397 25.2225348  22.