In [1]:
from lale.grammar import Grammar, explore

# Simple: First example

In [2]:
from lale.lib.sklearn import LogisticRegression, KNeighborsClassifier, PCA, StandardScaler

g = Grammar()

g.start       = g.estimator
g.estimator   = g.prim_est | g.transformer >> g.prim_est
g.transformer = g.prim_tfm | g.prim_tfm >> g.transformer

g.prim_tfm    = PCA | StandardScaler
g.prim_est = LogisticRegression | KNeighborsClassifier

generated = explore(g, 4)
generated.to_json()

{'class': 'lale.operators.PlannedPipeline',
 'state': 'planned',
 'edges': [],
 'steps': [{'class': 'lale.operators.OperatorChoice',
   'operator': 'LogisticRegression | KNeighborsClassifier | pipeline_4895881480',
   'state': 'planned',
   'steps': [{'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',
     'state': 'planned',
     'operator': 'LogisticRegression',
     'documentation_url': 'http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html'},
    {'class': 'lale.lib.sklearn.k_neighbors_classifier.KNeighborsClassifierImpl',
     'state': 'planned',
     'operator': 'KNeighborsClassifier',
     'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html'},
    {'class': 'lale.operators.PlannedPipeline',
     'state': 'planned',
     'edges': [[0, 1]],
     'steps': [{'class': 'lale.operators.OperatorChoice',
       'operator': 'PCA | StandardScaler | pipeline_489588131

## Training

In [3]:
from lale.lib.lale import HyperoptCV
import lale.datasets
(train_X, train_y), (test_X, test_y) = lale.datasets.load_iris_df()

trainer = HyperoptCV(estimator=generated, cv=2, max_evals=3, scoring='r2')
trained = trainer.fit(train_X, train_y)

100%|██████████| 3/3 [00:01<00:00,  2.07it/s, best loss: -0.9014363327674024]


In [4]:
from lale.helpers import best_estimator, to_graphviz
best_estimator(trained).to_json()

{'class': 'lale.operators.TrainedPipeline',
 'state': 'trained',
 'edges': [[0, 1], [1, 2]],
 'steps': [{'class': 'lale.lib.sklearn.standard_scaler.StandardScalerImpl',
   'state': 'trained',
   'operator': 'StandardScaler',
   'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html',
   'hyperparams': {'copy': False},
   'is_frozen_trainable': True,
   'coefs': 'coefs_not_available',
   'is_frozen_trained': False},
  {'class': 'lale.lib.sklearn.pca.PCAImpl',
   'state': 'trained',
   'operator': 'PCA',
   'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html',
   'hyperparams': {},
   'is_frozen_trainable': True,
   'coefs': 'coefs_not_available',
   'is_frozen_trained': False},
  {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',
   'state': 'trained',
   'operator': 'LogisticRegression',
   'documentation_url': 'http://scikit-learn.org/stable/modules/gener

# Grammar that exercices all combinators

In [5]:
from lale.lib.sklearn import LogisticRegression, KNeighborsClassifier, PCA, StandardScaler
from lale.lib.autogen import AdaBoostClassifier
from lale.lib.lale import ConcatFeatures

g = Grammar()

g.start       = g.estimator
g.estimator   = g.term_est | g.transformer >> g.term_est
g.term_est    = g.prim_est # | g.ensemble      # Todo add higher-order operators
# g.ensemble    = g.ensembler ( g.estimator )
g.transformer = g.union_tfm | g.union_tfm >> g.transformer
g.union_tfm   = g.prim_tfm | g.union_body >> ConcatFeatures
g.union_body  = g.transformer | g.transformer & g.union_body

g.prim_tfm    = PCA | StandardScaler
g.prim_est    = LogisticRegression | KNeighborsClassifier
g.ensembler   = AdaBoostClassifier

generated = explore(g, 5)
generated.to_json()

{'class': 'lale.operators.PlannedPipeline',
 'state': 'planned',
 'edges': [],
 'steps': [{'class': 'lale.operators.OperatorChoice',
   'operator': 'LogisticRegression | KNeighborsClassifier | pipeline_4899925912',
   'state': 'planned',
   'steps': [{'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',
     'state': 'planned',
     'operator': 'LogisticRegression',
     'documentation_url': 'http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html'},
    {'class': 'lale.lib.sklearn.k_neighbors_classifier.KNeighborsClassifierImpl',
     'state': 'planned',
     'operator': 'KNeighborsClassifier',
     'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html'},
    {'class': 'lale.operators.PlannedPipeline',
     'state': 'planned',
     'edges': [[0, 1]],
     'steps': [{'class': 'lale.operators.OperatorChoice',
       'operator': 'PCA | StandardScaler | pipeline_489982380

In [6]:
trainer = HyperoptCV(estimator=generated, cv=2, max_evals=3, scoring='r2')
trained = trainer.fit(train_X, train_y)

100%|██████████| 3/3 [00:01<00:00,  1.94it/s, best loss: -0.8770363327674024]


In [7]:
best_estimator(trained).to_json()

{'class': 'lale.operators.TrainedPipeline',
 'state': 'trained',
 'edges': [[0, 1], [1, 2]],
 'steps': [{'class': 'lale.lib.sklearn.standard_scaler.StandardScalerImpl',
   'state': 'trained',
   'operator': 'StandardScaler',
   'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html',
   'hyperparams': {'copy': False},
   'is_frozen_trainable': True,
   'coefs': 'coefs_not_available',
   'is_frozen_trained': False},
  {'class': 'lale.lib.sklearn.pca.PCAImpl',
   'state': 'trained',
   'operator': 'PCA',
   'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html',
   'hyperparams': {},
   'is_frozen_trainable': True,
   'coefs': 'coefs_not_available',
   'is_frozen_trained': False},
  {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',
   'state': 'trained',
   'operator': 'LogisticRegression',
   'documentation_url': 'http://scikit-learn.org/stable/modules/gener

# recipe: grammar from this [paper](https://link.springer.com/chapter/10.1007/978-3-319-55696-3_16)

In [8]:
from lale.lib.sklearn import SimpleImputer, PCA, FeatureAgglomeration, PolynomialFeatures, RandomForestClassifier, DecisionTreeClassifier
from sklearn.feature_selection import SelectKBest
from lale.lib.autogen import  GaussianNB, MultinomialNB, BernoulliNB
import lale.helpers

lale.helpers.wrap_imported_operators()


g = Grammar()

g.start = g.algorithm | g.preprocessing >> g.algorithm
g.preprocessing = g.imputation >> g.dimensionality_definition | g.dimensionality_definition
g.dimensionality_definition = g.feature_selection >> g.feature_construction | g.feature_selection | g.feature_construction
g.feature_selection = g.unsupervised # | g.supervised 
g.algorithm = g.naive_bayes | g.trees

g.imputation = SimpleImputer
# g.supervised = SelectKBest
g.unsupervised = PCA | FeatureAgglomeration
g.feature_construction = PolynomialFeatures
g.naive_bayes = GaussianNB | MultinomialNB | BernoulliNB
g.trees = g.DecisionTree | g.RandomForest


generated = explore(g, 4)
generated.to_json()

{'class': 'lale.operators.PlannedPipeline',
 'state': 'planned',
 'edges': [],
 'steps': [{'class': 'lale.operators.OperatorChoice',
   'operator': 'GaussianNB | MultinomialNB | BernoulliNB | pipeline_4895971872',
   'state': 'planned',
   'steps': [{'class': 'lale.lib.autogen.gaussian_nb.GaussianNBImpl',
     'state': 'planned',
     'operator': 'GaussianNB'},
    {'class': 'lale.lib.autogen.multinomial_nb.MultinomialNBImpl',
     'state': 'planned',
     'operator': 'MultinomialNB'},
    {'class': 'lale.lib.autogen.bernoulli_nb.BernoulliNBImpl',
     'state': 'planned',
     'operator': 'BernoulliNB'},
    {'class': 'lale.operators.PlannedPipeline',
     'state': 'planned',
     'edges': [[0, 1]],
     'steps': [{'class': 'lale.operators.OperatorChoice',
       'operator': 'pipeline_4895971536 | pipeline_4895972488 | PCA | FeatureAgglomeration | PolynomialFeatures',
       'state': 'planned',
       'steps': [{'class': 'lale.operators.PlannedPipeline',
         'state': 'planned',
  

In [9]:
trainer = HyperoptCV(estimator=generated, cv=2, max_evals=3, scoring='r2')
trained = trainer.fit(train_X, train_y)

100%|██████████| 3/3 [00:01<00:00,  1.66it/s, best loss: -0.9380363327674024]


In [10]:
best_estimator(trained).to_json()

{'class': 'lale.operators.TrainedPipeline',
 'state': 'trained',
 'edges': [[0, 1], [1, 2]],
 'steps': [{'class': 'lale.lib.sklearn.feature_agglomeration.FeatureAgglomerationImpl',
   'state': 'trained',
   'operator': 'FeatureAgglomeration',
   'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html',
   'hyperparams': {'affinity': 'precomputed',
    'compute_full_tree': False,
    'linkage': 'single',
    'n_clusters': 4},
   'is_frozen_trainable': True,
   'coefs': 'coefs_not_available',
   'is_frozen_trained': False},
  {'class': 'lale.lib.sklearn.polynomial_features.PolynomialFeaturesImpl',
   'state': 'trained',
   'operator': 'PolynomialFeatures',
   'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html',
   'hyperparams': {'include_bias': False},
   'is_frozen_trainable': True,
   'coefs': 'coefs_not_available',
   'is_frozen_trained': False},
  {'class': 'l

# alphad3m: Grammar from this [paper](https://www.automl.org/wp-content/uploads/2019/06/automlws2019_Paper34.pdf)

In [11]:
from lale.lib.sklearn import OneHotEncoder, SimpleImputer, Ridge, LinearSVC, PCA, GaussianNB
from lale.lib.autogen import OrdinalEncoder, SGDClassifier

g = Grammar()

g.start  = g.est | g.clean >> g.est | g.tfm >> g.est | g.clean >> g.tfm >> g.est
g.clean  = g.clean1 >> g.clean | g.clean1
g.tfm    = g.tfm1 >> g.tfm | g.tfm1

g.clean1 = SimpleImputer #SkImputer | MissingIndicator
g.tfm1   = OneHotEncoder  | PCA # | OrdinalEncoder
g.est    = GaussianNB | Ridge  | LinearSVC # | SGDClassifier

generated = explore(g, 4)
generated.to_json()

{'class': 'lale.operators.PlannedPipeline',
 'state': 'planned',
 'edges': [],
 'steps': [{'class': 'lale.operators.OperatorChoice',
   'operator': 'GaussianNB | Ridge | LinearSVC | pipeline_4915788600 | pipeline_4915789496 | pipeline_4915788880',
   'state': 'planned',
   'steps': [{'class': 'lale.lib.sklearn.gaussian_nb.GaussianNBImpl',
     'state': 'planned',
     'operator': 'GaussianNB',
     'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html'},
    {'class': 'lale.lib.sklearn.ridge.RidgeImpl',
     'state': 'planned',
     'operator': 'Ridge',
     'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html'},
    {'class': 'lale.lib.sklearn.linear_svc.LinearSVCImpl',
     'state': 'planned',
     'operator': 'LinearSVC',
     'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html'},
    {'class': 'lale.operators.PlannedPipeline',
     

In [12]:
trainer = HyperoptCV(estimator=generated, cv=2, max_evals=3, scoring='r2')
trained = trainer.fit(train_X, train_y)

100%|██████████| 3/3 [00:03<00:00,  1.08s/it, best loss: -0.9139575551782683]


In [13]:
best_estimator(trained).to_json()

{'class': 'lale.operators.TrainedPipeline',
 'state': 'trained',
 'edges': [[0, 1]],
 'steps': [{'class': 'lale.lib.sklearn.simple_imputer.SimpleImputerImpl',
   'state': 'trained',
   'operator': 'SimpleImputer',
   'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html',
   'hyperparams': {'strategy': 'most_frequent'},
   'is_frozen_trainable': True,
   'coefs': 'coefs_not_available',
   'is_frozen_trained': False},
  {'class': 'lale.lib.sklearn.gaussian_nb.GaussianNBImpl',
   'state': 'trained',
   'operator': 'GaussianNB',
   'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html',
   'hyperparams': {},
   'is_frozen_trainable': True,
   'coefs': 'coefs_not_available',
   'is_frozen_trained': False}]}