In [2]:
using ScikitLearn
using ScikitLearn.Pipelines: Pipeline, FeatureUnion
using XGBoost

In [11]:
using Revise

include("../../Herb.jl/src/Herb.jl")

Main.Herb

In [None]:
@sk_import decomposition: (PCA)
@sk_import preprocessing: (StandardScaler, RobustScaler, MinMaxScaler, MaxAbsScaler, Binarizer, PolynomialFeatures)
@sk_import feature_selection: (VarianceThreshold, SelectKBest, SelectPercentile, SelectFwe, RFE)
@sk_import tree: (DecisionTreeClassifier)
@sk_import ensemble: (RandomForestClassifier, GradientBoostingClassifier)
@sk_import linear_model: (LogisticRegression)
@sk_import neighbors: (NearestNeighbors, KNeighborsClassifier)
@sk_import svm: (LinearSVC)

In [33]:
# First draft of a pipeline grammar: a derivation of `Start` is a two-step
# `Pipeline` made of a preprocessing step (`PRE`) followed by an estimator (`EST`).
#
# NOTE(review): `FeatureUnion` is already imported from `ScikitLearn.Pipelines`,
# but `KernelPCA` and `FunctionTransformer` are not among the names imported in
# the cells visible here. Presumably they must be imported before expressions
# generated from this grammar can be evaluated — confirm.

g = Herb.HerbGrammar.@cfgrammar begin
    Start = Pipeline([PRE, EST]) 
    # A preprocessing step is a pass-through, a transformer, an estimator, or a
    # sequential / parallel combination of two preprocessing steps.
    PRE   = PASS | TFM | EST | Pipeline([PRE, PRE]) | FeatureUnion([PRE, PRE])
    # Estimators are written as (name, object) tuples.
    EST   = ("linear_pca", PCA()) | ("kernel_pca", KernelPCA())
    # Empty tuple — presumably a placeholder until transformers are listed (TODO confirm).
    TFM   = () 
    PASS  = ("id", FunctionTransformer(x -> x))     # this transformer leaves the input unchanged
end

1: Start = Pipeline([PRE, EST])
2: PRE = PASS
3: PRE = TFM
4: PRE = EST
5: PRE = Pipeline([PRE, PRE])
6: PRE = FeatureUnion([PRE, PRE])
7: EST = ("linear_pca", PCA())
8: EST = ("kernel_pca", KernelPCA())
9: TFM = ()
10: PASS = ("id", FunctionTransformer((x->begin
                #= c:\Users\denys\Programming\BEP\Grammar\HerbExamples.jl\notebooks\grammar.ipynb:9 =#
                x
            end)))


In [43]:
"""
    sequence(a, b)

Build a `Pipeline` whose steps are `a` followed by `b`.

Both arguments are passed through unchanged as the elements of the step vector;
presumably each is a `(name, estimator)` tuple — confirm against callers.
"""
function sequence(a, b)
    steps = [a, b]
    return Pipeline(steps)
end
"""
    parallel(a, b)

Build a `FeatureUnion` over the two branches `a` and `b`.

Both arguments are passed through unchanged as the elements of the branch vector;
presumably each is a `(name, transformer)` tuple — confirm against callers.
"""
function parallel(a, b)
    branches = [a, b]
    return FeatureUnion(branches)
end

# Grammar over scikit-learn pipelines.
#
# A derivation of START is either a single classifier wrapped in a `Pipeline`, or
# a preprocessing step (PRE) followed by a classifier. Every leaf is written as a
# `(name, estimator)` tuple, and nested pipelines / feature unions are wrapped in
# a named tuple ("seq" / "par") so they can be used as steps themselves.
#
# NOTE(review): nested "seq"/"par" steps and repeated leaf alternatives can yield
# sibling steps with identical names; verify that the Pipeline/FeatureUnion
# implementation in use accepts duplicate step names.
g = Herb.HerbGrammar.@cfgrammar begin
    # Alternative formulation based on the `sequence`/`parallel` helpers, kept for
    # reference:
    # START   = CLASSIF | sequence(PRE, CLASSIF)
    # PRE     = PREPROC | FSELECT | sequence(PRE, PRE) | parallel(BRANCH, BRANCH)
    # BRANCH  = PRE | CLASSIF | sequence(PRE, CLASSIF)

    START   = Pipeline([CLASSIF]) | Pipeline([PRE, CLASSIF])
    PRE     = PREPROC | FSELECT | ("seq", Pipeline([PRE, PRE])) | ("par", FeatureUnion([PRE, PRE]))
    # BRANCH  = PRE | CLASSIF | Pipeline([PRE, CLASSIF])

    # Feature preprocessing / transformation steps.
    PREPROC =
        ("StandardScaler", StandardScaler()) |
        ("RobustScaler", RobustScaler()) |
        ("MinMaxScaler", MinMaxScaler()) |
        ("MaxAbsScaler", MaxAbsScaler()) |
        ("PCA", PCA()) |
        ("Binarizer", Binarizer()) |
        ("PolynomialFeatures", PolynomialFeatures())
    # Feature selection steps.
    FSELECT =
        ("VarianceThreshold", VarianceThreshold()) |
        ("SelectKBest", SelectKBest()) |
        ("SelectPercentile", SelectPercentile()) |
        ("SelectFwe", SelectFwe()) |
        ("Recursive Feature Elimination", RFE(LinearSVC()))
    # Final estimators. The nearest-neighbour entry uses `KNeighborsClassifier`:
    # `NearestNeighbors` is scikit-learn's unsupervised neighbour search and has no
    # `predict`, so it cannot serve as the classifier at the end of a pipeline.
    CLASSIF =
        ("DecisionTree", DecisionTreeClassifier()) |
        ("RandomForest", RandomForestClassifier()) |
        ("Gradient Boosting Classifier", GradientBoostingClassifier()) |
        ("LogisticRegression", LogisticRegression()) |
        ("NearestNeighborClassifier", KNeighborsClassifier())
end

1: START = EST
2: START = sequence(PRE, CLASSIF)
3: PRE = PREPROC
4: PRE = FSELECT
5: PRE = sequence(PRE, PRE)
6: PRE = parallel(PRE, PRE)
7: PREPROC = ("StandardScaler", StandardScaler)
8: PREPROC = ("RobustScaler", RobustScaler)
9: PREPROC = ("MinMaxScaler", MinMaxScaler)
10: PREPROC = ("MaxAbsScaler", MaxAbsScaler)
11: PREPROC = ("RandomizedPCA", RandomizedPCA)
12: PREPROC = ("Binarizer", Binarizer)
13: PREPROC = ("PolynomialFeatures", PolynomialFeatures)
14: CLASSIF = ("DecisionTree", DecisionTree)
15: CLASSIF = ("RandomForest", RandomForest)
16: CLASSIF = ("eXtreme Gradient Boosting Classifier", eXtremeGradientBoostingClassifier)
17: CLASSIF = ("LogisticRegression", LogisticRegression)
18: CLASSIF = ("KNearestNeighborClassifier", KNearestNeighborClassifier)
19: FSELECT = ("VarianceThreshold", VarianceThreshold)
20: FSELECT = ("SelectKBest", SelectKBest)
21: FSELECT = ("SelectPercentile", SelectPercentile)
22: FSELECT = ("SelectFwe", SelectFwe)
23: FSELECT = ("Recursive Feature Eli

In [None]:
# Enumerate the programs derivable from :START in grammar `g` and print each one
# as a Julia expression. The literal 3 is presumably the maximum derivation
# depth — confirm against the HerbSearch API.
cfe = Herb.HerbSearch.ContextFreeEnumerator(g, 3, :START)
foreach(cfe) do candidate
    println(Herb.HerbSearch.rulenode2expr(candidate, g))
end

In [None]:
# Hand-written examples of pipeline expressions in the shape the grammar produces.

# easy pipeline: one feature-selection step followed by a classifier
Pipeline([
    ("Recursive Feature Elimination", RFE(LinearSVC())),
    ("DecisionTree", DecisionTreeClassifier()),
])

# more complex pipeline: a feature union feeding a nested two-step pipeline,
# followed by the final classifier. The final step uses `KNeighborsClassifier`
# because `NearestNeighbors` is an unsupervised neighbour search without `predict`.
Pipeline([
    ("seq", Pipeline([
        ("par", FeatureUnion([
            ("PolynomialFeatures", PolynomialFeatures()),
            ("SelectFwe", SelectFwe()),
        ])),
        ("seq", Pipeline([
            ("SelectPercentile", SelectPercentile()),
            ("PolynomialFeatures", PolynomialFeatures()),
        ])),
    ])),
    ("NearestNeighborClassifier", KNeighborsClassifier()),
])