In [1]:
using Lale

In [2]:
using Random
using Statistics
using Test
using DataFrames: DataFrame
using AutoMLPipeline: Utils

In [4]:
# Load the iris table and carve out the inputs used by the cells below.
iris = getiris()
# Regression inputs: columns 1-3 as predictors, column 4 as the target.
Xreg = DataFrame(iris[:, 1:3])
Yreg = Vector(iris[:, 4])
# Classification inputs: columns 1-4 as predictors, column 5 as the target.
Xcl = DataFrame(iris[:, 1:4])
Ycl = Vector(iris[:, 5]);

In [8]:
# Lale operators, each constructed from its string name:
# three preprocessors followed by three learners.
pca = LalePreprocessor("PCA")
rb = LalePreprocessor("RobustScaler")
noop = LalePreprocessor("NoOp")
rfr = LaleLearner("RandomForestRegressor")
rfc = LaleLearner("RandomForestClassifier")
treereg = LaleLearner("DecisionTreeRegressor")
# AMLP operators, built with their default constructors.
ohe = OneHotEncoder()
catf = CatFeatureSelector()
# Deliberately left without a trailing `;` so the notebook displays the value.
numf = NumFeatureSelector()

NumFeatureSelector("numf_j7I", Dict{Symbol, Any}(:name => "numf_j7I", :numcols => Int64[]))

In [6]:
# Show the type hierarchy of the PCA preprocessor.
supertypes(typeof(pca))

(LalePreprocessor, Lale.LaleAbsTypes.LaleOperator, Learner, Computer, Machine, Any)

In [7]:
# Show the type hierarchy of the random-forest regressor.
supertypes(typeof(rfr))

(LaleLearner, Lale.LaleAbsTypes.LaleOperator, Learner, Computer, Machine, Any)

In [9]:
# Regression with a Lale pipeline expression (`+`, `>>`, `|`), wrapped in a
# LaleOptimizer configured with "Hyperopt", max_evals=10 and cv=3.
lalepipe = (pca + noop) >> (rfr | treereg)
lale_hopt = LaleOptimizer(lalepipe, "Hyperopt"; max_evals=10, cv=3)
# Fit on the regression data and predict on the same rows.
lalepred = fit_transform!(lale_hopt, Xreg, Yreg)
# RMSE of those predictions against Yreg; displayed as the cell result.
lalermse = score(:rmse, lalepred, Yreg)

  0%|                                   | 0/10 [00:00<?, ?trial/s, best loss=?] 10%|█         | 1/10 [00:00<00:02,  3.33trial/s, best loss: 68.79479113241148] 20%|██▏        | 2/10 [00:00<00:03,  2.34trial/s, best loss: 39.4035452199696] 30%|███▎       | 3/10 [00:01<00:03,  2.09trial/s, best loss: 39.4035452199696] 40%|████▍      | 4/10 [00:01<00:03,  1.95trial/s, best loss: 39.4035452199696] 50%|█████▌     | 5/10 [00:02<00:02,  2.31trial/s, best loss: 39.4035452199696] 60%|██████▌    | 6/10 [00:02<00:01,  2.48trial/s, best loss: 39.4035452199696] 70%|██████▎  | 7/10 [00:03<00:01,  2.10trial/s, best loss: 38.306480298223626] 80%|███████▏ | 8/10 [00:03<00:00,  2.13trial/s, best loss: 38.306480298223626] 90%|████████ | 9/10 [00:04<00:00,  2.03trial/s, best loss: 38.306480298223626]100%|████████| 10/10 [00:04<00:00,  2.03trial/s, best loss: 38.306480298223626]100%|████████| 10/10 [00:04<00:00,  2.14trial/s, best loss: 38.306480298223626]

0.210597181663228

In [12]:
# Show the type hierarchy of the Lale optimizer.
supertypes(typeof(lale_hopt))

(LaleOptimizer, Lale.LaleAbsTypes.LaleOperator, Learner, Computer, Machine, Any)

In [13]:
# Regression with an AMLP pipeline built by the `@pipeline` macro.
amlpipe = @pipeline((pca + noop) |> (rfr * treereg))
# Fit on the regression data and predict on the same rows.
amlpred = fit_transform!(amlpipe, Xreg, Yreg)
# Cross-validate with "mean_squared_error"; this prints one line per fold and
# the returned summary is not stored.
crossvalidate(amlpipe, Xreg, Yreg, "mean_squared_error")
# RMSE of the predictions above against Yreg; displayed as the cell result.
amlprmse = score(:rmse, amlpred, Yreg)

fold: 1, 0.03799999999999999
fold: 2, 0.05466666666666664
fold: 3, 0.04149999999999999
fold: 4, 0.05366666666666666
fold: 5, 0.041999999999999996
fold: 6, 0.043333333333333314
fold: 7, 0.05800000000000001
fold: 8, 0.07866666666666662
fold: 9, 0.05333333333333332
fold: 10, 0.09266666666666667
errors: 0


0.027688746209726913

In [14]:
# Show the type hierarchy of the AMLP pipeline.
supertypes(typeof(amlpipe))

(Pipeline, Workflow, Machine, Any)

In [15]:
# Classification with a Lale pipeline, wrapped in a LaleOptimizer configured
# with "Hyperopt", max_evals=10 and cv=3.
lalepipe = (rb + pca) |> rfc
lale_hopt = LaleOptimizer(lalepipe, "Hyperopt"; max_evals=10, cv=3)
# Fit on the classification data and predict on the same rows.
lalepred = fit_transform!(lale_hopt, Xcl, Ycl)
# Accuracy of those predictions against Ycl; displayed as the cell result.
laleacc = score(:accuracy, lalepred, Ycl)


  0%|                                   | 0/10 [00:00<?, ?trial/s, best loss=?] 10%|▊       | 1/10 [00:00<00:05,  1.60trial/s, best loss: -0.9533333333333333] 20%|████▍                 | 2/10 [00:01<00:05,  1.35trial/s, best loss: -0.96] 30%|██████▌               | 3/10 [00:02<00:05,  1.37trial/s, best loss: -0.96] 40%|███▏    | 4/10 [00:02<00:03,  1.58trial/s, best loss: -0.9733333333333333] 50%|████    | 5/10 [00:03<00:02,  1.73trial/s, best loss: -0.9733333333333333] 60%|████▊   | 6/10 [00:04<00:02,  1.47trial/s, best loss: -0.9733333333333333] 70%|█████▌  | 7/10 [00:04<00:01,  1.51trial/s, best loss: -0.9733333333333333] 80%|██████▍ | 8/10 [00:05<00:01,  1.49trial/s, best loss: -0.9733333333333333] 90%|███████▏| 9/10 [00:06<00:00,  1.48trial/s, best loss: -0.9733333333333333]100%|███████| 10/10 [00:06<00:00,  1.51trial/s, best loss: -0.9733333333333333]100%|███████| 10/10 [00:06<00:00,  1.50trial/s, best loss: -0.9733333333333333]

98.66666666666667

In [16]:
# Classification with an AMLP pipeline built by the `@pipeline` macro.
amlpipe = @pipeline((pca + rb) |> rfc)
# Fit on the classification data and predict on the same rows.
amlpred = fit_transform!(amlpipe, Xcl, Ycl)
# Cross-validate with "accuracy_score"; this prints one line per fold and the
# returned summary is not stored.
crossvalidate(amlpipe, Xcl, Ycl, "accuracy_score")
# Accuracy of the predictions above against Ycl; displayed as the cell result.
amlpacc = score(:accuracy, amlpred, Ycl)

fold: 1, 1.0
fold: 2, 1.0
fold: 3, 0.8666666666666667
fold: 4, 0.9333333333333333
fold: 5, 1.0
fold: 6, 1.0
fold: 7, 0.9333333333333333
fold: 8, 0.9333333333333333
fold: 9, 0.8
fold: 10, 0.9333333333333333
errors: 0


100.0

In [17]:
# Regression pipeline: the (catf |> ohe) branch and the (numf |> rb |> pca)
# branch are combined with `+`, then piped into the random-forest regressor.
plr = @pipeline((catf |> ohe) + (numf |> rb |> pca) |> rfr);
# Cross-validate with "mean_absolute_error"; the trailing positional arguments
# are 10 and false, and the returned summary is displayed as the cell result.
crossvalidate(plr, Xreg, Yreg, "mean_absolute_error", 10, false)

(mean = 0.1542311111111111, std = 0.02257389256451805, folds = 10, errors = 0)

In [18]:
# Classification pipeline: the (catf |> ohe) branch and the (numf |> rb |> pca)
# branch are combined with `+`, then piped into the random-forest classifier.
plc = @pipeline((catf |> ohe) + (numf |> rb |> pca) |> rfc);
# Cross-validate with "accuracy_score"; the trailing positional arguments are
# 10 and false, and the returned summary is displayed as the cell result.
crossvalidate(plc, Xcl, Ycl, "accuracy_score", 10, false)

(mean = 0.9466666666666669, std = 0.052587375849774354, folds = 10, errors = 0)