Optimize the pipeline for any machine learning model using a hierarchical optimization method for tabular datasets.
This package can be installed from PyPI with pip using the following command
pip install autotab
or using github link for the latest code
python -m pip install git+https://github.com/Sara-Iftikhar/autotab.git
or using the setup file: go to the folder where this repository is downloaded and run
python setup.py install
from ai4water.datasets import busan_beach
from skopt.plots import plot_objective
from autotab import OptimizePipeline
# Load the Busan beach dataset from ai4water. Every column except the last
# is treated as an input feature; the last column is the prediction target.
data = busan_beach()
input_features = data.columns.tolist()[0:-1]
output_features = data.columns.tolist()[-1:]

# Candidate transformations offered for both the inputs and the output.
transformations = [
    'minmax', 'zscore', 'log', 'log10', 'sqrt',
    'robust', 'quantile', 'none', 'scale',
]

# NOTE(review): "parent" and "child" presumably refer to the two levels of the
# hierarchical optimization -- confirm against the autotab documentation.
pl = OptimizePipeline(
    # Reuse the feature lists computed above instead of re-slicing the
    # dataframe columns for every argument.
    inputs_to_transform=input_features,
    parent_iterations=400,
    child_iterations=20,
    parent_algorithm='bayes',
    child_algorithm="random",
    cv_parent_hpo=True,
    eval_metric='mse',
    monitor=['r2', 'nse'],
    input_transformations=transformations,
    output_transformations=transformations,
    models=[
        "LinearRegression",
        "LassoLars",
        "Lasso",
        "RandomForestRegressor",
        "HistGradientBoostingRegressor",
        "CatBoostRegressor",
        "XGBRegressor",
        "LGBMRegressor",
        "GradientBoostingRegressor",
        "ExtraTreeRegressor",
        "ExtraTreesRegressor",
    ],
    input_features=input_features,
    output_features=output_features,
    cross_validator={"KFold": {"n_splits": 5}},
    split_random=True,
)
get version information
# The return value is not captured here, so it is only visible when this
# line is evaluated interactively (e.g. as the last expression of a cell).
pl._version_info()
perform optimization
# Run the optimization on the full dataset. `results` is kept because it is
# passed to skopt's plot_objective further down.
# NOTE(review): process_results=False presumably skips autotab's automatic
# post-processing so the plots can be produced manually below -- confirm.
results = pl.fit(data=data, process_results=False)
print optimization report
# Print whatever report() returns so it is visible when run as a script.
print(pl.report())
show convergence plot
# Diagnostic plots of the optimization run. Every call goes through
# `pl.optimizer_` (note the trailing underscore); the last two calls
# previously used `pl.optimizer`, inconsistent with the rest of this section.
# NOTE(review): save=False presumably shows the figure without writing it to
# disk -- confirm against the ai4water HyperOpt documentation.
pl.optimizer_._plot_convergence(save=False)
pl.optimizer_._plot_parallel_coords(figsize=(16, 8), save=False)
_ = pl.optimizer_._plot_distributions(save=False)
pl.optimizer_.plot_importance(save=False)
pl.optimizer_.plot_importance(save=False, plot_type="bar")
# plot_objective is imported from skopt.plots and consumes the object
# returned by pl.fit().
_ = plot_objective(results)
pl.optimizer_._plot_evaluations(save=False)
pl.optimizer_._plot_edf(save=False)
# Dumbbell plot with the metric left at its default, then the same plot
# with the metric set explicitly to r2.
pl.dumbbell_plot(data=data)
pl.dumbbell_plot(data=data, metric_name='r2')
# Taylor plot on the same data, drawn on a 6x6 figure with save=False.
pl.taylor_plot(data=data, save=False, figsize=(6,6))
# Model comparison three ways: all defaults, as a bar chart, and as a bar
# chart for r2 (the metric name is passed positionally in the last call).
pl.compare_models()
pl.compare_models(plot_type="bar_chart")
pl.compare_models("r2", plot_type="bar_chart")
get best pipeline with respect to evaluation metric
# NOTE(review): the pipeline above is configured with eval_metric='mse',
# while this lookup uses 'r2' (one of the monitored metrics) -- confirm
# which metric is intended here. The return value is not captured.
pl.get_best_pipeline_by_metric('r2')
build, fit and evaluate the best pipeline
# No metric_name is given, so the best pipeline is chosen by the method's
# default metric. The returned model is reused by the evaluations below.
# NOTE(review): "bfe" presumably stands for build-fit-evaluate -- confirm.
model = pl.bfe_best_model_from_scratch(data=data)
# Evaluate the same model three times: with the default metric first, then
# explicitly with nse and with r2.
pl.evaluate_model(model, data=data)
pl.evaluate_model(model, data=data, metric_name='nse')
pl.evaluate_model(model, data=data, metric_name='r2')
get best pipeline with respect to the r2 metric
# Look up the best pipeline for r2; the return value is not captured.
pl.get_best_pipeline_by_metric('r2')
# Rebuild with metric_name='r2' this time; `model` is rebound, replacing the
# model built earlier in this script.
model = pl.bfe_best_model_from_scratch(data=data, metric_name='r2')
pl.evaluate_model(model, data=data, metric_name='r2')
# pl.path is interpolated into the message to point the user at the output folder.
print(f"all results are save in {pl.path} folder")