# PyCaret 2 Regression Example
This notebook is created using PyCaret 2.0. Last updated : 31-07-2020

In [1]:
# check version
from pycaret.utils import version
version()

'2.3.5'

# 1. Loading Dataset

In [2]:
from library.common import Core
core = Core()

data = core.get_cluster_regression_datas(cluster = 'Global', first = 1970, last = 2019)


# 2. Initialize Setup

In [3]:
from pycaret.regression import *
reg1 = setup(data, target = 'co2', session_id=123, log_experiment=True,
             normalize = True, normalize_method = 'minmax',
             remove_outliers = False, outliers_threshold = 0.05,
             experiment_name='carbon emission')

Unnamed: 0,Description,Value
0,session_id,123
1,Target,co2
2,Original Data,"(50, 7)"
3,Missing Values,False
4,Numeric Features,6
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(34, 6)"


# 3. Compare Baseline

In [4]:
best_model = compare_models(fold=5)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,355.2449,230160.0139,457.4672,0.9922,0.0204,0.0153,0.286
rf,Random Forest Regressor,434.7358,301723.0588,531.9396,0.9908,0.0242,0.0189,0.396
gbr,Gradient Boosting Regressor,484.5277,350966.5171,576.1328,0.9886,0.0259,0.0208,0.05
ada,AdaBoost Regressor,515.3011,436274.1811,629.7158,0.9859,0.0298,0.0231,0.122
dt,Decision Tree Regressor,637.901,638618.2397,759.2606,0.9809,0.0332,0.0272,0.05
llar,Lasso Least Angle Regression,633.9905,786661.6184,861.3601,0.9754,0.035,0.0259,0.04
lasso,Lasso Regression,631.1633,794298.4938,864.9237,0.9751,0.035,0.0257,2.204
br,Bayesian Ridge,624.8609,805553.8042,868.5228,0.9748,0.0356,0.0257,0.05
lr,Linear Regression,631.4469,805399.9062,869.9352,0.9746,0.035,0.0256,2.994
lar,Least Angle Regression,633.7587,808282.9971,869.7717,0.9745,0.035,0.0256,0.066


# 4. Create Model

In [5]:
model = create_model('et')


Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,355.2505,265432.5597,515.2015,0.9939,0.0155,0.0121
1,167.3371,51623.4326,227.2079,0.9947,0.0092,0.0067
2,162.8038,65930.9163,256.7702,0.9988,0.0076,0.0055
3,278.3577,138279.9776,371.8602,0.9969,0.0215,0.0149
4,500.0563,304928.8661,552.2036,0.9921,0.0256,0.0214
5,329.1803,177019.9783,420.7374,0.9865,0.0143,0.0117
6,446.0263,261128.4845,511.0073,0.9748,0.0231,0.02
7,684.5536,660632.0082,812.7927,0.9791,0.0363,0.0282
8,213.4114,75366.3371,274.5293,0.9959,0.0107,0.0085
9,816.6,740215.4169,860.3577,0.9873,0.0451,0.0404


In [6]:
import numpy as np
# model = [create_model('et') for i in np.arange(0.1,1,0.1)]

In [7]:
print(len(model))

100


# 5. Tune Hyperparameters

In [8]:
tuned_model = tune_model(model, n_iter=50, optimize = 'MSE')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,654.737,812986.337,901.6575,0.9812,0.0307,0.0242
1,363.0237,179014.9178,423.1015,0.9815,0.0165,0.0142
2,467.886,398800.476,631.5065,0.9927,0.0221,0.0184
3,295.6408,142552.1327,377.5608,0.9968,0.0216,0.0161
4,451.4834,290523.4792,539.0023,0.9925,0.0241,0.019
5,245.3409,70695.056,265.8854,0.9946,0.0089,0.0084
6,436.8479,286009.9396,534.799,0.9724,0.026,0.0204
7,828.5479,940739.8173,969.9174,0.9703,0.0429,0.0337
8,343.8608,155544.5104,394.3913,0.9915,0.0182,0.0147
9,844.2092,1075723.3055,1037.1708,0.9815,0.0602,0.0466


In [9]:
tuned_model

ExtraTreesRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mae',
                    max_depth=8, max_features=1.0, max_leaf_nodes=None,
                    max_samples=None, min_impurity_decrease=0.005,
                    min_impurity_split=None, min_samples_leaf=3,
                    min_samples_split=7, min_weight_fraction_leaf=0.0,
                    n_estimators=180, n_jobs=-1, oob_score=False,
                    random_state=123, verbose=0, warm_start=False)

# 6. Ensemble Model

In [10]:
dt = create_model('dt')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,484.8042,389058.4674,623.7455,0.991,0.0219,0.0185
1,702.2759,512522.3728,715.9067,0.9469,0.0297,0.0291
2,394.2607,244800.9549,494.7736,0.9955,0.0297,0.022
3,443.0239,309031.8145,555.9063,0.9931,0.0315,0.0233
4,1315.623,3267505.2056,1807.6242,0.9153,0.0659,0.048
5,1139.9388,1608927.6128,1268.4351,0.8771,0.0439,0.0397
6,946.7057,1619812.693,1272.7186,0.8434,0.0466,0.0368
7,622.582,518133.6728,719.815,0.9836,0.0318,0.0256
8,798.1061,735829.6699,857.8051,0.9597,0.0347,0.0315
9,675.2487,609414.9927,780.6504,0.9895,0.0347,0.0299


In [11]:
bagged_dt = ensemble_model(dt, n_estimators=50)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,344.9787,294527.7503,542.7041,0.9932,0.0159,0.0112
1,265.4644,75380.874,274.5558,0.9922,0.0116,0.0111
2,236.2398,96691.4846,310.9525,0.9982,0.0103,0.0089
3,341.0069,238617.2978,488.4847,0.9947,0.028,0.0182
4,836.8217,712488.7195,844.0905,0.9815,0.0341,0.0326
5,199.3061,58228.9871,241.3068,0.9956,0.0084,0.0072
6,655.8688,516338.6059,718.5671,0.9501,0.0332,0.0292
7,316.8475,133565.8628,365.4666,0.9958,0.0166,0.0136
8,405.8232,221064.8435,470.1753,0.9879,0.0213,0.017
9,756.2085,787750.8281,887.5533,0.9865,0.0511,0.0399


In [12]:
boosted_dt = ensemble_model(dt, method = 'Boosting')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,451.9375,340533.1487,583.5522,0.9921,0.0209,0.0174
1,546.6587,407266.7976,638.1746,0.9578,0.0261,0.0224
2,642.8936,523238.6258,723.3524,0.9905,0.0333,0.029
3,418.7207,300162.7046,547.8711,0.9933,0.0312,0.0222
4,887.2604,1007854.1266,1003.9194,0.9739,0.0457,0.0382
5,540.9733,301668.9536,549.244,0.977,0.0208,0.0201
6,1121.847,1439756.1838,1199.8984,0.8608,0.0581,0.0504
7,712.903,915760.4094,956.9537,0.9711,0.045,0.031
8,798.1061,735829.6699,857.8051,0.9597,0.0347,0.0315
9,675.2487,609414.9927,780.6504,0.9895,0.0347,0.0299


# 7. Blend Models

In [13]:
blender = blend_models()

TypeError: blend_models() missing 1 required positional argument: 'estimator_list'

# 8. Stack Models

In [None]:
stacker = stack_models(estimator_list = compare_models(n_select=5, fold = 5, whitelist = models(type='ensemble').index.tolist()))

# 9. Analyze Model

In [None]:
plot_model(dt)

In [None]:
plot_model(dt, plot = 'error')

In [None]:
plot_model(dt, plot = 'feature')

In [None]:
evaluate_model(dt)

# 10. Interpret Model

In [None]:
interpret_model(lightgbm)

In [None]:
interpret_model(lightgbm, plot = 'correlation')

In [None]:
interpret_model(lightgbm, plot = 'reason', observation = 12)

# 11. AutoML()

In [None]:
best = automl(optimize = 'MAE')
best

# 12. Predict Model

In [None]:
pred_holdouts = predict_model(lightgbm)
pred_holdouts.head()

In [None]:
new_data = data.copy()
new_data.drop(['charges'], axis=1, inplace=True)
predict_new = predict_model(best, data=new_data)
predict_new.head()

# 13. Save / Load Model

In [None]:
save_model(best, model_name='best-model')

In [None]:
loaded_bestmodel = load_model('best-model')
print(loaded_bestmodel)

In [None]:
from sklearn import set_config
set_config(display='diagram')
loaded_bestmodel[0]

In [None]:
from sklearn import set_config
set_config(display='text')

# 14. Deploy Model

In [None]:
deploy_model(best, model_name = 'best-aws', authentication = {'bucket' : 'pycaret-test'})

# 15. Get Config / Set Config

In [None]:
X_train = get_config('X_train')
X_train.head()

In [None]:
get_config('seed')

In [None]:
from pycaret.regression import set_config
set_config('seed', 999)

In [None]:
get_config('seed')

# 16. MLFlow UI

In [None]:
!mlflow ui

# End
Thank you. For more information / tutorials on PyCaret, please visit https://www.pycaret.org