# Basic steps for developing a model with Pycaret

In [1]:
from pycaret.datasets import get_data
data1 = get_data(dataset = 'traffic')

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,Rush Hour,traffic_volume
0,,288.28,0.0,0.0,40,Clouds,1,5545
1,,289.36,0.0,0.0,75,Clouds,0,4516
2,,289.58,0.0,0.0,90,Clouds,0,4767
3,,290.13,0.0,0.0,90,Clouds,0,5026
4,,291.14,0.0,0.0,75,Clouds,0,4918


In [2]:
from pycaret.regression import *

## Preprocessing

In [5]:
dataset = setup(data = data1, target = 'traffic_volume',
                session_id = 438, 
                verbose = True)

Unnamed: 0,Description,Value
0,Session id,438
1,Target,traffic_volume
2,Target type,Regression
3,Original data shape,"(48204, 8)"
4,Transformed data shape,"(48204, 28)"
5,Transformed train set shape,"(33742, 28)"
6,Transformed test set shape,"(14462, 28)"
7,Numeric features,5
8,Categorical features,2
9,Rows with missing values,99.9%


## Compare models
(Call regression)

In [6]:
best = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lightgbm,Light Gradient Boosting Machine,1502.9547,3079513.6446,1754.7529,0.2183,0.9296,2.4701,0.232
gbr,Gradient Boosting Regressor,1531.0211,3150013.956,1774.7161,0.2004,0.9422,2.5583,0.814
ada,AdaBoost Regressor,1577.2863,3282090.0319,1811.574,0.1669,0.973,2.7074,0.64
knn,K Neighbors Regressor,1573.6762,3662855.0418,1913.7557,0.0702,0.9601,2.5601,0.316
rf,Random Forest Regressor,1547.2748,3741181.5363,1933.9648,0.0502,0.9554,2.4877,2.884
omp,Orthogonal Matching Pursuit,1712.8788,3848922.2942,1961.8288,0.0229,1.0224,2.964,0.119
dummy,Dummy Regressor,1743.7632,3939897.8403,1984.8734,-0.0001,1.0318,2.9022,0.101
et,Extra Trees Regressor,1678.7005,4588616.2268,2141.9299,-0.165,1.048,2.6702,1.821
dt,Decision Tree Regressor,1746.3217,5206293.5785,2281.4939,-0.3218,1.1239,2.5748,0.156
huber,Huber Regressor,1610.739,8252285.3313,2376.2313,-1.1126,0.9606,2.5991,1.027


Compare to select best model

In [11]:
best

### Params tuning

Params tuning for best model

In [7]:
tuned_best = tune_model(estimator = best)

Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [8]:
final_model = finalize_model(estimator = tuned_best) 

In [13]:
final_model

## Save best params model

In [10]:
save_model(final_model,'rf_base_traffic')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['temp', 'rain_1h', 'snow_1h',
                                              'clouds_all', 'Rush Hour'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=['holiday', 'weather_main'],
                                     transformer=SimpleImputer(strategy='most_frequent'))),
                 ('onehot_encoding',
                  TransformerWrapper(include=['holiday', 'weather_main'],
                                     transformer=OneHotEncoder(cols=['holiday',
                                                                     'weather_main'],
                                                               handle_missing='return_nan',
                                                               use_cat_names=True))),
                 ('clean_column_names',
 

#### Predictions

In [14]:
saved_model = load_model('rf_base_traffic')

Transformation Pipeline and Model Successfully Loaded


In [26]:
new_data = {
    'holiday':None,	
    'temp':299,
    'rain_1h':0.1,	
    'snow_1h':0,
    'clouds_all':88,
    'weather_main': 'Thunderstorm',
    'Rush Hour': 0
}
import pandas as pd
new_data = pd.DataFrame([new_data])
new_data

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,Rush Hour
0,,299,0.1,0,88,Thunderstorm,0


In [27]:
predictions = predict_model(saved_model, data=new_data)
predictions.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,Rush Hour,prediction_label
0,,299,0.1,0,88,Thunderstorm,0,3442.272854
