# Ibese Transport Analytics 

## Pycaret Implementation

* Author: Olumide Oyalola

In [9]:
from pycaret.datasets import get_data
from pycaret.regression import *
import pandas as pd

import mlflow

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("ibese-trip-arrival-prediction")

In [2]:
trip = pd.read_csv('../Data/trip.csv')

In [3]:
trip.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200489 entries, 0 to 200488
Data columns (total 9 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   timeOfDay            200489 non-null  object
 1   Planned KM           200489 non-null  int64 
 2   Duration             200489 non-null  int64 
 3   Plant                200489 non-null  object
 4   Waybill Date         200489 non-null  object
 5   Actual Arrival Date  200489 non-null  object
 6   Month                200489 non-null  object
 7   Driver ID            200489 non-null  int64 
 8   Day                  200489 non-null  object
dtypes: int64(3), object(6)
memory usage: 13.8+ MB


In [6]:
trip_ibese = trip[trip['Plant'] == "Ibese"]

In [15]:
setup(trip_ibese, ignore_features=['Driver ID', 'Waybill Date','Actual Arrival Date','Plant'], normalize = True, data_split_stratify=True,
     target= 'Duration', log_experiment=True, session_id=123,
     experiment_name= "ibese-trip-arrival-prediction", log_plots=True, log_profile=True, log_data=True, profile=True)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Duration
2,Original Data,"(96244, 9)"
3,Missing Values,False
4,Numeric Features,1
5,Categorical Features,3
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(67370, 24)"


(False,
 10126     3
 33987     4
 136521    2
 143685    3
 23150     6
          ..
 84562     4
 16997     5
 50882     3
 102869    3
 92943     3
 Name: Duration, Length: 67370, dtype: int64,
 10,
 <MLUsecase.REGRESSION: 2>,
 None,
 [<pandas.io.formats.style.Styler at 0x258a4da29e8>],
 <pycaret.loggers.DashboardLogger at 0x258a4e05438>,
 None,
 {'parameter': 'Hyperparameters',
  'residuals': 'Residuals',
  'error': 'Prediction Error',
  'cooks': 'Cooks Distance',
  'rfe': 'Feature Selection',
  'learning': 'Learning Curve',
  'manifold': 'Manifold Learning',
  'vc': 'Validation Curve',
  'feature': 'Feature Importance',
  'feature_all': 'Feature Importance (All)',
  'tree': 'Decision Tree',
  'residuals_interactive': 'Interactive Residuals'},
 {'lr': <pycaret.containers.models.regression.LinearRegressionContainer at 0x258a93e9358>,
  'lasso': <pycaret.containers.models.regression.LassoRegressionContainer at 0x258a93e9860>,
  'ridge': <pycaret.containers.models.regression.RidgeRegr

In [16]:
best_model=compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
catboost,CatBoost Regressor,0.8007,1.2664,1.1253,0.8132,0.2023,0.2281,8.072
xgboost,Extreme Gradient Boosting,0.8013,1.2713,1.1274,0.8125,0.2026,0.2278,5.029
lightgbm,Light Gradient Boosting Machine,0.8033,1.2771,1.13,0.8116,0.2035,0.2296,0.444
rf,Random Forest Regressor,0.8386,1.3855,1.177,0.7957,0.2137,0.2374,10.893
gbr,Gradient Boosting Regressor,0.8357,1.4078,1.1864,0.7924,0.2104,0.2376,3.506
et,Extra Trees Regressor,0.8497,1.4403,1.2,0.7876,0.22,0.2405,14.957
dt,Decision Tree Regressor,0.8586,1.4793,1.2162,0.7818,0.2242,0.2431,0.199
ada,AdaBoost Regressor,0.8938,1.5636,1.2503,0.7695,0.2275,0.2672,1.0
knn,K Neighbors Regressor,0.9413,1.9937,1.4119,0.706,0.2407,0.266,2.576
lar,Least Angle Regression,1.4181,5.8336,2.4147,0.1408,0.3455,0.3664,0.043


In [19]:
evaluate_model(best_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [18]:
predict_model(best_model)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,CatBoost Regressor,0.8044,1.3021,1.1411,0.8083,0.2027,0.2279


Unnamed: 0,Planned KM,timeOfDay_Afternoon,timeOfDay_Evening,timeOfDay_Morning,timeOfDay_Night,Month_Apr,Month_Aug,Month_Dec,Month_Feb,Month_Jan,...,Month_Sep,Day_Fri,Day_Mon,Day_Sat,Day_Sun,Day_Thu,Day_Tue,Day_Wed,Duration,Label
0,-0.891825,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2,2.976034
1,-0.971470,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2,2.954228
2,-0.139626,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4,2.849379
3,-0.880026,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2,2.755029
4,-1.030466,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2,2.037381
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28869,-0.723687,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,4,3.561054
28870,-0.880026,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2,2.501032
28871,2.299858,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5,4.915189
28872,2.364754,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,6,4.790240


In [None]:
save_model(best_model, "models/ibese_trip_arrival_prediction_model")