### ✅ Installing and importing libraries

In [1]:
pip install pycaret

Collecting pycaret
  Downloading pycaret-3.3.1-py3-none-any.whl (486 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.2/486.2 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting category-encoders>=2.4.0 (from pycaret)
  Downloading category_encoders-2.6.3-py2.py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/81.9 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
Collecting deprecation>=2.1.0 (from pycaret)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Collecting imbalanced-learn>=0.12.0 (from pycaret)
  Downloading imbalanced_learn-0.12.2-py3-none-any.whl (257 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.0/258.0 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Collecting joblib<1.4,>=1.2.0 (from pycaret)
  Downloading joblib-1.3.2-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.2/302.2 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0

In [2]:
import pandas as pd
import numpy as np
import pycaret.regression

import warnings
# Ignore every warning issued through the `warnings` module from this point on.
# NOTE(review): presumably meant to keep library warning noise out of the cell
# outputs — it also hides deprecation warnings, so consider narrowing the filter.
warnings.filterwarnings("ignore")

### ✅ Getting raw data from the GitHub repository

In [3]:
# Location of the cars dataset: the raw CSV file served from the project's
# GitHub repository (main branch, Data/CarsData.csv).
Raw_Data_Url = (
    "https://raw.githubusercontent.com/Alireza-Esp/CPP_Model/main/Data/CarsData.csv"
)

In [4]:
# Fetch the CSV behind Raw_Data_Url and parse it into a DataFrame using
# pandas' default parsing options.
Raw_Data = pd.read_csv(filepath_or_buffer=Raw_Data_Url)

In [5]:
# Preview the first 20 rows to sanity-check the columns and value formats.
Raw_Data.head(n=20)

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize,Manufacturer
0,I10,2017,7495,Manual,11630,Petrol,145,60.1,1.0,hyundi
1,Polo,2017,10989,Manual,9200,Petrol,145,58.9,1.0,volkswagen
2,2 Series,2019,27990,Semi-Auto,1614,Diesel,145,49.6,2.0,BMW
3,Yeti Outdoor,2017,12495,Manual,30960,Diesel,150,62.8,2.0,skoda
4,Fiesta,2017,7999,Manual,19353,Petrol,125,54.3,1.2,ford
5,C-HR,2019,26791,Automatic,2373,Hybrid,135,74.3,1.8,toyota
6,Kuga,2019,17990,Manual,7038,Petrol,145,34.4,1.5,ford
7,Tiguan,2019,27490,Semi-Auto,3000,Petrol,145,30.4,2.0,volkswagen
8,Fiesta,2018,9891,Manual,31639,Petrol,145,65.7,1.0,ford
9,A Class,2017,17498,Manual,9663,Diesel,30,62.8,2.1,merc


### ✅ Auto ML

#### 🔹 Setting up the training environment using PyCaret

In [6]:
# Initialise the PyCaret regression experiment on the raw frame; the returned
# experiment object is kept in RE for later inspection.
RE = pycaret.regression.setup(
    data=Raw_Data,
    target="price",        # column the models learn to predict
    session_id=0,          # fixed seed so the split and results are repeatable
    max_encoding_ohe=200,  # one-hot encode categoricals with up to 200 unique values
)

Unnamed: 0,Description,Value
0,Session id,0
1,Target,price
2,Target type,Regression
3,Original data shape,"(97712, 10)"
4,Transformed data shape,"(97712, 215)"
5,Transformed train set shape,"(68398, 215)"
6,Transformed test set shape,"(29314, 215)"
7,Numeric features,5
8,Categorical features,4
9,Preprocess,True


In [7]:
# Display the feature matrix after PyCaret's preprocessing. The categorical
# columns show up one-hot encoded (e.g. `model_*`, `Manufacturer_*`), and
# pandas elides the middle rows/columns of the frame in the rendered output.
RE.X_transformed

Unnamed: 0,model_ Mokka X,model_ Q3,model_ I30,model_ GTC,model_ X4,model_ Fabia,model_ Yaris,model_ Tucson,model_ Kuga,model_ Scala,...,engineSize,Manufacturer_vauxhall,Manufacturer_Audi,Manufacturer_hyundi,Manufacturer_BMW,Manufacturer_skoda,Manufacturer_toyota,Manufacturer_ford,Manufacturer_volkswagen,Manufacturer_merc
75562,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.6,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
86653,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4832,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.6,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
50377,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
43812,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
47171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
46435,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.6,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
81677,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [8]:
# Train and cross-validate the regressors in PyCaret's model library with their
# default settings, rank them by R2 (the leaderboard below is ordered by that
# column) and keep the single top-ranked model in `best`.
best = pycaret.regression.compare_models(sort="R2", n_select=1)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,1187.7025,3983856.8541,1989.3022,0.9587,0.106,0.0752,100.426
rf,Random Forest Regressor,1173.2794,4053580.0364,2008.544,0.958,0.1048,0.074,71.315
xgboost,Extreme Gradient Boosting,1322.0566,4457535.7272,2107.5929,0.9537,0.1152,0.0841,4.12
lightgbm,Light Gradient Boosting Machine,1437.573,5179609.7247,2273.8646,0.9462,0.1236,0.092,6.602
dt,Decision Tree Regressor,1471.2072,6677040.6227,2576.2487,0.9308,0.135,0.0925,2.228
gbr,Gradient Boosting Regressor,2100.1624,9670110.7235,3107.8665,0.8995,0.1785,0.1406,21.595
lr,Linear Regression,2266.5825,13576279.319,3681.3406,0.8589,0.31,0.1788,3.256
br,Bayesian Ridge,2266.7069,13575308.1525,3681.1848,0.8589,0.3105,0.1789,3.328
ridge,Ridge Regression,2268.0444,13583934.268,3682.2433,0.8588,0.3084,0.179,1.426
llar,Lasso Least Angle Regression,2289.6223,13772339.213,3707.524,0.8569,0.311,0.181,1.572


Processing:   0%|          | 0/81 [00:00<?, ?it/s]