In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from pycaret.regression import *

In [2]:
df = pd.read_csv("C:\\Users\\ripa_\\Desktop\\Programing\\IndyCar_Project\\datasets\\IndyCar_dataset_v12.csv")

In [3]:
df["EventDate"] = pd.to_datetime(df["EventDate"])
df = df.sort_values("EventDate")

In [4]:
print(df[["DriverID", "NormalizedPositionFinish", "DRFAvg"]].groupby("DriverID").head(3).head(30))

    DriverID  NormalizedPositionFinish  DRFAvg
0       3608                      0.52     NaN
25      4401                      0.88     NaN
24      4021                      0.80     NaN
23      3675                      0.56     NaN
22      4276                      0.20     NaN
21      4236                      1.00     NaN
20      4215                      0.40     NaN
19      4144                      0.32     NaN
17      3682                      0.36     NaN
16      3668                      0.44     NaN
15      3636                      0.48     NaN
14      3628                      0.04     NaN
13      3625                      0.76     NaN
18      3811                      0.84     NaN
11      4216                      0.12     NaN
12      4407                      0.64     NaN
2       3620                      0.68     NaN
3       3622                      0.00     NaN
4       3645                      0.08     NaN
5       3648                      0.96     NaN
1       3616 

In [5]:
df.head()

Unnamed: 0,DriverName,DriverID,Rookie,DRFAvg,DTAvg,DTTAvg,DNFRate,TDNFRate,DriverElo,DriverTElo,...,EventDate,EventDateFormatted,EventID,Era,EraID,Status,StatusID,FieldSize,PositionFinish,NormalizedPositionFinish
0,Marco Andretti,3608,1,,,,,,1500.0,1500.0,...,2012-03-25,"Sunday, March 25, 2012",2380,DW12 Era 2012-2017,0,Running,0,26,14,0.52
25,Katherine Legge,4401,1,,,,,,1500.0,1500.0,...,2012-03-25,"Sunday, March 25, 2012",2380,DW12 Era 2012-2017,0,DNF,1,26,23,0.88
24,Sebastien Bourdais,4021,0,,,,,,1500.0,1500.0,...,2012-03-25,"Sunday, March 25, 2012",2380,DW12 Era 2012-2017,0,DNF,1,26,21,0.8
23,Alex Tagliani,3675,0,,,,,,1500.0,1500.0,...,2012-03-25,"Sunday, March 25, 2012",2380,DW12 Era 2012-2017,0,Running,0,26,15,0.56
22,Simon Pagenaud,4276,0,,,,,,1500.0,1500.0,...,2012-03-25,"Sunday, March 25, 2012",2380,DW12 Era 2012-2017,0,Running,0,26,6,0.2


In [6]:
drop_cols = [
    "DriverName", "TeamName", "CarEngine","EventName", "Track", "EventTrackType",
    "EventDate", "EventDateFormatted", "EventID", "Era",
    "Status", "StatusID", "PositionFinish"
]

cutoff = df["EventDate"].quantile(0.95)
data = df[df["EventDate"] < cutoff].drop(columns=drop_cols)
data_unseen = df[df["EventDate"] >= cutoff].drop(columns=drop_cols)

print(data.corr(numeric_only=True)["NormalizedPositionFinish"].sort_values())

DriverElo                  -3.981294e-01
DriverTTElo                -3.678589e-01
TeamElo                    -3.296387e-01
TeamTElo                   -2.501114e-01
DriverTElo                 -2.471319e-01
TeamID                     -1.376309e-01
EngineTTElo                -7.560453e-02
EngineElo                  -6.551813e-02
EngineTElo                 -3.940775e-02
Rookie                     -2.423952e-02
TrackID                    -2.617858e-03
FieldSize                  -2.353585e-13
EventTrackTypeID            7.175254e-13
EraID                       1.686207e-12
EngineID                    2.618900e-02
TeamDNFRate                 3.193332e-02
TDNFRate                    6.753540e-02
DriverID                    1.024562e-01
DNFRate                     1.683375e-01
DTAvg                       2.638985e-01
TTP                         3.109530e-01
TRP                         3.136064e-01
TeamRitmo                   3.370879e-01
DRFAvg                      3.609328e-01
DTTAvg          

In [None]:
df = df.drop(columns=drop_cols)

In [9]:
print(df.columns.tolist())

['DriverID', 'Rookie', 'DRFAvg', 'DTAvg', 'DTTAvg', 'DNFRate', 'TDNFRate', 'DriverElo', 'DriverTElo', 'DriverTTElo', 'DriverRitmo', 'PositionStart', 'TeamID', 'TRP', 'TTP', 'TeamDNFRate', 'TeamElo', 'TeamTElo', 'TeamRitmo', 'EngineID', 'EngineElo', 'EngineTElo', 'EngineTTElo', 'TrackID', 'EventTrackTypeID', 'EraID', 'FieldSize', 'NormalizedPositionFinish']


In [10]:
exp = setup(
    data=data, 
    target="NormalizedPositionFinish", 
    session_id=123, 
    fold_strategy="timeseries",
    data_split_shuffle=False,
    fold_shuffle=False
)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,NormalizedPositionFinish
2,Target type,Regression
3,Original data shape,"(5236, 28)"
4,Transformed data shape,"(5236, 28)"
5,Transformed train set shape,"(3665, 28)"
6,Transformed test set shape,"(1571, 28)"
7,Numeric features,27
8,Rows with missing values,29.1%
9,Preprocess,True


In [10]:
compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
br,Bayesian Ridge,0.2264,0.0754,0.2743,0.1692,0.1866,0.9208,0.011
en,Elastic Net,0.2331,0.0777,0.2787,0.1433,0.1901,0.9761,0.009
ridge,Ridge Regression,0.2315,0.0779,0.2786,0.1415,0.1905,0.9705,0.009
et,Extra Trees Regressor,0.2345,0.0779,0.2789,0.1414,0.1917,1.0264,0.217
lasso,Lasso Regression,0.2349,0.078,0.2792,0.1402,0.1908,0.9977,0.384
llar,Lasso Least Angle Regression,0.2349,0.078,0.2792,0.1402,0.1908,0.9977,0.01
omp,Orthogonal Matching Pursuit,0.234,0.0785,0.2801,0.1346,0.1909,0.9704,0.01
ada,AdaBoost Regressor,0.2395,0.0786,0.2803,0.133,0.1935,1.0803,0.04
rf,Random Forest Regressor,0.2371,0.0791,0.2811,0.1282,0.1933,1.0502,0.438
gbr,Gradient Boosting Regressor,0.2379,0.0819,0.2857,0.0972,0.1954,1.0068,0.186


In [11]:
et = create_model('et')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2386,0.0792,0.2815,0.1256,0.1933,1.0484
1,0.25,0.0885,0.2974,0.0215,0.2051,1.1111
2,0.2442,0.0836,0.2891,0.0776,0.1999,1.0788
3,0.24,0.08,0.2828,0.1214,0.1942,1.0453
4,0.2369,0.0793,0.2817,0.1363,0.1939,1.0725
5,0.2238,0.0733,0.2707,0.1957,0.1873,1.0018
6,0.2294,0.0767,0.277,0.1525,0.1884,0.9719
7,0.229,0.0744,0.2727,0.1749,0.1861,0.9552
8,0.231,0.0735,0.2711,0.1907,0.1855,1.0015
9,0.2217,0.0704,0.2654,0.2176,0.1828,0.9781


In [12]:
et_tune = tune_model(et)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2391,0.0791,0.2812,0.1271,0.192,1.0349
1,0.2386,0.0797,0.2823,0.1185,0.1925,0.9942
2,0.2413,0.0811,0.2848,0.1044,0.1961,1.067
3,0.2373,0.0772,0.2779,0.1512,0.189,0.9751
4,0.2362,0.0785,0.2801,0.1457,0.1908,0.9823
5,0.2205,0.0702,0.2649,0.2299,0.1808,0.9267
6,0.2275,0.0754,0.2745,0.1678,0.1856,0.9139
7,0.2274,0.0736,0.2713,0.1832,0.1839,0.9027
8,0.2308,0.0744,0.2728,0.1807,0.1852,0.956
9,0.2244,0.0715,0.2674,0.2061,0.1822,0.945


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [13]:
predict_model(et_tune);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Extra Trees Regressor,0.2135,0.0657,0.2564,0.2663,0.1732,0.8997


In [14]:
newpred1 = predict_model(et, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Extra Trees Regressor,0.2229,0.0748,0.2735,0.1667,0.1865,1.0207


In [15]:
rf = create_model('rf')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2412,0.0805,0.2837,0.1118,0.1949,1.0828
1,0.255,0.0919,0.3031,-0.0159,0.2096,1.1738
2,0.2421,0.0823,0.2868,0.0917,0.1985,1.1058
3,0.239,0.0794,0.2818,0.1276,0.1941,1.0441
4,0.2438,0.0819,0.2862,0.1082,0.1971,1.1225
5,0.2289,0.0743,0.2726,0.1845,0.1883,0.9938
6,0.234,0.0785,0.2802,0.1331,0.191,1.0067
7,0.2277,0.0739,0.2718,0.1803,0.1852,0.9498
8,0.2293,0.0733,0.2708,0.1924,0.1853,0.9791
9,0.2297,0.0749,0.2737,0.1681,0.1892,1.0435


In [16]:
rf_tune = tune_model(rf)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2402,0.0792,0.2815,0.1255,0.1936,1.0777
1,0.2469,0.0854,0.2922,0.0556,0.2013,1.0995
2,0.2381,0.0794,0.2817,0.124,0.1947,1.0724
3,0.2369,0.0772,0.2778,0.1519,0.1901,1.0091
4,0.239,0.0786,0.2804,0.1437,0.192,1.0346
5,0.221,0.0696,0.2638,0.2363,0.1816,0.9581
6,0.2286,0.0757,0.2752,0.1638,0.1867,0.9404
7,0.2273,0.073,0.2702,0.19,0.1838,0.9207
8,0.2287,0.0729,0.27,0.1973,0.1841,0.9715
9,0.2219,0.0695,0.2636,0.2285,0.1807,0.9619


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [17]:
predict_model(rf_tune);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Random Forest Regressor,0.2113,0.0639,0.2527,0.2871,0.1721,0.9221


In [18]:
newpred2 = predict_model(rf_tune, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Random Forest Regressor,0.2231,0.0743,0.2726,0.1721,0.1851,0.9826


In [19]:
gbr = create_model('gbr')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2563,0.0919,0.3031,-0.0142,0.2067,1.0554
1,0.2699,0.1035,0.3218,-0.1452,0.2215,1.1944
2,0.2461,0.0884,0.2972,0.0247,0.2055,1.0757
3,0.238,0.0799,0.2827,0.1216,0.1935,1.0094
4,0.2403,0.0819,0.2862,0.1082,0.197,1.1081
5,0.2251,0.0743,0.2726,0.1847,0.1869,0.9574
6,0.2263,0.0768,0.2771,0.1518,0.187,0.9108
7,0.2308,0.0782,0.2797,0.1322,0.1906,0.914
8,0.225,0.0724,0.2691,0.2025,0.1825,0.9207
9,0.2209,0.0715,0.2675,0.2055,0.1829,0.9219


In [20]:
gbr_tune = tune_model(gbr)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2421,0.0808,0.2842,0.1083,0.1955,1.0916
1,0.2508,0.0875,0.2959,0.0318,0.2043,1.1337
2,0.241,0.0799,0.2826,0.1183,0.1959,1.1144
3,0.2399,0.0785,0.2802,0.1374,0.1924,1.0339
4,0.2437,0.0802,0.2832,0.1271,0.1949,1.0859
5,0.2291,0.0739,0.2718,0.1893,0.1876,1.0079
6,0.2317,0.0766,0.2767,0.1544,0.1888,1.0046
7,0.2339,0.0749,0.2737,0.169,0.1873,0.979
8,0.2339,0.0749,0.2737,0.1753,0.1875,1.0203
9,0.2274,0.0718,0.2679,0.2027,0.1844,1.0105


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [21]:
predict_model(gbr);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Gradient Boosting Regressor,0.2118,0.0662,0.2574,0.2605,0.1754,0.9046


In [22]:
newpred3 = predict_model(gbr, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Gradient Boosting Regressor,0.2201,0.0745,0.273,0.1697,0.1855,0.9762


In [23]:
cat = create_model('catboost')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2412,0.0812,0.2849,0.1043,0.1949,1.0492
1,0.2655,0.102,0.3193,-0.1276,0.2203,1.201
2,0.2467,0.0888,0.298,0.0199,0.2063,1.0918
3,0.2475,0.0874,0.2956,0.0397,0.2036,1.0739
4,0.248,0.0854,0.2922,0.0707,0.1997,1.0917
5,0.2245,0.0757,0.2752,0.1688,0.1895,0.9533
6,0.2325,0.0804,0.2836,0.1118,0.191,0.8726
7,0.2308,0.0774,0.2782,0.1413,0.1899,0.9357
8,0.2302,0.0748,0.2735,0.1764,0.1856,0.9469
9,0.2257,0.0752,0.2743,0.1644,0.1876,0.9513


In [24]:
cat_tune = tune_model(cat)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2391,0.0809,0.2844,0.1071,0.1947,1.0506
1,0.2528,0.0899,0.2998,0.0061,0.2066,1.1198
2,0.235,0.0793,0.2816,0.1249,0.1945,1.0397
3,0.2353,0.0776,0.2785,0.1477,0.1907,0.9892
4,0.2402,0.0809,0.2844,0.1191,0.1937,1.0134
5,0.2186,0.0693,0.2633,0.2394,0.1803,0.9212
6,0.2258,0.0744,0.2729,0.1779,0.185,0.9234
7,0.2227,0.073,0.2701,0.1907,0.1827,0.8748
8,0.2257,0.072,0.2683,0.2073,0.1824,0.9326
9,0.2216,0.0701,0.2648,0.2212,0.1803,0.9031


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [25]:
predict_model(cat_tune);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,CatBoost Regressor,0.2061,0.0628,0.2505,0.2995,0.1693,0.845


In [26]:
newpred5 = predict_model(cat_tune, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,CatBoost Regressor,0.219,0.0745,0.2729,0.1703,0.1843,0.9257


In [27]:
lgbm = create_model('lightgbm')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2612,0.0958,0.3095,-0.0572,0.2109,1.1593
1,0.278,0.1117,0.3341,-0.2349,0.2301,1.2632
2,0.2525,0.0955,0.309,-0.0541,0.213,1.0821
3,0.2468,0.0875,0.2958,0.0385,0.2033,1.0109
4,0.2495,0.0909,0.3014,0.0107,0.2062,1.0829
5,0.2281,0.0775,0.2784,0.1492,0.1914,0.9461
6,0.2306,0.0804,0.2836,0.112,0.1914,0.8497
7,0.233,0.0791,0.2813,0.1225,0.1912,0.9136
8,0.2308,0.0768,0.2771,0.1545,0.1878,0.9637
9,0.2243,0.0748,0.2734,0.1696,0.1875,0.9662


In [28]:
lgbm_tune = tune_model(lgbm)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2379,0.0779,0.2791,0.14,0.1914,1.0437
1,0.2457,0.0841,0.29,0.0699,0.1988,1.0494
2,0.2382,0.0803,0.2834,0.1132,0.1952,1.0382
3,0.2339,0.0766,0.2768,0.158,0.1881,0.9487
4,0.2378,0.0792,0.2815,0.1375,0.1922,1.0046
5,0.219,0.0686,0.2619,0.2475,0.1797,0.9333
6,0.2273,0.0752,0.2741,0.1701,0.1861,0.9451
7,0.2214,0.0714,0.2672,0.2082,0.1815,0.8875
8,0.2263,0.0717,0.2677,0.2111,0.1826,0.966
9,0.2195,0.07,0.2645,0.2228,0.1806,0.9309


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [29]:
predict_model(lgbm_tune);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,0.2073,0.0625,0.2499,0.3027,0.17,0.8911




In [30]:
newpred5 = predict_model(lgbm_tune, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,0.219,0.0738,0.2716,0.178,0.1843,0.9562




In [31]:
br = create_model('br')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2356,0.0805,0.2836,0.112,0.1925,0.9847
1,0.2485,0.0876,0.2959,0.0314,0.2022,1.0203
2,0.2284,0.0787,0.2806,0.1311,0.1927,0.9596
3,0.23,0.0747,0.2733,0.1791,0.1859,0.9295
4,0.2326,0.0785,0.2801,0.1455,0.1902,0.946
5,0.214,0.0688,0.2623,0.2452,0.1781,0.8644
6,0.22,0.0737,0.2714,0.1865,0.1831,0.8457
7,0.222,0.0741,0.2722,0.1784,0.1841,0.8653
8,0.2197,0.0698,0.2642,0.2314,0.1798,0.9083
9,0.2134,0.0674,0.2597,0.2511,0.1776,0.8841


In [32]:
br_tune = tune_model(br)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2339,0.0797,0.2823,0.1204,0.1917,0.9743
1,0.2497,0.0887,0.2978,0.019,0.2035,1.0227
2,0.2272,0.0787,0.2805,0.1312,0.1927,0.9498
3,0.229,0.0744,0.2728,0.1822,0.1856,0.9257
4,0.2323,0.0783,0.2799,0.1471,0.1901,0.9464
5,0.2141,0.0689,0.2625,0.2436,0.1783,0.8617
6,0.2195,0.0735,0.2712,0.1879,0.1829,0.8409
7,0.2217,0.0739,0.2718,0.1804,0.184,0.8667
8,0.2192,0.0696,0.2638,0.234,0.1795,0.9051
9,0.2133,0.0675,0.2598,0.2504,0.1776,0.8781


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [34]:
predict_model(br_tune);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Bayesian Ridge,0.2003,0.0624,0.2498,0.3033,0.168,0.7841


In [36]:
newpred9 = predict_model(br_tune, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Bayesian Ridge,0.219,0.0771,0.2777,0.1405,0.1871,0.8787


In [43]:
blend1 = blend_models([rf_tune, lgbm_tune])

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2382,0.0779,0.279,0.1407,0.1917,1.0593
1,0.2454,0.0841,0.29,0.0697,0.1994,1.0723
2,0.238,0.0796,0.2821,0.1217,0.1947,1.0549
3,0.2349,0.0766,0.2767,0.1587,0.1886,0.9776
4,0.2381,0.0786,0.2804,0.1438,0.1918,1.0189
5,0.2197,0.0688,0.2624,0.2445,0.1804,0.9451
6,0.2278,0.0753,0.2743,0.1688,0.1862,0.9424
7,0.2239,0.072,0.2683,0.2015,0.1824,0.9031
8,0.2271,0.0721,0.2684,0.2067,0.1831,0.9678
9,0.2204,0.0694,0.2634,0.2292,0.1803,0.9456


In [44]:
blend1_tune = tune_model(blend1)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2379,0.0777,0.2788,0.1421,0.1913,1.0493
1,0.2454,0.084,0.2897,0.0715,0.1988,1.0577
2,0.2381,0.08,0.2828,0.1171,0.1949,1.0445
3,0.2342,0.0765,0.2766,0.1591,0.1882,0.9594
4,0.2379,0.0789,0.2809,0.1406,0.1919,1.01
5,0.2192,0.0686,0.2619,0.247,0.1799,0.9376
6,0.2274,0.0752,0.2741,0.1701,0.1861,0.944
7,0.2222,0.0716,0.2675,0.2062,0.1818,0.8933
8,0.2266,0.0718,0.2679,0.21,0.1827,0.9667
9,0.2196,0.0697,0.264,0.2261,0.1804,0.936


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [46]:
predict_model(blend1_tune);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2079,0.0626,0.2501,0.3016,0.1702,0.8966




In [48]:
newpred5 = predict_model(blend1_tune, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2194,0.0737,0.2715,0.1787,0.1843,0.9605




In [49]:
blend2 = blend_models([rf_tune, lgbm_tune, cat_tune])

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2381,0.0783,0.2798,0.136,0.192,1.0552
1,0.2472,0.0854,0.2921,0.056,0.2011,1.0869
2,0.2363,0.079,0.2811,0.1279,0.1941,1.0481
3,0.2346,0.0764,0.2764,0.1605,0.1887,0.9804
4,0.2383,0.0789,0.281,0.1406,0.1919,1.0158
5,0.2191,0.0687,0.2621,0.2463,0.1799,0.9366
6,0.2267,0.0746,0.2732,0.176,0.1854,0.9354
7,0.2231,0.0721,0.2684,0.2007,0.1822,0.8925
8,0.2263,0.0718,0.268,0.209,0.1826,0.9554
9,0.2205,0.0694,0.2634,0.2295,0.1799,0.9308


In [50]:
blend2_tune = tune_model(blend2)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2378,0.0779,0.279,0.1407,0.1914,1.0497
1,0.2464,0.0847,0.291,0.0637,0.2,1.071
2,0.2371,0.0794,0.2819,0.123,0.1944,1.0438
3,0.2341,0.0763,0.2763,0.1614,0.1882,0.9665
4,0.2379,0.0789,0.281,0.1405,0.1919,1.0106
5,0.2189,0.0686,0.2618,0.2476,0.1797,0.9348
6,0.2268,0.0747,0.2734,0.1746,0.1856,0.9392
7,0.2223,0.0717,0.2678,0.2044,0.1818,0.8901
8,0.2262,0.0717,0.2678,0.2106,0.1825,0.9594
9,0.22,0.0695,0.2637,0.2277,0.1801,0.9302


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [52]:
predict_model(blend2_tune);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2074,0.0624,0.2498,0.3032,0.1698,0.8867




In [54]:
newpred6 = predict_model(blend2_tune, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2193,0.0736,0.2714,0.1794,0.184,0.9541




In [38]:
blend3 = blend_models([rf_tune, lgbm_tune, gbr, cat_tune, et, br_tune])

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2369,0.0777,0.2788,0.1423,0.1909,1.0315
1,0.2485,0.0865,0.2941,0.043,0.2026,1.0914
2,0.2353,0.079,0.281,0.1283,0.1941,1.036
3,0.2331,0.0753,0.2744,0.1729,0.1876,0.9825
4,0.2354,0.0772,0.2779,0.1592,0.1903,1.0245
5,0.218,0.0687,0.2622,0.2457,0.18,0.9333
6,0.2247,0.0738,0.2717,0.185,0.1841,0.9186
7,0.2239,0.0723,0.2689,0.1981,0.1827,0.8997
8,0.2249,0.0709,0.2662,0.2196,0.1813,0.9471
9,0.2182,0.0684,0.2616,0.2399,0.179,0.9256


In [39]:
blend3_tune = tune_model(blend3)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2351,0.0763,0.2762,0.1577,0.1893,1.0296
1,0.2457,0.0845,0.2908,0.0649,0.2,1.0696
2,0.2356,0.0788,0.2806,0.1307,0.1936,1.0336
3,0.2334,0.0755,0.2748,0.1704,0.1877,0.981
4,0.2351,0.0771,0.2776,0.1607,0.1901,1.0151
5,0.2177,0.0686,0.2619,0.2474,0.18,0.9377
6,0.2254,0.074,0.2721,0.1825,0.1845,0.9272
7,0.2235,0.0716,0.2677,0.2053,0.1819,0.9052
8,0.2255,0.0709,0.2662,0.2199,0.1817,0.9603
9,0.218,0.0682,0.2612,0.2423,0.179,0.9339


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [41]:
predict_model(blend3);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2072,0.0625,0.25,0.3025,0.1698,0.8822




In [43]:
newpred7 = predict_model(blend3, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2181,0.0732,0.2706,0.1842,0.1833,0.9509




In [44]:
blend4 = blend_models([cat_tune, lgbm_tune, br_tune])

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2351,0.0776,0.2786,0.143,0.1904,1.0176
1,0.2478,0.0862,0.2937,0.0462,0.2016,1.0605
2,0.2324,0.0783,0.2799,0.1352,0.1928,1.0068
3,0.2318,0.0751,0.274,0.1751,0.1867,0.9521
4,0.2354,0.0783,0.2799,0.147,0.1906,0.9847
5,0.2161,0.068,0.2608,0.2535,0.1782,0.9024
6,0.2234,0.0735,0.2712,0.1879,0.1836,0.9015
7,0.2211,0.0722,0.2687,0.1991,0.182,0.8743
8,0.223,0.0706,0.2656,0.2232,0.1808,0.933
9,0.2173,0.0686,0.2619,0.2382,0.1787,0.902


In [46]:
blend4_tune = tune_model(blend4)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.234,0.0772,0.2778,0.1483,0.1895,1.0051
1,0.2465,0.0853,0.2921,0.0563,0.2,1.036
2,0.2319,0.0786,0.2804,0.1318,0.1929,0.9938
3,0.231,0.0749,0.2737,0.1766,0.1862,0.9374
4,0.2342,0.078,0.2793,0.1505,0.1903,0.9745
5,0.2153,0.0679,0.2605,0.2552,0.1778,0.8949
6,0.2229,0.0736,0.2713,0.187,0.1836,0.8929
7,0.2209,0.0721,0.2686,0.2,0.1821,0.8754
8,0.2223,0.0702,0.2649,0.2275,0.1805,0.9345
9,0.2158,0.0683,0.2613,0.2417,0.1785,0.9028


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [48]:
predict_model(blend4_tune);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2031,0.0617,0.2484,0.311,0.168,0.836




In [50]:
newpred8 = predict_model(blend4, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2184,0.0744,0.2728,0.1706,0.1843,0.9189




In [51]:
blend5 = blend_models([br_tune, et, rf_tune])

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2357,0.0767,0.2769,0.1537,0.1896,1.0285
1,0.2458,0.085,0.2915,0.0601,0.2005,1.0713
2,0.2342,0.0782,0.2797,0.1362,0.193,1.0287
3,0.2334,0.0754,0.2745,0.1718,0.1878,0.989
4,0.2342,0.0766,0.2768,0.1655,0.1896,1.0134
5,0.2174,0.0688,0.2622,0.2455,0.1801,0.9352
6,0.2249,0.074,0.272,0.1829,0.1843,0.9153
7,0.2246,0.0721,0.2685,0.2006,0.1824,0.9112
8,0.2251,0.0707,0.2659,0.2218,0.1814,0.9567
9,0.2175,0.0677,0.2602,0.2479,0.1786,0.936


In [52]:
blend5_tune = tune_model(blend5)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.2342,0.0765,0.2766,0.1554,0.1889,1.0056
1,0.2461,0.0855,0.2924,0.0545,0.2008,1.0543
2,0.2315,0.0779,0.2791,0.1398,0.1924,1.0013
3,0.2315,0.0746,0.2732,0.1801,0.1867,0.9712
4,0.2325,0.0764,0.2763,0.1686,0.1889,0.9946
5,0.2155,0.0684,0.2616,0.2489,0.1791,0.9137
6,0.2229,0.0735,0.2711,0.1885,0.1834,0.8945
7,0.2233,0.0722,0.2687,0.1992,0.1824,0.8999
8,0.2231,0.07,0.2645,0.2295,0.1804,0.9431
9,0.2159,0.0674,0.2596,0.2514,0.178,0.9187


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [54]:
predict_model(blend5_tune);

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2053,0.0624,0.2498,0.3035,0.169,0.859


In [56]:
newpred10 = predict_model(blend5, data=data_unseen)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.2191,0.0737,0.2715,0.1787,0.1838,0.9544


In [128]:
save_model(blend4, "indycar_cat_lgbm_prequaly_model_v1")

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['DriverID', 'Rookie', 'DRFAvg',
                                              'DTAvg', 'DTTAvg', 'DNFRate',
                                              'TDNFRate', 'DriverElo',
                                              'DriverTElo', 'DriverTTElo',
                                              'DriverRitmo', 'TeamID', 'TRP',
                                              'TTP', 'TeamDNFRate', 'TeamElo',
                                              'TeamTElo', 'TeamRitmo',
                                              'EngineID', 'EngineElo',
                                              'EngineTElo', 'EngineTTElo',
                                              'TrackID', 'EventTr...
                  VotingRegressor(estimators=[('CatBoost Regressor',
                                               <catboost.core.CatBoostRegressor object at 0x000001539B872690>