In [26]:
# ==== 1) Install PyCaret ====
!pip -q install -U pycaret pandas scikit-learn


In [27]:
# ==== 2) Load dataset ====
import pandas as pd

path = "/kaggle/input/advertisingcsv/Advertising.csv"
df = pd.read_csv(path)

print(df.shape)
df.head()


(200, 5)


Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [29]:
# ==== 3) PyCaret Regression ====
from pycaret.regression import *

reg = setup(
    data=df,
    target="Sales",
    session_id=42,
    normalize=True,
    transform_target=True,
    fold=5,
    use_gpu=False,
    verbose=True
)


Unnamed: 0,Description,Value
0,Session id,42
1,Target,Sales
2,Target type,Regression
3,Original data shape,"(200, 5)"
4,Transformed data shape,"(200, 5)"
5,Transformed train set shape,"(140, 5)"
6,Transformed test set shape,"(60, 5)"
7,Numeric features,4
8,Preprocess,True
9,Imputation type,simple


In [30]:
# ==== 4) Compare, Tune, Finalize ====
best_models = compare_models(n_select=3, sort="RMSE")
tuned = [tune_model(m, optimize="RMSE") for m in best_models]
blended = blend_models(tuned)
final_model = finalize_model(blended)


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,0.4803,0.5286,0.6932,0.9771,0.0754,0.0567,0.098
rf,Random Forest Regressor,0.6841,0.8076,0.8942,0.9672,0.0855,0.0672,0.13
gbr,Gradient Boosting Regressor,0.6617,0.8033,0.896,0.9674,0.0971,0.0692,0.06
xgboost,Extreme Gradient Boosting,0.756,0.8655,0.928,0.9653,0.0923,0.0701,0.058
catboost,CatBoost Regressor,0.6415,1.1269,1.0284,0.9516,0.1148,0.0812,0.812
ada,AdaBoost Regressor,0.9253,1.4196,1.184,0.9439,0.1139,0.0904,0.066
lightgbm,Light Gradient Boosting Machine,1.0347,2.1082,1.407,0.9125,0.1493,0.1229,3.87
huber,Huber Regressor,1.1037,2.5472,1.5624,0.8916,0.162,0.1363,0.028
lar,Least Angle Regression,1.1791,2.5387,1.5633,0.8934,0.1593,0.1365,0.024
lr,Linear Regression,1.1791,2.5387,1.5633,0.8934,0.1593,0.1365,1.37


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.6161,0.6348,0.7967,0.969,0.0489,0.0435
1,0.6888,0.8104,0.9002,0.968,0.0592,0.0485
2,0.9018,1.3776,1.1737,0.952,0.1178,0.0858
3,0.9322,1.713,1.3088,0.9465,0.0805,0.0676
4,0.696,2.0915,1.4462,0.8998,0.2509,0.1931
Mean,0.767,1.3255,1.1251,0.9471,0.1114,0.0877
Std,0.126,0.5444,0.244,0.0252,0.0736,0.0548


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.792,1.024,1.0119,0.95,0.0803,0.0621
1,0.6914,0.7215,0.8494,0.9715,0.0555,0.0474
2,0.865,1.2819,1.1322,0.9554,0.0838,0.0684
3,0.9227,1.2662,1.1252,0.9605,0.0743,0.0688
4,0.7798,1.2857,1.1339,0.9384,0.1961,0.1502
Mean,0.8102,1.1159,1.0505,0.9552,0.098,0.0794
Std,0.0788,0.2205,0.1106,0.011,0.05,0.0362


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.7904,1.0983,1.048,0.9463,0.0743,0.061
1,0.738,0.8208,0.906,0.9676,0.0529,0.0475
2,0.7593,0.7874,0.8874,0.9726,0.0834,0.066
3,0.6844,0.7363,0.8581,0.977,0.0576,0.0514
4,0.6743,1.7685,1.3299,0.9153,0.2352,0.1754
Mean,0.7293,1.0423,1.0059,0.9558,0.1007,0.0802
Std,0.0441,0.3842,0.1747,0.0228,0.0682,0.048


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.5982,0.6499,0.8062,0.9682,0.0686,0.0491
1,0.4537,0.3849,0.6204,0.9848,0.0399,0.0315
2,0.5838,0.6126,0.7827,0.9787,0.0439,0.0368
3,0.6113,0.5187,0.7202,0.9838,0.0586,0.0495
4,0.5466,0.882,0.9391,0.9578,0.1889,0.1296
Mean,0.5587,0.6096,0.7737,0.9747,0.08,0.0593
Std,0.0568,0.1641,0.1048,0.0103,0.0554,0.0358


In [31]:
# ==== 5) Evaluate & Predict ====
evaluate_model(final_model)
preds = predict_model(final_model)
preds.head()


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,0.9127,2.1982,1.4826,0.9195,0.2484,0.1085


Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales,prediction_label
95,96,163.300003,31.6,52.900002,16.9,16.373528
15,16,195.399994,47.700001,52.900002,22.4,21.578635
30,31,292.899994,28.299999,43.200001,21.4,20.936403
158,159,11.7,36.900002,45.200001,7.3,2.525825
128,129,220.300003,49.0,3.2,24.700001,23.696208


In [32]:
# ==== 6) Save ====
save_model(final_model, "/kaggle/working/advertising_best_model")
preds.to_csv("/kaggle/working/advertising_predictions.csv", index=False)

print("✅ Done! Files saved in /kaggle/working/")
!ls -lh /kaggle/working


Transformation Pipeline and Model Successfully Saved
✅ Done! Files saved in /kaggle/working/
total 4.8M
-rw-r--r-- 1 root root 4.5M Nov  2 09:58 advertising_best_model.pkl
-rw-r--r-- 1 root root 2.5K Nov  2 09:58 advertising_predictions.csv
drwxr-xr-x 4 root root 4.0K Nov  2 09:38 catboost_info
drwxr-xr-x 2 root root 4.0K Nov  2 09:00 cudf
drwxr-xr-x 3 root root 4.0K Nov  2 09:01 cuml
drwxr-xr-x 3 root root 4.0K Nov  2 09:06 cupy
-rw-r--r-- 1 root root 268K Nov  2 09:58 logs.log
drwxr-xr-x 3 root root 4.0K Nov  2 09:06 rmm
