In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import pycaret
from pycaret.classification import *
from pycaret.datasets import get_data

In [2]:
import pandas as pd
from pycaret.regression import *

# --- Load data ---------------------------------------------------------------
# Read the raw table and parse the timestamp column so it can be passed to
# PyCaret as a date feature.
train_data = pd.read_csv('PM25.csv')
train_data['DATETIMEDATA'] = pd.to_datetime(train_data['DATETIMEDATA'])

# --- Train -------------------------------------------------------------------
# session_id pins PyCaret's random seed so the train/test split and CV folds
# are repeatable across runs.
regression_setup = setup(
    train_data,
    target='PM25',
    session_id=123,
    date_features=['DATETIMEDATA'],
    normalize=True,
)

# Leaderboard of candidate regressors. `best` is not used further down in this
# cell; the pipeline below trains a random forest explicitly.
best = compare_models()

rf_model = create_model('rf')

# tune_model can hand back the untuned estimator when the search does not
# improve on it, so `tuned_rf_model` is not guaranteed to differ from `rf_model`.
tuned_rf_model = tune_model(rf_model)

bagged_model = ensemble_model(tuned_rf_model, n_estimators=20)

# --- Build the forecast frame ------------------------------------------------
last_prediction_time = train_data['DATETIMEDATA'].max()

start_date = last_prediction_time + pd.DateOffset(hours=1)
end_date = start_date + pd.DateOffset(days=30, hours=23)

# NOTE(review): despite the "next_week"/"hours" names, freq='D' yields one
# timestamp per day from start_date through end_date (31 rows), so the extra
# hours=23 in end_date has no effect. Names are kept so other cells that refer
# to them keep working -- confirm the intended horizon and granularity.
next_week_hours = pd.date_range(start=start_date, end=end_date, freq='D')

# NOTE(review): the exogenous inputs are unknown for future dates and are fed
# to the model as constant zeros, so the forecast can only vary with the date.
# Replace these placeholders with real forecasts of each feature if available.
PLACEHOLDER_FEATURES = ['O3', 'WS', 'TEMP', 'RH', 'WD']
next_week_data = pd.DataFrame({'DATETIMEDATA': next_week_hours}).assign(
    **{feature: 0 for feature in PLACEHOLDER_FEATURES}
)

# --- Predict -----------------------------------------------------------------
model_predictions = predict_model(bagged_model, data=next_week_data)

# Select by label rather than via pd.DataFrame(..., columns=[...]): the
# constructor reindexes and would silently produce an all-NaN column if
# 'prediction_label' were missing, whereas label selection raises KeyError.
mpdf = model_predictions[['DATETIMEDATA', 'prediction_label']].copy()

print(mpdf)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,PM25
2,Target type,Regression
3,Original data shape,"(1460, 7)"
4,Transformed data shape,"(1460, 9)"
5,Transformed train set shape,"(1021, 9)"
6,Transformed test set shape,"(439, 9)"
7,Numeric features,5
8,Date features,1
9,Preprocess,True


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
rf,Random Forest Regressor,3.1525,16.7181,4.0802,0.3797,0.214,0.1884,0.035
catboost,CatBoost Regressor,3.2205,17.2369,4.1419,0.3627,0.2169,0.1913,0.167
et,Extra Trees Regressor,3.1506,17.4221,4.1613,0.3551,0.2168,0.1873,0.026
lightgbm,Light Gradient Boosting Machine,3.2663,17.9521,4.226,0.3353,0.2216,0.1941,87.943
gbr,Gradient Boosting Regressor,3.2995,17.9225,4.2251,0.335,0.2239,0.1989,0.015
xgboost,Extreme Gradient Boosting,3.4341,19.6651,4.4204,0.2724,0.2326,0.2036,0.159
knn,K Neighbors Regressor,3.4481,19.978,4.4541,0.2646,0.2334,0.2053,0.009
ada,AdaBoost Regressor,3.7536,22.2866,4.7115,0.172,0.2575,0.2414,0.01
ridge,Ridge Regression,3.8218,23.5942,4.8422,0.1324,0.2636,0.2388,0.007
lr,Linear Regression,3.822,23.5951,4.8423,0.1323,0.2636,0.2388,0.253


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,3.0998,18.0992,4.2543,0.3126,0.2154,0.1866
1,3.2297,17.6284,4.1986,0.232,0.2267,0.2034
2,2.717,12.5377,3.5409,0.2958,0.1999,0.1666
3,3.285,16.7883,4.0974,0.4214,0.2174,0.1944
4,3.2089,16.6855,4.0848,0.4372,0.2191,0.1977
5,3.1243,17.0116,4.1245,0.4227,0.2162,0.1816
6,3.2062,16.2941,4.0366,0.3749,0.2131,0.1914
7,3.0298,13.7471,3.7077,0.4446,0.2031,0.1914
8,3.303,17.9036,4.2313,0.3905,0.2057,0.1796
9,3.3217,20.4857,4.5261,0.4649,0.2234,0.1912


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,3.1906,17.8749,4.2279,0.3211,0.2181,0.1951
1,3.0665,16.4039,4.0502,0.2854,0.2214,0.1946
2,2.8289,12.5483,3.5424,0.2952,0.2008,0.1759
3,3.4327,18.1919,4.2652,0.373,0.2297,0.2068
4,3.2237,17.0428,4.1283,0.4251,0.2258,0.2026
5,3.2524,18.5995,4.3127,0.3688,0.2257,0.188
6,3.4485,16.9908,4.122,0.3482,0.2231,0.209
7,3.2397,16.3515,4.0437,0.3394,0.2189,0.2032
8,3.3606,18.8237,4.3386,0.3592,0.2127,0.1844
9,3.5958,23.7021,4.8685,0.3809,0.2405,0.2089


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,3.2005,18.6075,4.3136,0.2933,0.2216,0.1957
1,3.1512,16.6754,4.0835,0.2735,0.2233,0.2006
2,2.7196,11.9762,3.4607,0.3274,0.1964,0.1689
3,3.3484,17.3218,4.1619,0.403,0.2223,0.2009
4,3.284,17.2087,4.1483,0.4195,0.2262,0.2059
5,3.1979,17.6555,4.2018,0.4008,0.222,0.1876
6,3.3816,16.7793,4.0963,0.3563,0.2182,0.2026
7,3.1321,14.9627,3.8682,0.3955,0.2087,0.1962
8,3.2745,17.9082,4.2318,0.3903,0.2048,0.177
9,3.5046,22.0854,4.6995,0.4231,0.2318,0.2036


   DATETIMEDATA  prediction_label
0    2024-03-02         23.375613
1    2024-03-03         22.811641
2    2024-03-04         23.019189
3    2024-03-05         23.265905
4    2024-03-06         23.058905
5    2024-03-07         23.374783
6    2024-03-08         23.699368
7    2024-03-09         23.657663
8    2024-03-10         22.885620
9    2024-03-11         22.569073
10   2024-03-12         22.631495
11   2024-03-13         22.536195
12   2024-03-14         22.410218
13   2024-03-15         21.996200
14   2024-03-16         21.623766
15   2024-03-17         21.501187
16   2024-03-18         21.437859
17   2024-03-19         22.256311
18   2024-03-20         24.075011
19   2024-03-21         25.732694
20   2024-03-22         23.822451
21   2024-03-23         21.617258
22   2024-03-24         21.456583
23   2024-03-25         21.347776
24   2024-03-26         21.405223
25   2024-03-27         21.428135
26   2024-03-28         21.773217
27   2024-03-29         21.823734
28   2024-03-3

In [3]:
# Persist the forecast table (timestamp + predicted value) as CSV, leaving out
# the positional index so the file contains only the two data columns.
output_path = 'predict_PM25.csv'
mpdf.to_csv(output_path, index=False)