In [2]:
from pycaret.regression import *
import pandas as pd

# Load the already-cleaned Air4Thai dataset.
data = pd.read_csv('cleaned_air4thai.csv')

# Every predictor declared as numeric in setup() below is derived from
# DATETIMEDATA. If the column is missing there is nothing meaningful to train
# on, so stop here with a clear error instead of printing a message and letting
# setup() trip over the missing derived columns later.
if 'DATETIMEDATA' not in data.columns:
    raise KeyError(
        "Column 'DATETIMEDATA' not found in the dataset. Please check your dataset."
    )

# Parse the timestamp column so the .dt accessor can be used.
data['DATETIMEDATA'] = pd.to_datetime(data['DATETIMEDATA'])

# Derive calendar features from the timestamp.
data['hour'] = data['DATETIMEDATA'].dt.hour
data['day_of_week'] = data['DATETIMEDATA'].dt.dayofweek
data['day'] = data['DATETIMEDATA'].dt.day
data['month'] = data['DATETIMEDATA'].dt.month

# Configure the PyCaret experiment: predict PM25 from the calendar features
# (plus any remaining non-ignored columns), excluding the raw timestamp and
# the other pollutant / weather measurements. session_id fixes the random seed.
# NOTE(review): the rows are time-stamped but the default train/test split and
# cross-validation are presumably shuffled; consider a time-aware fold strategy
# if the scores are meant to reflect forecasting performance -- confirm.
exp_reg = setup(data=data, target='PM25', session_id=123,
                ignore_features=['DATETIMEDATA', 'PM10', 'O3', 'CO', 'NO2', 'SO2', 'WS', 'TEMP', 'RH', 'WD'],
                numeric_features=['hour', 'day_of_week', 'day', 'month'])

# Train and rank the candidate regressors; keep the best one.
best_model = compare_models()

# Re-fit the winning estimator to obtain its per-fold cross-validation metrics.
created_model = create_model(best_model)

# Search hyperparameters for the chosen estimator.
tuned_model = tune_model(created_model)

# Open the interactive evaluation widget for the resulting model.
evaluate_model(tuned_model)

# Persist the preprocessing pipeline together with the model.
# NOTE(review): the model is saved without finalize_model(), so it has
# presumably not been refit on the hold-out split -- confirm this is intended.
save_model(tuned_model, 'final_pm25_prediction_model')


Unnamed: 0,Description,Value
0,Session id,123
1,Target,PM25
2,Target type,Regression
3,Original data shape,"(5568, 12)"
4,Transformed data shape,"(5568, 6)"
5,Transformed train set shape,"(3897, 6)"
6,Transformed test set shape,"(1671, 6)"
7,Ignore features,10
8,Numeric features,4
9,Categorical features,1


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,0.1358,0.5443,0.7113,0.9802,0.0388,0.008,0.06
dt,Decision Tree Regressor,0.1617,0.8869,0.9115,0.968,0.05,0.0093,0.014
rf,Random Forest Regressor,0.571,1.0912,1.0337,0.9605,0.0563,0.0339,0.079
knn,K Neighbors Regressor,1.6511,4.7981,2.1868,0.8261,0.1184,0.0979,0.016
lightgbm,Light Gradient Boosting Machine,1.9499,6.2684,2.5001,0.7733,0.1399,0.1192,0.116
gbr,Gradient Boosting Regressor,2.8073,12.8466,3.5821,0.5353,0.1944,0.1719,0.041
ada,AdaBoost Regressor,3.5973,19.1363,4.3732,0.3072,0.2436,0.2327,0.044
ridge,Ridge Regression,3.9855,26.1096,5.1084,0.0554,0.2755,0.25,0.013
lar,Least Angle Regression,3.9855,26.1096,5.1084,0.0554,0.2755,0.25,0.014
lr,Linear Regression,3.9855,26.1096,5.1084,0.0554,0.2755,0.25,0.522


Fold,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,0.196,0.6753,0.8217,0.9733,0.0499,0.0133
1,0.1971,0.8662,0.9307,0.9682,0.0541,0.0124
2,0.1613,0.5389,0.7341,0.9803,0.0388,0.0088
3,0.0514,0.1444,0.38,0.9946,0.0225,0.0033
4,0.1175,0.4032,0.635,0.9854,0.0363,0.0071
5,0.0985,0.3137,0.5601,0.9894,0.0335,0.006
6,0.1072,0.4654,0.6822,0.9841,0.0337,0.0057
7,0.11,0.3295,0.574,0.988,0.0311,0.0065
8,0.2012,1.2472,1.1168,0.9552,0.0485,0.0095
9,0.118,0.4597,0.678,0.9838,0.0397,0.0073


Fold,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,2.2702,8.4928,2.9142,0.6646,0.1614,0.1387
1,2.3839,10.0615,3.172,0.6304,0.1685,0.1454
2,2.4503,9.6499,3.1064,0.6477,0.1686,0.1481
3,2.3758,9.132,3.0219,0.6555,0.1695,0.15
4,2.4027,9.4927,3.081,0.6551,0.1767,0.1535
5,2.4635,9.8943,3.1455,0.6644,0.1719,0.1522
6,2.4842,9.92,3.1496,0.6616,0.175,0.1555
7,2.2986,8.6378,2.939,0.6856,0.1686,0.148
8,2.555,10.8446,3.2931,0.6103,0.1763,0.1554
9,2.5199,10.8099,3.2878,0.6182,0.1709,0.1479


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['hour', 'day_of_week', 'day',
                                              'month'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=['stationID'],
                                     transformer=SimpleImputer(strategy='most_frequent'))),
                 ('onehot_encoding',
                  TransformerWrapper(include=['stationID'],
                                     transformer=OneHotEncoder(cols=['stationID'],
                                                               handle_missing='return_nan',
                                                               use_cat_names=True))),
                 ('trained_model',
                  ExtraTreesRegressor(n_jobs=-1, random_state=123))]),
 'final_pm25_prediction_model.pkl')