In [13]:
import os
import pandas as pd
import numpy as np

file_path = "2024_01_08_AQI_Weather_dataset.csv"
data = pd.read_csv(file_path, low_memory = False)
sorted_data = data.sort_values(by = ['sitename', 'datacreationdate'])

In [14]:
sorted_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58550 entries, 0 to 58549
Data columns (total 24 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   datacreationdate    58550 non-null  object 
 1   sitename            58550 non-null  object 
 2   aqi                 58550 non-null  float64
 3   so2                 58550 non-null  float64
 4   co                  58550 non-null  float64
 5   o3                  58550 non-null  float64
 6   o3_8hr              58550 non-null  float64
 7   pm10                58550 non-null  float64
 8   pm2.5               58550 non-null  float64
 9   no2                 58550 non-null  float64
 10  nox                 58550 non-null  float64
 11  no                  58550 non-null  float64
 12  co_8hr              58550 non-null  float64
 13  pm2.5_avg           58550 non-null  float64
 14  pm10_avg            58550 non-null  float64
 15  so2_avg             58550 non-null  float64
 16  測站氣壓

In [15]:
data['datacreationdate'] = pd.to_datetime(data['datacreationdate'])

In [16]:
data.rename(columns={
    '測站氣壓(hPa)': 'station_pressure_hpa',
    '氣溫(℃)': 'temperature_c',
    '相對溼度(%)': 'relative_humidity',
    '風速(m/s)': 'wind_speed_m_s',
    '風向(360degree)': 'wind_direction_deg',
    '最大瞬間風(m/s)': 'max_instant_wind_m_s',
    '最大瞬間風風向(360degree)': 'max_instant_wind_direction_deg',
    '降水量(mm)': 'precipitation_mm'
}, inplace=True)

In [17]:
summary = data.describe()
print(summary)

          datacreationdate           aqi           so2            co  \
count                58550  58550.000000  58550.000000  58550.000000   
mean   2024-05-02 00:00:00     51.876362      1.381404      0.395295   
min    2024-01-01 01:00:00     -1.000000      0.000000      0.000000   
25%    2024-03-02 00:00:00     31.000000      0.800000      0.190000   
50%    2024-05-02 00:00:00     47.000000      1.200000      0.300000   
75%    2024-07-02 00:00:00     67.000000      1.700000      0.470000   
max    2024-08-31 23:00:00    169.000000     80.400000      3.230000   
std                    NaN     26.882942      1.335122      0.316834   

                 o3        o3_8hr          pm10         pm2.5           no2  \
count  58550.000000  58550.000000  58550.000000  58550.000000  58550.000000   
mean      27.279481     27.334644     31.272741     15.875730     11.794844   
min        0.000000      0.300000      0.000000      0.000000      0.000000   
25%       14.100000     15.900000  

In [18]:
class_mapping = {
    'Good': 0,
    'Moderate': 1,
    'Unhealthy for Sensitive Groups': 2,
    'Unhealthy': 3,
    'Very Unhealthy': 4,
    'Hazardous': 5
}

In [19]:
# 定義 AQI 分類函數
def aqi_to_category(aqi):
    if aqi <= 50:
        return 0
    elif aqi <= 100:
        return 1
    elif aqi <= 150:
        return 2
    elif aqi <= 200:
        return 3
    elif aqi <= 300:
        return 4
    else:
        return 5

# 應用到數據
data['AQI_level'] = data['aqi'].apply(aqi_to_category)

In [20]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
import xgboost as xgb
import lightgbm as lgb

# 模型列表
models = {
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'KNN': KNeighborsClassifier(),
    'XGBoost': xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42),
    'LightGBM': lgb.LGBMClassifier(random_state=42),
    'HistGradientBoosting': HistGradientBoostingClassifier(random_state=42)
}

In [21]:
# 所有特徵列（根據提供數據結構）
all_features = [
    'station_pressure_hpa', 'temperature_c', 'relative_humidity', 'wind_speed_m_s',
    'wind_direction_deg', 'max_instant_wind_m_s', 'max_instant_wind_direction_deg', 
    'precipitation_mm', 'pm2.5', 'pm10', 'so2', 'no2', 'nox', 'o3', 'co', 'o3_8hr',
    'co_8hr', 'pm2.5_avg', 'pm10_avg', 'so2_avg', 'no'
]

In [22]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import f1_score
import time

def evaluate_models(X, y):
    """
    自動化評估多個模型的性能，並選擇最佳模型。
    Args:
        X: 特徵數據。
        y: 目標數據。
    Returns:
        best_model_name: 性能最佳的模型名稱。
        best_model: 性能最佳的模型對象。
        results: 所有模型的性能指標。
    """
    results = {}
    best_accuracy = 0
    best_ac_model_name = None
    best_ac_model = None
    best_f1_score = 0
    best_f1_model_name = None
    best_f1_model = None
    
    # 分割數據集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    
    # 遍歷模型列表
    for model_name, model in models.items():
        print(f"Training {model_name}...")
        
        # 計算模型執行時間
        start_time = time.time()
        model.fit(X_train, y_train)
        end_time = time.time()
        elapsed_time = end_time - start_time
        
        y_pred = model.predict(X_test)
        
        # 計算準確率
        accuracy = accuracy_score(y_test, y_pred)
        # 計算F1-score
        f1 = f1_score(y_test, y_pred, average='weighted')
        
        results[model_name] = {
            'F1-score': f1,
            'accuracy': accuracy,
            'time': elapsed_time
        }
        
        print(f"{model_name} accuracy: {accuracy:.4f}")
        print(f"{model_name} F1-score: {f1:.4f}")
        print(f"{model_name} Training Time: {elapsed_time:.2f} seconds")
        print(classification_report(y_test, y_pred))
        
        # 更新 f1-score 最佳模型
        if f1 > best_f1_score:
            best_f1_score = f1
            best_f1_model_name = model_name
            best_f1_model = model
            
        # 更新 f1-score 最佳模型
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_ac_model_name = model_name
            best_ac_model = model
            
    print(f"\nBest Model (accuracy): {best_ac_model_name} with accuracy: {best_accuracy:.4f}")
    print(f"\nBest Model (F1-score): {best_f1_model_name} with F1-score: {best_f1_score:.4f}")
    
    return best_ac_model_name, best_ac_model, results

In [23]:
# 24hr -> 1hr

In [25]:
# 計算滯後特徵
lag_features = []
for lag in range(1, 25):  # 滯後 24 小時
    for feature in all_features:
        lag_features.append(data.groupby('sitename')[feature].shift(lag).rename(f'{feature}_lag_{lag}'))

# 合併滯後特徵
lag_features_df = pd.concat(lag_features, axis=1)
data_with_lags = pd.concat([data.reset_index(drop=True), lag_features_df.reset_index(drop=True)], axis=1)

# 添加目標變數
data_with_lags['AQI_level_next'] = data_with_lags.groupby('sitename')['AQI_level'].shift(-1)

# 刪除 NaN
lag_columns = [f'{feature}_lag_{lag}' for feature in all_features for lag in range(1, 25)]
data_24hr = data_with_lags.dropna(subset=lag_columns + ['AQI_level_next'])
X_24hr = data_24hr[lag_columns]
y_24hr = data_24hr['AQI_level_next']

# 自動化模型選擇並記錄時間與 F1 Score
best_model_name_24hr, best_model_24hr, results_24hr = evaluate_models(X_24hr, y_24hr)

Training Decision Tree...
Decision Tree accuracy: 0.9032
Decision Tree F1-score: 0.9031
Decision Tree Training Time: 33.06 seconds
              precision    recall  f1-score   support

         0.0       0.93      0.94      0.93      6383
         1.0       0.89      0.87      0.88      4567
         2.0       0.78      0.81      0.80       681
         3.0       0.59      0.59      0.59        29

    accuracy                           0.90     11660
   macro avg       0.80      0.80      0.80     11660
weighted avg       0.90      0.90      0.90     11660

Training Random Forest...
Random Forest accuracy: 0.9352
Random Forest F1-score: 0.9347
Random Forest Training Time: 89.22 seconds
              precision    recall  f1-score   support

         0.0       0.95      0.96      0.96      6383
         1.0       0.92      0.92      0.92      4567
         2.0       0.89      0.83      0.86       681
         3.0       0.80      0.41      0.55        29

    accuracy                   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Parameters: { "use_label_encoder" } are not used.



XGBoost accuracy: 0.9437
XGBoost F1-score: 0.9436
XGBoost Training Time: 62.68 seconds
              precision    recall  f1-score   support

         0.0       0.96      0.97      0.96      6383
         1.0       0.93      0.93      0.93      4567
         2.0       0.90      0.87      0.88       681
         3.0       0.87      0.69      0.77        29

    accuracy                           0.94     11660
   macro avg       0.91      0.86      0.89     11660
weighted avg       0.94      0.94      0.94     11660

Training LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.090643 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 93442
[LightGBM] [Info] Number of data points in the train set: 46640, number of used features: 504
[LightGBM] [Info] Start training from score -0.602526
[LightGBM] [Info] Start training from score -0.937253
[LightGBM] [Info] Start training from score -2.841092
[Lig

In [26]:
importance_df_24hr = pd.DataFrame({
    'feature': X_24hr.columns,
    'importance': best_model_24hr.feature_importances_
}).sort_values(by='importance', ascending=False)

# 顯示 Top 10 特徵
top_10_features_24hr = importance_df_24hr.head(10)
print("Top 10 Features for 24-hour case:")
print(top_10_features_24hr)


Top 10 Features for 24-hour case:
             feature  importance
408  pm2.5_avg_lag_1    0.222589
264        no2_lag_1    0.032886
314         o3_lag_3    0.016160
192      pm2.5_lag_1    0.013383
284       no2_lag_21    0.011166
360     o3_8hr_lag_1    0.007876
313         o3_lag_2    0.007760
193      pm2.5_lag_2    0.006687
286       no2_lag_23    0.006021
400    co_8hr_lag_17    0.005894


In [None]:
# 1hr -> 1hr

In [27]:
# 計算滯後 1 小時的特徵
lag_features = []
for feature in all_features:
    lag_features.append(data.groupby('sitename')[feature].shift(1).rename(f'{feature}_lag_1'))

# 合併滯後特徵
lag_features_df = pd.concat(lag_features, axis=1)
data_with_lags = pd.concat([data.reset_index(drop=True), lag_features_df.reset_index(drop=True)], axis=1)

# 添加目標變數
data_with_lags['AQI_level_next'] = data_with_lags.groupby('sitename')['AQI_level'].shift(-1)

# 刪除 NaN
lag_columns = [f'{feature}_lag_1' for feature in all_features]
data_1hr = data_with_lags.dropna(subset=lag_columns + ['AQI_level_next'])

# 訓練資料
X_1hr = data_1hr[lag_columns]
y_1hr = data_1hr['AQI_level_next']

# 自動化模型選擇
best_model_name_1hr, best_model_1hr, results_1hr = evaluate_models(X_1hr, y_1hr)

Training Decision Tree...
Decision Tree accuracy: 0.9107
Decision Tree F1-score: 0.9107
Decision Tree Training Time: 0.93 seconds
              precision    recall  f1-score   support

         0.0       0.94      0.93      0.94      6383
         1.0       0.89      0.89      0.89      4602
         2.0       0.83      0.83      0.83       691
         3.0       0.64      0.60      0.62        30

    accuracy                           0.91     11706
   macro avg       0.82      0.81      0.82     11706
weighted avg       0.91      0.91      0.91     11706

Training Random Forest...
Random Forest accuracy: 0.9380
Random Forest F1-score: 0.9376
Random Forest Training Time: 12.33 seconds
              precision    recall  f1-score   support

         0.0       0.95      0.96      0.96      6383
         1.0       0.93      0.92      0.92      4602
         2.0       0.90      0.87      0.89       691
         3.0       0.79      0.37      0.50        30

    accuracy                    

Parameters: { "use_label_encoder" } are not used.



XGBoost accuracy: 0.9393
XGBoost F1-score: 0.9392
XGBoost Training Time: 1.45 seconds
              precision    recall  f1-score   support

         0.0       0.95      0.96      0.96      6383
         1.0       0.93      0.92      0.92      4602
         2.0       0.89      0.88      0.89       691
         3.0       0.71      0.57      0.63        30

    accuracy                           0.94     11706
   macro avg       0.87      0.83      0.85     11706
weighted avg       0.94      0.94      0.94     11706

Training LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002408 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3899
[LightGBM] [Info] Number of data points in the train set: 46824, number of used features: 21
[LightGBM] [Info] Start training from score -0.606424
[LightGBM] [Info] Start training from score -0.933719
[LightGBM] [Info] Start training from score -2.828994
[LightG

In [30]:
importance_df_1hr = pd.DataFrame({
    'feature': X_1hr.columns,
    'importance': best_model_1hr.feature_importances_
}).sort_values(by='importance', ascending=False)

# 顯示 Top 10 特徵
top_10_features_1hr = importance_df_1hr.head(10)
print("Top 10 Features for 1-hour case:")
print(top_10_features_1hr)

Top 10 Features for 1-hour case:
                   feature  importance
17         pm2.5_avg_lag_1    0.652708
8              pm2.5_lag_1    0.063519
11               no2_lag_1    0.060177
15            o3_8hr_lag_1    0.030955
13                o3_lag_1    0.020974
9               pm10_lag_1    0.018687
12               nox_lag_1    0.017313
18          pm10_avg_lag_1    0.014218
14                co_lag_1    0.011050
7   precipitation_mm_lag_1    0.010824


In [None]:
# 24hr -> 24hr

In [35]:
# 計算滯後 1~24 小時的特徵
lag_features = []
for lag in range(1, 25):  # 滯後 24 小時
    for feature in all_features:
        lag_features.append(data.groupby('sitename')[feature].shift(lag).rename(f'{feature}_lag_{lag}'))

# 合併滯後特徵
lag_features_df = pd.concat(lag_features, axis=1)
data_with_lags = pd.concat([data.reset_index(drop=True), lag_features_df.reset_index(drop=True)], axis=1)

# 添加未來 24 小時的目標變數
for lead in range(1, 25):
    data_with_lags[f'AQI_level_future_{lead}'] = data_with_lags.groupby('sitename')['AQI_level'].shift(-lead)

# 刪除 NaN
future_cols = [f'AQI_level_future_{lead}' for lead in range(1, 25)]
lag_columns = [f'{feature}_lag_{lag}' for feature in all_features for lag in range(1, 25)]
data_future = data_with_lags.dropna(subset=lag_columns + future_cols)

# 訓練資料
X_future = data_future[lag_columns]
y_future = data_future[future_cols]

# 多輸出模型
from sklearn.multioutput import MultiOutputClassifier

start_time = time.time()

multi_model = MultiOutputClassifier(xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42))
multi_model.fit(X_future, y_future)

end_time = time.time()
elapsed_time = end_time - start_time
# 預測與評估
y_pred_future = multi_model.predict(X_future)

# 計算每小時的 F1 Score
f1_scores = []
for i in range(24):
    f1 = f1_score(y_future.iloc[:, i], y_pred_future[:, i], average='weighted')
    f1_scores.append(f1)
    print(f"Hour {i+1} F1 Score: {f1:.4f}")

print(f"\nAverage F1 Score for 24 hours: {sum(f1_scores) / 24:.4f}\n")
accuracys = []
for i in range(24):
    ac = accuracy_score(y_future.iloc[:, i], y_pred_future[:, i])
    accuracys.append(ac)
    print(f"Hour {i+1} Accuracy: {ac:.4f}")

print(f"\nAverage Accuracy for 24 hours: {sum(accuracys) / 24:.4f}")

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

Hour 1 F1 Score: 0.9975
Hour 2 F1 Score: 0.9949
Hour 3 F1 Score: 0.9909
Hour 4 F1 Score: 0.9884
Hour 5 F1 Score: 0.9859
Hour 6 F1 Score: 0.9853
Hour 7 F1 Score: 0.9814
Hour 8 F1 Score: 0.9802
Hour 9 F1 Score: 0.9784
Hour 10 F1 Score: 0.9755
Hour 11 F1 Score: 0.9737
Hour 12 F1 Score: 0.9717
Hour 13 F1 Score: 0.9713
Hour 14 F1 Score: 0.9671
Hour 15 F1 Score: 0.9677
Hour 16 F1 Score: 0.9684
Hour 17 F1 Score: 0.9664
Hour 18 F1 Score: 0.9665
Hour 19 F1 Score: 0.9644
Hour 20 F1 Score: 0.9650
Hour 21 F1 Score: 0.9657
Hour 22 F1 Score: 0.9649
Hour 23 F1 Score: 0.9641
Hour 24 F1 Score: 0.9673

Average F1 Score for 24 hours: 0.9751

Hour 1 Accuracy: 0.9975
Hour 2 Accuracy: 0.9949
Hour 3 Accuracy: 0.9909
Hour 4 Accuracy: 0.9884
Hour 5 Accuracy: 0.9859
Hour 6 Accuracy: 0.9853
Hour 7 Accuracy: 0.9814
Hour 8 Accuracy: 0.9802
Hour 9 Accuracy: 0.9784
Hour 10 Accuracy: 0.9755
Hour 11 Accuracy: 0.9736
Hour 12 Accuracy: 0.9717
Hour 13 Accuracy: 0.9712
Hour 14 Accuracy: 0.9671
Hour 15 Accuracy: 0.9677
Hou

In [37]:
elapsed_time

1173.9335584640503

In [49]:
'''importance_df_24to24 = pd.DataFrame({
    'feature': X_future.columns,
    'importance': multi_model.feature_importances_
}).sort_values(by='importance', ascending=False)

# 顯示 Top 10 特徵
top_10_features_24to24 = importance_df_24to24.head(10)
print("Top 10 Features for 24to24-hour case:")
print(top_10_features_24to24)'''

'importance_df_24to24 = pd.DataFrame({\n    \'feature\': X_future.columns,\n    \'importance\': multi_model.feature_importances_\n}).sort_values(by=\'importance\', ascending=False)\n\n# 顯示 Top 10 特徵\ntop_10_features_24to24 = importance_df_24to24.head(10)\nprint("Top 10 Features for 24to24-hour case:")\nprint(top_10_features_24to24)'

In [None]:
# Top 10 features

In [None]:
# 1hr -> 1hr

In [39]:
top_10_features_1hr

Unnamed: 0,feature,importance
17,pm2.5_avg_lag_1,0.652708
8,pm2.5_lag_1,0.063519
11,no2_lag_1,0.060177
15,o3_8hr_lag_1,0.030955
13,o3_lag_1,0.020974
9,pm10_lag_1,0.018687
12,nox_lag_1,0.017313
18,pm10_avg_lag_1,0.014218
14,co_lag_1,0.01105
7,precipitation_mm_lag_1,0.010824


In [46]:
X_1hr_top10 = X_1hr[importance_df_1hr.head(10)['feature'].tolist()]
best_model_name_1hr_top_10, best_model_1hr_top_10, results_1hr_top_10 = evaluate_models(X_1hr_top10, y_1hr)

Training Decision Tree...
Decision Tree accuracy: 0.9082
Decision Tree F1-score: 0.9081
Decision Tree Training Time: 0.42 seconds
              precision    recall  f1-score   support

         0.0       0.94      0.93      0.93      6383
         1.0       0.88      0.89      0.89      4602
         2.0       0.83      0.82      0.83       691
         3.0       0.62      0.53      0.57        30

    accuracy                           0.91     11706
   macro avg       0.82      0.79      0.80     11706
weighted avg       0.91      0.91      0.91     11706

Training Random Forest...
Random Forest accuracy: 0.9346
Random Forest F1-score: 0.9343
Random Forest Training Time: 9.46 seconds
              precision    recall  f1-score   support

         0.0       0.95      0.96      0.95      6383
         1.0       0.93      0.91      0.92      4602
         2.0       0.89      0.87      0.88       691
         3.0       0.82      0.47      0.60        30

    accuracy                     

Parameters: { "use_label_encoder" } are not used.



XGBoost accuracy: 0.9372
XGBoost F1-score: 0.9370
XGBoost Training Time: 1.18 seconds
              precision    recall  f1-score   support

         0.0       0.95      0.96      0.95      6383
         1.0       0.93      0.91      0.92      4602
         2.0       0.89      0.88      0.89       691
         3.0       0.68      0.50      0.58        30

    accuracy                           0.94     11706
   macro avg       0.86      0.82      0.84     11706
weighted avg       0.94      0.94      0.94     11706

Training LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001443 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1963
[LightGBM] [Info] Number of data points in the train set: 46824, number of used features: 10
[LightGBM] [Info] Start training from score -0.606424
[LightGBM] [Info] Start training from score -0.933719
[LightGBM] [Info] Start training from score -2.828994
[LightG

In [None]:
# 24hr -> 1hr

In [47]:
top_10_features_24hr

Unnamed: 0,feature,importance
408,pm2.5_avg_lag_1,0.222589
264,no2_lag_1,0.032886
314,o3_lag_3,0.01616
192,pm2.5_lag_1,0.013383
284,no2_lag_21,0.011166
360,o3_8hr_lag_1,0.007876
313,o3_lag_2,0.00776
193,pm2.5_lag_2,0.006687
286,no2_lag_23,0.006021
400,co_8hr_lag_17,0.005894


In [48]:
X_24hr_top10 = X_24hr[importance_df_24hr.head(10)['feature'].tolist()]
best_model_name_24hr_top_10, best_model_24hr_top_10, results_24hr_top_10 = evaluate_models(X_24hr_top10, y_24hr)

Training Decision Tree...
Decision Tree accuracy: 0.8991
Decision Tree F1-score: 0.8992
Decision Tree Training Time: 0.41 seconds
              precision    recall  f1-score   support

         0.0       0.93      0.93      0.93      6383
         1.0       0.87      0.87      0.87      4567
         2.0       0.79      0.77      0.78       681
         3.0       0.51      0.72      0.60        29

    accuracy                           0.90     11660
   macro avg       0.78      0.83      0.80     11660
weighted avg       0.90      0.90      0.90     11660

Training Random Forest...
Random Forest accuracy: 0.9326
Random Forest F1-score: 0.9324
Random Forest Training Time: 10.07 seconds
              precision    recall  f1-score   support

         0.0       0.95      0.96      0.95      6383
         1.0       0.92      0.91      0.92      4567
         2.0       0.88      0.84      0.86       681
         3.0       0.71      0.76      0.73        29

    accuracy                    

Parameters: { "use_label_encoder" } are not used.



XGBoost accuracy: 0.9310
XGBoost F1-score: 0.9308
XGBoost Training Time: 1.33 seconds
              precision    recall  f1-score   support

         0.0       0.95      0.96      0.95      6383
         1.0       0.92      0.91      0.91      4567
         2.0       0.86      0.82      0.84       681
         3.0       0.64      0.86      0.74        29

    accuracy                           0.93     11660
   macro avg       0.84      0.89      0.86     11660
weighted avg       0.93      0.93      0.93     11660

Training LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1964
[LightGBM] [Info] Number of data points in the train set: 46640, number of used features: 10
[LightGBM] [Info] Start training from score -0.602526
[LightGBM] [Info] Start training from score -0.937253
[LightGBM] [Info] Start training from score -2.841092
[LightG