In [11]:
import os

import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# 1. Load the data.
# The dataset location can be overridden through the IOT_DATASET_PATH environment
# variable; the fallback is the original local path, so existing runs are unchanged.
DATA_PATH = os.environ.get('IOT_DATASET_PATH', r'D:\dow\project\data\processed_iot_dataset.csv')
df = pd.read_csv(DATA_PATH)

# 2. Select features and label.
# Every column that is neither explicitly dropped nor the target is used as a feature.
TARGET_COL = 'Fault_Status'
drop_cols = ['Sensor_ID', 'Fault_Type', 'Year', 'Month', 'Day', 'Hour', 'Minute']
excluded_cols = set(drop_cols) | {TARGET_COL}  # built once instead of once per column
feature_cols = [c for c in df.columns if c not in excluded_cols]
X = df[feature_cols]
y = df[TARGET_COL]

# 3. Train/test split (80/20), stratified on the label.
RANDOM_STATE = 42
TEST_SIZE = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
)

# 4. Instantiate the models.
# `use_label_encoder` is deprecated in XGBoost and only triggers a warning on
# current releases, so it is no longer passed.
# NOTE(review): the recorded LightGBM training log reports 11607 positive vs 27034
# negative training samples; if recall on the positive class matters, consider
# imbalance handling (e.g. XGBoost `scale_pos_weight`, LightGBM `class_weight`)
# -- left unchanged here so the modelling setup stays as originally configured.
xgb_model = XGBClassifier(eval_metric='logloss', random_state=RANDOM_STATE)
lgbm_model = LGBMClassifier(random_state=RANDOM_STATE)

# 5. Train both models on the training split.
xgb_model.fit(X_train, y_train)
lgbm_model.fit(X_train, y_train)

# 6. Predict labels for the held-out test split.
y_pred_xgb = xgb_model.predict(X_test)
y_pred_lgbm = lgbm_model.predict(X_test)


[LightGBM] [Info] Number of positive: 11607, number of negative: 27034
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001014 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 38641, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.300380 -> initscore=-0.845487
[LightGBM] [Info] Start training from score -0.845487


In [12]:
# 7. Evaluate the XGBoost predictions against the test labels.
print("=== XGBoost ===")

# Per-class precision / recall / F1 table.
xgb_report = classification_report(y_test, y_pred_xgb)
print(xgb_report)

xgb_confusion = confusion_matrix(y_test, y_pred_xgb)
print("Confusion Matrix:\n", xgb_confusion)

xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
print("Accuracy:", xgb_accuracy)

# F1 averaged over classes, weighted by each class's support.
xgb_weighted_f1 = f1_score(y_test, y_pred_xgb, average='weighted')
print("F1-score:", xgb_weighted_f1)


=== XGBoost ===
              precision    recall  f1-score   support

           0       0.70      0.96      0.81      6759
           1       0.33      0.04      0.08      2902

    accuracy                           0.69      9661
   macro avg       0.52      0.50      0.44      9661
weighted avg       0.59      0.69      0.59      9661

Confusion Matrix:
 [[6511  248]
 [2779  123]]
Accuracy: 0.6866783976813995
F1-score: 0.5902392398659582


In [13]:
# Evaluate the LightGBM predictions against the test labels.
print("\n=== LightGBM ===")

# Per-class precision / recall / F1 table.
lgbm_report = classification_report(y_test, y_pred_lgbm)
print(lgbm_report)

lgbm_confusion = confusion_matrix(y_test, y_pred_lgbm)
print("Confusion Matrix:\n", lgbm_confusion)

lgbm_accuracy = accuracy_score(y_test, y_pred_lgbm)
print("Accuracy:", lgbm_accuracy)

# F1 averaged over classes, weighted by each class's support.
lgbm_weighted_f1 = f1_score(y_test, y_pred_lgbm, average='weighted')
print("F1-score:", lgbm_weighted_f1)


=== LightGBM ===
              precision    recall  f1-score   support

           0       0.70      1.00      0.82      6759
           1       0.35      0.00      0.00      2902

    accuracy                           0.70      9661
   macro avg       0.52      0.50      0.41      9661
weighted avg       0.59      0.70      0.58      9661

Confusion Matrix:
 [[6746   13]
 [2895    7]]
Accuracy: 0.6989959631508126
F1-score: 0.5770021817266819
