In [2]:
# 📦 Install (if needed) and import the required libraries
# %pip install lightgbm pandas scikit-learn matplotlib

import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_recall_curve
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt


In [3]:
# 📂 Load the dataset from the CSV file
df = pd.read_csv(filepath_or_buffer="super_merge.csv")
# Preview the first five rows as the cell output
df.head(n=5)


Unnamed: 0,Classname,Classvalue,RED,GREEN,BLUE,Count,ORIG_FID,brightness,min_rgb,ndi_rb,range_rgb,whiteness2
0,Cloud,1,95,126,176,921858,1,4890,4676,-0.04141,404,808
1,Cloud,1,95,126,176,921858,2,5116,5084,-0.004677,72,144
2,Cloud,1,95,126,176,921858,3,4562,4488,-0.010146,132,264
3,Cloud,1,95,126,176,921858,4,5573,5508,-0.011131,124,248
4,Cloud,1,95,126,176,921858,5,3161,3076,0.018507,140,280


In [4]:
# 🧼 Preprocessing: split into train/test first, then standardize the features
features = ['brightness', 'min_rgb', 'ndi_rb', 'range_rgb', 'whiteness2']
X = df[features]
y = df["Classvalue"]

# Split BEFORE fitting the scaler: fitting StandardScaler on the full frame
# would let the test rows influence the mean/std applied to the training
# rows (data leakage). Same test_size / random_state / stratify as before, so
# the row partition itself is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

scaler = StandardScaler()
# Standardization statistics are learned from the training rows only ...
X_train = scaler.fit_transform(X_train_raw)
# ... and then re-applied unchanged to the held-out rows.
X_test = scaler.transform(X_test_raw)

# Kept so that any other cell reading X_scaled still works; it is now scaled
# with the training-set statistics rather than full-dataset statistics.
X_scaled = scaler.transform(X)


In [None]:
# 🚀 Train the LightGBM model
# Early stopping needs a validation set. Using the test split for that would
# tune the number of boosting rounds on the very data the model is scored on
# afterwards, making the reported metrics optimistic. Instead, hold out 20% of
# the training split for validation and keep the test split untouched.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

train_data = lgb.Dataset(X_fit, label=y_fit)
valid_data = lgb.Dataset(X_val, label=y_val, reference=train_data)
# Still defined for any other cell that references it, but deliberately NOT
# passed to valid_sets below.
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# NOTE(review): 'binary' presumably requires Classvalue to hold exactly two
# classes encoded as 0/1 - confirm against the full dataset.
params = {
    'objective': 'binary',
    'metric': ['binary_logloss', 'auc'],
    'verbosity': -1,
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9
}

# At most 100 boosting rounds; the early_stopping callback is configured with
# stopping_rounds=10 and monitors the validation split only.
model = lgb.train(
    params,
    train_data,
    num_boost_round=100,
    valid_sets=[valid_data],
    callbacks=[lgb.early_stopping(stopping_rounds=10)]
)



TypeError: train() got an unexpected keyword argument 'early_stopping_rounds'

In [None]:
# 📊 Predict on the test split and evaluate the model
y_pred_proba = model.predict(X_test)
# Hard labels: scores at or above the 0.5 cut-off become 1, the rest 0
y_pred = (y_pred_proba >= 0.5).astype(int)

# Confusion matrix first, then the per-class report (3 decimal places)
for summary in (
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred, digits=3),
):
    print(summary)

# ROC AUC is computed from the raw scores, not the thresholded labels
print("ROC AUC:", roc_auc_score(y_test, y_pred_proba))


In [None]:
# 📈 Plot precision and recall as functions of the decision threshold
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)

fig, ax = plt.subplots(figsize=(6, 5))
# The last precision/recall entry is dropped ([:-1]) so each curve is plotted
# against the thresholds array.
for scores, curve_label in ((precision, "Precision"), (recall, "Recall")):
    ax.plot(thresholds, scores[:-1], label=curve_label)
ax.set_xlabel("Threshold")
ax.set_ylabel("Score")
ax.set_title("Precision-Recall vs Threshold (LightGBM)")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# 💾 Save the trained LightGBM model to a text file
model.save_model(filename="cloud_lightgbm_model.txt")
