# LightGBM (Light Gradient Boosting Machine)

In [1]:
import numpy as np
import lightgbm as lgb
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, average_precision_score, log_loss
from sklearn.metrics import cohen_kappa_score, confusion_matrix, brier_score_loss
from sklearn.metrics import matthews_corrcoef, fowlkes_mallows_score, r2_score
from sklearn.preprocessing import label_binarize
from sklearn.metrics import precision_recall_curve, roc_curve, top_k_accuracy_score

# Load the forest cover type dataset
data = fetch_covtype()
X, y = data.data, data.target

# Adjust labels to be in the range 0 to 6, so that label i can be matched
# with column i of y_prob in the per-class computations below
y = y - 1

# Split the data into train and test sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a LightGBM model with the library's default hyperparameters
model = lgb.LGBMClassifier()
model.fit(X_train, y_train)

# Make predictions on the test set: hard labels and per-class probabilities
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)

# Number of distinct classes; np.unique is vectorized, whereas len(set(y))
# hashes every element of the label array one by one in Python
num_classes = len(np.unique(y))

# Binarize the output (one indicator column per class); done up front because
# both the AUC-PR summary and the per-class curves need it
y_test_bin = label_binarize(y_test, classes=range(num_classes))

# Calculate evaluation metrics (macro averaging weights every class equally)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
roc_auc = roc_auc_score(y_test, y_prob, multi_class='ovr')
# The indicator matrix is passed instead of the raw multiclass labels so this
# call also works on scikit-learn releases that reject multiclass y_true here
pr_auc = average_precision_score(y_test_bin, y_prob, average='macro')
log_loss_value = log_loss(y_test, y_prob)
kappa = cohen_kappa_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Calculate Brier score for each class (one-vs-rest) and average it
brier_scores = [brier_score_loss(y_test == i, y_prob[:, i]) for i in range(num_classes)]
brier_score_avg = np.mean(brier_scores)

mcc = matthews_corrcoef(y_test, y_pred)
fmi = fowlkes_mallows_score(y_test, y_pred)
# NOTE(review): r2_score treats the class indices as numeric values; confirm
# that a regression-style score is really wanted for this classification target
r2 = r2_score(y_test, y_pred)

# Calculate precision-recall curve and ROC curve for each class, and report
# the one-vs-rest AUC-ROC of each class as it is computed
precision_dict, recall_dict, fpr_dict, tpr_dict = {}, {}, {}, {}
for i in range(num_classes):
    precision_dict[i], recall_dict[i], _ = precision_recall_curve(y_test_bin[:, i], y_prob[:, i])
    fpr_dict[i], tpr_dict[i], _ = roc_curve(y_test_bin[:, i], y_prob[:, i])
    auc = roc_auc_score(y_test_bin[:, i], y_prob[:, i])
    print(f"AUC-ROC for Class {i}: {auc:.3f}")

# Calculate top-k accuracy (true label among the 3 highest-probability classes)
top_k_acc = top_k_accuracy_score(y_test, y_prob, k=3)

print(f"Accuracy: {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1-Score: {f1:.3f}")
print(f"AUC-ROC: {roc_auc:.3f}")
print(f"AUC-PR: {pr_auc:.3f}")
print(f"Log Loss: {log_loss_value:.3f}")
print(f"Cohen's Kappa: {kappa:.3f}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Average Brier Score: {brier_score_avg:.3f}")
print(f"Matthews Correlation Coefficient: {mcc:.3f}")
print(f"Fowlkes-Mallows Index: {fmi:.3f}")
print(f"Coefficient of Determination (R^2): {r2:.3f}")
print(f"Top-3 Accuracy: {top_k_acc:.3f}")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031642 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2262
[LightGBM] [Info] Number of data points in the train set: 464809, number of used features: 53
[LightGBM] [Info] Start training from score -1.010055
[LightGBM] [Info] Start training from score -0.717554
[LightGBM] [Info] Start training from score -2.787067
[LightGBM] [Info] Start training from score -5.343669
[LightGBM] [Info] Start training from score -4.126990
[LightGBM] [Info] Start training from score -3.511322
[LightGBM] [Info] Start training from score -3.338569
AUC-ROC for Class 0: 0.946
AUC-ROC for Class 1: 0.944
AUC-ROC for Class 2: 0.992
AUC-ROC for Class 3: 0.964
AUC-ROC for Class 4: 0.988
AUC-ROC for Class 5: 0.995
AUC-ROC for Class 6: 0.984
Accuracy: 0.851
Precision: 0.840
Recall: 0.811
F1-Score: 0.825
AUC-ROC: 0.973
AUC-PR: 0.855
Log Loss: 0.511
Cohen's Kappa: 0.760
Confusion Matr