In [6]:
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score


# Path of the Excel workbook to load (placeholder -- must be filled in before running).
file_path = r'INSERT HERE'
df = pd.read_excel(file_path)

# Industry codes that each get their own model in the loop below.
industries = 'A C F G H I J L M N P Q R'.split()

# Feature columns X1..X27 are identical for every industry, so build the list once
# instead of re-creating it on each loop iteration.
feature_columns = ['X{}'.format(i) for i in range(1, 28)]

# Train and evaluate one XGBoost classifier per industry, printing AUC-ROC,
# the confusion matrix and the accuracy measured on a 30% stratified hold-out.
for industry in industries:
    industry_df = df[df['Industry'] == industry]

    # Feature & Target
    X = industry_df[feature_columns]
    y = industry_df['status']

    # A stratified split raises ValueError when the subset is empty, holds a single
    # class, or has a class with fewer than two rows. Report and skip such an
    # industry so one sparse subset does not abort the whole run.
    class_counts = y.value_counts()
    if len(class_counts) < 2 or class_counts.min() < 2:
        print("Industry:", industry)
        print("Skipped: not enough samples per class for a stratified split")
        continue

    # Splitting (fixed seed so the same rows land in the test set on every run)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=99, stratify=y
    )

    # XGB -- reg_alpha is the scikit-learn wrapper's documented name for the
    # L1 penalty that the native API calls alpha.
    xg_clf = xgb.XGBClassifier(
        objective='binary:logistic',
        colsample_bytree=0.3,
        learning_rate=0.05,
        max_depth=5,
        reg_alpha=10,
        n_estimators=200,
    )

    # Training
    xg_clf.fit(X_train, y_train)

    # Predicting: hard labels for the confusion matrix / accuracy, and the
    # probability of the second class (column 1) for the ROC curve.
    y_pred = xg_clf.predict(X_test)
    y_pred_proba = xg_clf.predict_proba(X_test)[:, 1]

    # Results
    auc_roc = roc_auc_score(y_test, y_pred_proba)
    print("Industry:", industry)
    print("AUC-ROC Score:", auc_roc)

    # confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(conf_matrix)

    # accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)


Industry: A
AUC-ROC Score: 0.7811827956989248
Confusion Matrix:
[[19 11]
 [ 6 25]]
Accuracy: 0.7213114754098361
Industry: C
AUC-ROC Score: 0.8266883116883117
Confusion Matrix:
[[137  38]
 [ 40 136]]
Accuracy: 0.7777777777777778
Industry: F
AUC-ROC Score: 0.8306115382441065
Confusion Matrix:
[[386 146]
 [118 414]]
Accuracy: 0.7518796992481203
Industry: G
AUC-ROC Score: 0.8410657373554498
Confusion Matrix:
[[497 156]
 [147 506]]
Accuracy: 0.7679938744257274
Industry: H
AUC-ROC Score: 0.8568011143410853
Confusion Matrix:
[[ 97  31]
 [ 26 103]]
Accuracy: 0.7782101167315175
Industry: I
AUC-ROC Score: 0.7577039930555556
Confusion Matrix:
[[196  92]
 [ 80 208]]
Accuracy: 0.7013888888888888
Industry: J
AUC-ROC Score: 0.796015112631879
Confusion Matrix:
[[113  54]
 [ 30 138]]
Accuracy: 0.7492537313432835
Industry: L
AUC-ROC Score: 0.8054846938775511
Confusion Matrix:
[[33 15]
 [10 39]]
Accuracy: 0.7422680412371134
Industry: M
AUC-ROC Score: 0.8535998498912816
Confusion Matrix:
[[232  69]
 [ 66 