In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import joblib

In [16]:
# Read the two input tables from the working directory.
# X is used further down as the feature matrix and y as the target to predict.
X = pd.read_csv("features.csv")
y = pd.read_csv("target.csv")

In [19]:
def grid_search(model, param_grid: dict, X_train, y_train):
    """Run an exhaustive grid search over ``param_grid`` and return the best estimator.

    Parameters
    ----------
    model
        Unfitted estimator passed to ``GridSearchCV`` as ``estimator``.
    param_grid : dict
        Mapping of parameter name to the list of candidate values to try.
    X_train, y_train
        Training features and labels. The search is fitted on these only, so
        a held-out test split stays unseen during model selection.

    Returns
    -------
    The ``best_estimator_`` attribute of the fitted ``GridSearchCV``.
    """
    grid = GridSearchCV(
        estimator=model,  # Model to tune
        param_grid=param_grid,  # Candidate values per parameter
        cv=2,  # 2-fold cross validation
        scoring="accuracy",  # Metric used to rank candidates
        n_jobs=-1,  # Use all available processors
    )
    # Fit on the arguments rather than the notebook-level X / y: fitting on
    # the full dataset would leak the test rows into hyperparameter selection.
    grid.fit(X_train, y_train)

    return grid.best_estimator_

In [20]:
# Hold out 25% of the rows for evaluation; stratifying on y keeps the class
# proportions the same in both splits, and random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, test_size=0.25, random_state=42, stratify=y, shuffle=True
)

# Create XGBoost Model
xgb_model = XGBClassifier()
# Candidate hyperparameters for the XGBoost grid search (search currently disabled below)
param_grid_xgb = {
    'n_estimators': [100, 300, 500],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 5, 7, 10],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'gamma': [0, 0.1, 0.2]
}

# Grid search is switched off; the model is trained with its default hyperparameters
# xgb_model = grid_search(xgb_model, param_grid_xgb, X_train, y_train)
xgb_model.fit(X_train, y_train)

# Create LightGBM Model
light_gbm = LGBMClassifier()
# Candidate hyperparameters for the LightGBM grid search (search currently disabled below)
param_grid_lgb = {
    'n_estimators': [100, 300, 500],
    'learning_rate': [0.01, 0.05, 0.1],
    'num_leaves': [31, 50, 100],
    'max_depth': [-1, 10, 20],
    'min_data_in_leaf': [1, 5, 10, 20],
    'subsample': [0.6, 0.8, 1.0],
    # LightGBM only applies `subsample` when the bagging frequency is > 0
    # (default 0 disables it); without this the three `subsample` values
    # would produce identical models and just triple the search time.
    'subsample_freq': [1],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

# Grid search is switched off; the model is trained with its default hyperparameters
# light_gbm = grid_search(light_gbm, param_grid_lgb, X_train, y_train)
light_gbm.fit(X_train, y_train)

# Create Neural Network
# nn_model = Sequential([
#     Input(shape=12),
#     Dense(units=200, activation="relu"),
#     Dense(units=400, activation="relu"),
#     Dropout(rate=0.1),
#     Dense(5, activation="relu"),
#     Dense(2, activation="sigmoid")
# ])

# # Compile and fit the Neural Network
# nn_model.compile(optimizer=Adam(learning_rate=0.001), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
# nn_model.fit(x=X, y=y, validation_split=0.1, batch_size=10, epochs=200, shuffle=True, verbose=1)

[LightGBM] [Info] Number of positive: 5331, number of negative: 5331
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000387 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 925
[LightGBM] [Info] Number of data points in the train set: 10662, number of used features: 25
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


In [21]:
# Score the held-out test split with the XGBoost model
y_pred_xgb = xgb_model.predict(X_test)
xgb_clf_report = classification_report(y_test, y_pred_xgb)

# Same evaluation for the LightGBM model
y_pred_lgbm = light_gbm.predict(X_test)
lgbm_clf_report = classification_report(y_test, y_pred_lgbm)

# Print both reports, XGBoost first
print(f"Classification Report for XGB Model:\n{xgb_clf_report}\n\n")
print(f"Classification Report for LGBM Model:\n{lgbm_clf_report}")

Classification Report for XGB Model:
              precision    recall  f1-score   support

           0       0.85      0.89      0.87      1777
           1       0.89      0.84      0.86      1777

    accuracy                           0.87      3554
   macro avg       0.87      0.87      0.87      3554
weighted avg       0.87      0.87      0.87      3554



Classification Report for LGBM Model:
              precision    recall  f1-score   support

           0       0.84      0.92      0.88      1777
           1       0.91      0.82      0.87      1777

    accuracy                           0.87      3554
   macro avg       0.88      0.87      0.87      3554
weighted avg       0.88      0.87      0.87      3554



In [22]:
# Persist the fitted XGBoost model for later use (the LightGBM model is not saved)
joblib.dump(value=xgb_model, filename=r"../CreditRiskApp/resources/model.pkl")

['../CreditRiskApp/resources/model.pkl']