In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd

cleaned = pd.read_csv(r'C:\Users\jatin\OneDrive\Desktop\Loan-Approval\data\cleaned\train.csv')

In [2]:
from sklearn.model_selection import train_test_split

X = cleaned.drop('loan_status', axis=1).copy()
y = cleaned['loan_status'].copy()

train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
import lightgbm as lgb
from src.trainer import Trainer

# Instantiate LightGBM model
model = lgb.LGBMClassifier(
    n_estimators=1000,
    learning_rate=0.05,
    eval_metric="logloss",    
    verbose=100
)

# Trainer initialization
trainer = Trainer(model, run_name='LIGHTGBM')

# Fit the model with training and evaluation data
model = trainer.fit(train_X, train_y)

Training with default parameters
[LightGBM] [Info] Number of positive: 6708, number of negative: 40208
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.000145
[LightGBM] [Debug] init for col-wise cost 0.000011 seconds, init for row-wise cost 0.001759 seconds
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002410 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 935
[LightGBM] [Info] Number of data points in the train set: 46916, number of used features: 46
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.142979 -> initscore=-1.790765
[LightGBM] [Info] Start training from score -1.790765
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 10
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree wit

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 448.13it/s]


Starting cross-validation...
[LightGBM] [Info] Number of positive: 5366, number of negative: 32166
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.000140
[LightGBM] [Debug] init for col-wise cost 0.000002 seconds, init for row-wise cost 0.001077 seconds
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001513 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 932
[LightGBM] [Info] Number of data points in the train set: 37532, number of used features: 46
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.142971 -> initscore=-1.790827
[LightGBM] [Info] Start training from score -1.790827
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree with lea

In [4]:
# Hyperparameter tuning

parms = {
    'n_estimators': [100, 200, 1000],
    'max_depth': [5, 10, 20],
    'learning_rate': [0.1, 0.01, 0.001]
}

Fine_Tuned_model = trainer.hyperparameter_tuning(train_X, train_y, parms)

Starting hyperparameter tuning for LIGHTGBM...
[LightGBM] [Info] Number of positive: 5366, number of negative: 32166
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.000140
[LightGBM] [Debug] init for col-wise cost 0.000004 seconds, init for row-wise cost 0.001214 seconds
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001712 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 932
[LightGBM] [Info] Number of data points in the train set: 37532, number of used features: 46
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.142971 -> initscore=-1.790827
[LightGBM] [Info] Start training from score -1.790827
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 5
[LightGBM] [Debug] Trained a tree with leaves = 30 and depth = 5
[LightGBM] [Debug] Trained a tree with leaves = 30 and depth = 5
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 5
[LightGBM] [Debug] Train



Best Parameters: {'learning_rate': 0.01, 'max_depth': 20, 'n_estimators': 1000}
Best ROC AUC: 0.9566388616642806
🏃 View run LIGHTGBM_tuning at: http://localhost:5000/#/experiments/0/runs/fcf76ed420cf4f0892659b30f6bb6de0
🧪 View experiment at: http://localhost:5000/#/experiments/0


In [5]:
# Evaluate the model
from src.evaluate import Evaluator

evaluator = Evaluator(Fine_Tuned_model, test_X, test_y, run_name='LIGHTGBM_tuned')

In [6]:
# Save the model

trainer.save_model(path=r"C:\Users\jatin\OneDrive\Desktop\Loan-Approval\models", model_name='Fine_Tuned_model[LightGBM]')

Model saved to C:\Users\jatin\OneDrive\Desktop\Loan-Approval\models/Fine_Tuned_model[LightGBM].


0