# LOAN INTEREST RATE PREDICTION - Hyperparameter Tuning
By Nohan Joemon

In [2]:
import pandas as pd
dataset = pd.read_csv('train.csv')
print(dataset.shape)
dataset.head()

(131447, 13)


Unnamed: 0,Loan_ID,Loan_Amount_Requested,Length_Employed,Home_Owner,Annual_Income,Income_Verified,Purpose_Of_Loan,Debt_To_Income,Inquiries_Last_6Mo,Number_Open_Accounts,Total_Accounts,Gender,Interest_Rate
0,10156551,18375,6,1,95000.0,2,1,21.45,3,17,28,1,3.0
1,10115057,4000,3,4,-1.0,2,2,21.64,0,18,33,0,2.0
2,10030395,15000,10,1,83000.0,2,2,22.9,3,11,44,1,3.0
3,10158544,27500,10,0,100000.0,1,2,9.66,0,5,14,1,2.0
4,10117658,9450,-1,5,28000.0,0,1,34.89,1,9,24,0,2.0


In [3]:
import optuna 
import joblib
from optuna import Trial, visualization
from optuna.samplers import TPESampler
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

def objective(trial: Trial,X,y) -> float:
    
    joblib.dump(study, 'study.pkl')
    
    train_X,test_X,train_y,test_y = train_test_split(X, y, test_size = 0.20,random_state = 101)

    param = {
                "n_estimators" : trial.suggest_int('n_estimators', 0, 1000),
                'max_depth':trial.suggest_int('max_depth', 2, 6),
                'reg_alpha':trial.suggest_int('reg_alpha', 0, 5),
                'reg_lambda':trial.suggest_int('reg_lambda', 0, 5),
                'min_child_weight':trial.suggest_int('min_child_weight', 0, 5),
                'gamma':trial.suggest_int('gamma', 0, 5),
                'learning_rate':trial.suggest_loguniform('learning_rate',0.005,0.5),
                'colsample_bytree':trial.suggest_discrete_uniform('colsample_bytree',0.1,1,0.01),
                'nthread' : -1
            }
    model = XGBClassifier(**param)

    model.fit(train_X,train_y)

    return cross_val_score(model,test_X,test_y).mean()

In [4]:
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
study = optuna.create_study(direction='maximize',sampler=TPESampler())
study.optimize(lambda trial : objective(trial,X,y),n_trials=50)

[32m[I 2021-05-23 15:45:31,354][0m A new study created in memory with name: no-name-16746b34-00a3-4148-ae57-c9ef1a08097a[0m
























[32m[I 2021-05-23 15:45:46,007][0m Trial 0 finished with value: 0.509014834537847 and parameters: {'n_estimators': 60, 'max_depth': 4, 'reg_alpha': 3, 'reg_lambda': 1, 'min_child_weight': 3, 'gamma': 0, 'learning_rate': 0.01609364359362938, 'colsample_bytree': 0.41000000000000003}. Best is trial 0 with value: 0.509014834537847.[0m
























[32m[I 2021-05-23 15:47:33,110][0m Trial 1 finished with value: 0.5225180677063522 and parameters: {'n_estimators': 254, 'max_depth': 6, 'reg_alpha': 4, 'reg_lambda': 1, 'min_child_weight': 2, 'gamma': 4, 'learning_rate': 0.3934876695387218, 'colsample_bytree': 0.6}. Best is trial 1 with value: 0.5225180677063522.[0m
























[32m[I 2021-05-23 15:52:01,538][0m Trial 2 finished with value: 0.5203879802206163 and parameters: {'n_estimators': 802, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 0, 'min_child_weight': 4, 'gamma': 1, 'learning_rate': 0.008341639056878234, 'colsample_bytree': 0.97}. Best is trial 1 with value: 0.5225180677063522.[0m
























[32m[I 2021-05-23 15:53:43,739][0m Trial 3 finished with value: 0.5247242297451502 and parameters: {'n_estimators': 448, 'max_depth': 6, 'reg_alpha': 0, 'reg_lambda': 2, 'min_child_weight': 3, 'gamma': 5, 'learning_rate': 0.070614662483113, 'colsample_bytree': 0.23}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 15:54:30,820][0m Trial 4 finished with value: 0.5208824648155193 and parameters: {'n_estimators': 325, 'max_depth': 3, 'reg_alpha': 0, 'reg_lambda': 1, 'min_child_weight': 1, 'gamma': 2, 'learning_rate': 0.026857502897765617, 'colsample_bytree': 0.32}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 15:58:08,826][0m Trial 5 finished with value: 0.5222137694941043 and parameters: {'n_estimators': 802, 'max_depth': 4, 'reg_alpha': 3, 'reg_lambda': 1, 'min_child_weight': 3, 'gamma': 5, 'learning_rate': 0.015498033476475256, 'colsample_bytree': 0.62}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:00:24,419][0m Trial 6 finished with value: 0.5218714340053252 and parameters: {'n_estimators': 256, 'max_depth': 6, 'reg_alpha': 0, 'reg_lambda': 1, 'min_child_weight': 3, 'gamma': 5, 'learning_rate': 0.020466911052714382, 'colsample_bytree': 0.95}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:01:39,823][0m Trial 7 finished with value: 0.507873716241917 and parameters: {'n_estimators': 749, 'max_depth': 2, 'reg_alpha': 0, 'reg_lambda': 1, 'min_child_weight': 5, 'gamma': 3, 'learning_rate': 0.008232829511964773, 'colsample_bytree': 0.22}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:05:18,489][0m Trial 8 finished with value: 0.5203119056675543 and parameters: {'n_estimators': 596, 'max_depth': 5, 'reg_alpha': 2, 'reg_lambda': 2, 'min_child_weight': 3, 'gamma': 3, 'learning_rate': 0.007855102158679279, 'colsample_bytree': 0.65}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:05:25,567][0m Trial 9 finished with value: 0.5173830353746671 and parameters: {'n_estimators': 22, 'max_depth': 5, 'reg_alpha': 1, 'reg_lambda': 0, 'min_child_weight': 1, 'gamma': 0, 'learning_rate': 0.15845558887496336, 'colsample_bytree': 0.33999999999999997}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:06:48,044][0m Trial 10 finished with value: 0.5222518067706352 and parameters: {'n_estimators': 486, 'max_depth': 6, 'reg_alpha': 1, 'reg_lambda': 5, 'min_child_weight': 5, 'gamma': 5, 'learning_rate': 0.0691898662900832, 'colsample_bytree': 0.1}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:08:53,693][0m Trial 11 finished with value: 0.5234309623430963 and parameters: {'n_estimators': 281, 'max_depth': 6, 'reg_alpha': 5, 'reg_lambda': 4, 'min_child_weight': 0, 'gamma': 4, 'learning_rate': 0.41495138330581405, 'colsample_bytree': 0.74}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:11:47,400][0m Trial 12 finished with value: 0.5227082540890072 and parameters: {'n_estimators': 450, 'max_depth': 5, 'reg_alpha': 5, 'reg_lambda': 4, 'min_child_weight': 0, 'gamma': 4, 'learning_rate': 0.4051511900786437, 'colsample_bytree': 0.8}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:12:51,715][0m Trial 13 finished with value: 0.5242297451502472 and parameters: {'n_estimators': 137, 'max_depth': 6, 'reg_alpha': 5, 'reg_lambda': 3, 'min_child_weight': 0, 'gamma': 4, 'learning_rate': 0.13722036428337095, 'colsample_bytree': 0.74}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:13:47,181][0m Trial 14 finished with value: 0.5244199315329022 and parameters: {'n_estimators': 137, 'max_depth': 5, 'reg_alpha': 2, 'reg_lambda': 3, 'min_child_weight': 2, 'gamma': 4, 'learning_rate': 0.11888607914847249, 'colsample_bytree': 0.83}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:18:46,473][0m Trial 15 finished with value: 0.5242677824267783 and parameters: {'n_estimators': 990, 'max_depth': 5, 'reg_alpha': 1, 'reg_lambda': 3, 'min_child_weight': 2, 'gamma': 5, 'learning_rate': 0.06868735251245192, 'colsample_bytree': 0.45999999999999996}. Best is trial 3 with value: 0.5247242297451502.[0m
























[32m[I 2021-05-23 16:20:55,334][0m Trial 16 finished with value: 0.5262457208063902 and parameters: {'n_estimators': 644, 'max_depth': 5, 'reg_alpha': 2, 'reg_lambda': 2, 'min_child_weight': 4, 'gamma': 3, 'learning_rate': 0.10842372022300814, 'colsample_bytree': 0.17}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:22:04,717][0m Trial 17 finished with value: 0.5221757322175733 and parameters: {'n_estimators': 623, 'max_depth': 3, 'reg_alpha': 2, 'reg_lambda': 2, 'min_child_weight': 4, 'gamma': 2, 'learning_rate': 0.04484543212985569, 'colsample_bytree': 0.1}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:25:20,722][0m Trial 18 finished with value: 0.5204640547736783 and parameters: {'n_estimators': 971, 'max_depth': 5, 'reg_alpha': 1, 'reg_lambda': 2, 'min_child_weight': 4, 'gamma': 1, 'learning_rate': 0.19356666768445005, 'colsample_bytree': 0.21000000000000002}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:26:13,644][0m Trial 19 finished with value: 0.5229745150247243 and parameters: {'n_estimators': 404, 'max_depth': 3, 'reg_alpha': 3, 'reg_lambda': 4, 'min_child_weight': 4, 'gamma': 3, 'learning_rate': 0.2453677502285256, 'colsample_bytree': 0.2}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:29:47,490][0m Trial 20 finished with value: 0.5229745150247243 and parameters: {'n_estimators': 630, 'max_depth': 6, 'reg_alpha': 0, 'reg_lambda': 2, 'min_child_weight': 5, 'gamma': 2, 'learning_rate': 0.08933237696099638, 'colsample_bytree': 0.48}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:30:09,199][0m Trial 21 finished with value: 0.5155572461011791 and parameters: {'n_estimators': 146, 'max_depth': 5, 'reg_alpha': 2, 'reg_lambda': 3, 'min_child_weight': 2, 'gamma': 4, 'learning_rate': 0.10784385133433701, 'colsample_bytree': 0.1}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:33:55,307][0m Trial 22 finished with value: 0.5244199315329022 and parameters: {'n_estimators': 557, 'max_depth': 5, 'reg_alpha': 2, 'reg_lambda': 3, 'min_child_weight': 1, 'gamma': 3, 'learning_rate': 0.04494503566707104, 'colsample_bytree': 0.88}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:36:15,767][0m Trial 23 finished with value: 0.5241156333206543 and parameters: {'n_estimators': 723, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 2, 'min_child_weight': 1, 'gamma': 3, 'learning_rate': 0.03970304805769341, 'colsample_bytree': 0.29000000000000004}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:38:25,472][0m Trial 24 finished with value: 0.5230886268543172 and parameters: {'n_estimators': 883, 'max_depth': 5, 'reg_alpha': 1, 'reg_lambda': 3, 'min_child_weight': 2, 'gamma': 4, 'learning_rate': 0.23061462511250022, 'colsample_bytree': 0.16}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:41:01,452][0m Trial 25 finished with value: 0.5230505895777863 and parameters: {'n_estimators': 538, 'max_depth': 6, 'reg_alpha': 2, 'reg_lambda': 4, 'min_child_weight': 4, 'gamma': 1, 'learning_rate': 0.033907823602633225, 'colsample_bytree': 0.26}. Best is trial 16 with value: 0.5262457208063902.[0m
























[32m[I 2021-05-23 16:42:39,922][0m Trial 26 finished with value: 0.5264739444655763 and parameters: {'n_estimators': 381, 'max_depth': 5, 'reg_alpha': 3, 'reg_lambda': 2, 'min_child_weight': 3, 'gamma': 2, 'learning_rate': 0.07021493641780459, 'colsample_bytree': 0.38}. Best is trial 26 with value: 0.5264739444655763.[0m
























[32m[I 2021-05-23 16:44:02,637][0m Trial 27 finished with value: 0.5267782426778242 and parameters: {'n_estimators': 374, 'max_depth': 4, 'reg_alpha': 3, 'reg_lambda': 2, 'min_child_weight': 3, 'gamma': 2, 'learning_rate': 0.06324565665480808, 'colsample_bytree': 0.38}. Best is trial 27 with value: 0.5267782426778242.[0m
























[32m[I 2021-05-23 16:45:26,969][0m Trial 28 finished with value: 0.5275389882084442 and parameters: {'n_estimators': 377, 'max_depth': 4, 'reg_alpha': 3, 'reg_lambda': 2, 'min_child_weight': 4, 'gamma': 2, 'learning_rate': 0.05754894103201038, 'colsample_bytree': 0.38}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:46:46,788][0m Trial 29 finished with value: 0.5257892734880183 and parameters: {'n_estimators': 353, 'max_depth': 4, 'reg_alpha': 3, 'reg_lambda': 0, 'min_child_weight': 3, 'gamma': 2, 'learning_rate': 0.05693977710799886, 'colsample_bytree': 0.39}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:48:01,448][0m Trial 30 finished with value: 0.5168124762267021 and parameters: {'n_estimators': 382, 'max_depth': 3, 'reg_alpha': 3, 'reg_lambda': 1, 'min_child_weight': 3, 'gamma': 0, 'learning_rate': 0.013489908639223117, 'colsample_bytree': 0.51}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:50:25,457][0m Trial 31 finished with value: 0.5258273107645492 and parameters: {'n_estimators': 679, 'max_depth': 4, 'reg_alpha': 3, 'reg_lambda': 2, 'min_child_weight': 4, 'gamma': 2, 'learning_rate': 0.08833817775028993, 'colsample_bytree': 0.4}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:51:08,375][0m Trial 32 finished with value: 0.5198934956257133 and parameters: {'n_estimators': 182, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 2, 'min_child_weight': 4, 'gamma': 1, 'learning_rate': 0.029859902620626988, 'colsample_bytree': 0.35}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:53:04,425][0m Trial 33 finished with value: 0.5265119817421072 and parameters: {'n_estimators': 431, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 2, 'min_child_weight': 5, 'gamma': 2, 'learning_rate': 0.05491581864056015, 'colsample_bytree': 0.56}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:54:07,314][0m Trial 34 finished with value: 0.5245340433624952 and parameters: {'n_estimators': 312, 'max_depth': 3, 'reg_alpha': 4, 'reg_lambda': 1, 'min_child_weight': 5, 'gamma': 2, 'learning_rate': 0.05977372743696404, 'colsample_bytree': 0.52}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:55:08,009][0m Trial 35 finished with value: 0.5200456447318371 and parameters: {'n_estimators': 215, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 2, 'min_child_weight': 5, 'gamma': 1, 'learning_rate': 0.02363979988100297, 'colsample_bytree': 0.58}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:56:52,257][0m Trial 36 finished with value: 0.5263217953594522 and parameters: {'n_estimators': 426, 'max_depth': 4, 'reg_alpha': 3, 'reg_lambda': 1, 'min_child_weight': 3, 'gamma': 2, 'learning_rate': 0.08466801989454034, 'colsample_bytree': 0.44000000000000006}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:58:33,370][0m Trial 37 finished with value: 0.5254089007227083 and parameters: {'n_estimators': 498, 'max_depth': 3, 'reg_alpha': 4, 'reg_lambda': 2, 'min_child_weight': 4, 'gamma': 1, 'learning_rate': 0.05165910441881827, 'colsample_bytree': 0.5700000000000001}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 16:59:27,974][0m Trial 38 finished with value: 0.5188284518828452 and parameters: {'n_estimators': 356, 'max_depth': 2, 'reg_alpha': 3, 'reg_lambda': 3, 'min_child_weight': 3, 'gamma': 2, 'learning_rate': 0.03607659544571095, 'colsample_bytree': 0.65}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:00:35,720][0m Trial 39 finished with value: 0.5208824648155193 and parameters: {'n_estimators': 287, 'max_depth': 4, 'reg_alpha': 3, 'reg_lambda': 1, 'min_child_weight': 5, 'gamma': 2, 'learning_rate': 0.01942238513234208, 'colsample_bytree': 0.37}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:02:39,597][0m Trial 40 finished with value: 0.5272727272727272 and parameters: {'n_estimators': 462, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 2, 'min_child_weight': 3, 'gamma': 1, 'learning_rate': 0.07637980760536581, 'colsample_bytree': 0.42000000000000004}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:04:27,971][0m Trial 41 finished with value: 0.5267021681247622 and parameters: {'n_estimators': 439, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 2, 'min_child_weight': 3, 'gamma': 1, 'learning_rate': 0.07802295210847564, 'colsample_bytree': 0.43000000000000005}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:06:19,003][0m Trial 42 finished with value: 0.5232027386839102 and parameters: {'n_estimators': 460, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 2, 'min_child_weight': 3, 'gamma': 0, 'learning_rate': 0.08419483296159233, 'colsample_bytree': 0.44000000000000006}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:08:06,933][0m Trial 43 finished with value: 0.5273488018257892 and parameters: {'n_estimators': 539, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 1, 'min_child_weight': 3, 'gamma': 1, 'learning_rate': 0.049362333600470515, 'colsample_bytree': 0.31}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:09:33,949][0m Trial 44 finished with value: 0.5268923545074172 and parameters: {'n_estimators': 549, 'max_depth': 3, 'reg_alpha': 5, 'reg_lambda': 1, 'min_child_weight': 2, 'gamma': 1, 'learning_rate': 0.15773717515382651, 'colsample_bytree': 0.31}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:11:01,523][0m Trial 45 finished with value: 0.5228223659186002 and parameters: {'n_estimators': 550, 'max_depth': 3, 'reg_alpha': 5, 'reg_lambda': 0, 'min_child_weight': 2, 'gamma': 0, 'learning_rate': 0.14839329118723601, 'colsample_bytree': 0.30000000000000004}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:12:23,160][0m Trial 46 finished with value: 0.5252567516165842 and parameters: {'n_estimators': 507, 'max_depth': 3, 'reg_alpha': 5, 'reg_lambda': 1, 'min_child_weight': 2, 'gamma': 1, 'learning_rate': 0.3122603507935569, 'colsample_bytree': 0.27}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:13:33,785][0m Trial 47 finished with value: 0.5253328261696463 and parameters: {'n_estimators': 588, 'max_depth': 2, 'reg_alpha': 5, 'reg_lambda': 0, 'min_child_weight': 2, 'gamma': 1, 'learning_rate': 0.12628777463582286, 'colsample_bytree': 0.33}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:14:55,395][0m Trial 48 finished with value: 0.5246481551920883 and parameters: {'n_estimators': 506, 'max_depth': 3, 'reg_alpha': 5, 'reg_lambda': 1, 'min_child_weight': 3, 'gamma': 0, 'learning_rate': 0.16142982794326527, 'colsample_bytree': 0.25}. Best is trial 28 with value: 0.5275389882084442.[0m
























[32m[I 2021-05-23 17:16:06,775][0m Trial 49 finished with value: 0.509471281856219 and parameters: {'n_estimators': 328, 'max_depth': 4, 'reg_alpha': 4, 'reg_lambda': 0, 'min_child_weight': 1, 'gamma': 1, 'learning_rate': 0.005183846082200632, 'colsample_bytree': 0.31}. Best is trial 28 with value: 0.5275389882084442.[0m


In [5]:
print('Best trial: score {},\nparams {}'.format(study.best_trial.value,study.best_trial.params))

Best trial: score 0.5275389882084442,
params {'n_estimators': 377, 'max_depth': 4, 'reg_alpha': 3, 'reg_lambda': 2, 'min_child_weight': 4, 'gamma': 2, 'learning_rate': 0.05754894103201038, 'colsample_bytree': 0.38}
