In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/credit-score-classification-86-features/cibil dataset.xlsx
/kaggle/input/credit-score-classification-86-features/cibil_and_bank.xlsx
/kaggle/input/credit-score-classification-86-features/only_imp_final_features.xlsx
/kaggle/input/credit-score-classification-86-features/bank internal dataset.xlsx


## Summary of Steps

1. **Data Preparation**:
   - Load data from Excel files and merge them.
   - Handle missing values by dropping rows with specific values.

2. **Feature Selection**:
   - Use chi-square test for categorical features and ANOVA for numerical features.
   - Retain features with significant associations with the target variable.

3. **Data Encoding**:
   - Label encode categorical features and perform ordinal encoding for 'EDUCATION'.
   - Apply one-hot encoding to convert categorical features into binary format.

4. **Machine Learning Model Fitting**:
   - Train Random Forest, Decision Tree, and XGBoost classifiers.
   - Evaluate model performance using accuracy, precision, recall, and F1-score.

5. **Model Improvement**:
   - Explore hyperparameter tuning for the XGBoost classifier using techniques like grid search.
   - Perform a wide search over combinations of hyperparameters, e.g., around 900 combinations, to find optimal settings for the XGBoost model.

6. **XGBoost Model Improvement**:

This XGBoost setup stands out:

- **colsample_bytree:** 0.9
- **learning_rate:** 1
- **max_depth:** 3
- **alpha:** 1
- **n_estimators:** 50

Achieving 81% accuracy on the training set and 78% on the testing set.



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from scipy.stats import chi2_contingency
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
import warnings
import os


1. **Data Preparation**:

In [3]:
df1 = pd.read_excel("/kaggle/input/credit-score-classification-86-features/bank internal dataset.xlsx")
df2 = pd.read_excel("/kaggle/input/credit-score-classification-86-features/cibil dataset.xlsx")

In [4]:
df1.shape
df2.shape

(51336, 62)

In [5]:
df1.columns
df2.columns

Index(['PROSPECTID', 'time_since_recent_payment',
       'time_since_first_deliquency', 'time_since_recent_deliquency',
       'num_times_delinquent', 'max_delinquency_level',
       'max_recent_level_of_deliq', 'num_deliq_6mts', 'num_deliq_12mts',
       'num_deliq_6_12mts', 'max_deliq_6mts', 'max_deliq_12mts',
       'num_times_30p_dpd', 'num_times_60p_dpd', 'num_std', 'num_std_6mts',
       'num_std_12mts', 'num_sub', 'num_sub_6mts', 'num_sub_12mts', 'num_dbt',
       'num_dbt_6mts', 'num_dbt_12mts', 'num_lss', 'num_lss_6mts',
       'num_lss_12mts', 'recent_level_of_deliq', 'tot_enq', 'CC_enq',
       'CC_enq_L6m', 'CC_enq_L12m', 'PL_enq', 'PL_enq_L6m', 'PL_enq_L12m',
       'time_since_recent_enq', 'enq_L12m', 'enq_L6m', 'enq_L3m',
       'MARITALSTATUS', 'EDUCATION', 'AGE', 'GENDER', 'NETMONTHLYINCOME',
       'Time_With_Curr_Empr', 'pct_of_active_TLs_ever',
       'pct_opened_TLs_L6m_of_L12m', 'pct_currentBal_all_TL', 'CC_utilization',
       'CC_Flag', 'PL_utilization', 'PL_Fla

In [6]:
# We noticed that the Age_Oldest_TL column has a lot of missing values, so we're going to remove it from the dataset.
df1 = df1.loc[df1['Age_Oldest_TL'] != -99999 ]

In [7]:
# Removes columns that contain more than 10,000 instances of the null value.
coloums_to_be_removed = []

for i in df2.columns:
    if df2.loc[df2[i] == -99999].shape[0] > 10000:
        coloums_to_be_removed.append(i)
        
df2 = df2.drop(coloums_to_be_removed , axis = 1)

df2.shape

(51336, 54)

In [8]:
# Removes rows with null values for any column.
for i in df2.columns:
    df2 = df2.loc[df2[i] != -99999]

df2.shape

(42066, 54)

In [9]:
df1.isna().sum()


PROSPECTID              0
Total_TL                0
Tot_Closed_TL           0
Tot_Active_TL           0
Total_TL_opened_L6M     0
Tot_TL_closed_L6M       0
pct_tl_open_L6M         0
pct_tl_closed_L6M       0
pct_active_tl           0
pct_closed_tl           0
Total_TL_opened_L12M    0
Tot_TL_closed_L12M      0
pct_tl_open_L12M        0
pct_tl_closed_L12M      0
Tot_Missed_Pmnt         0
Auto_TL                 0
CC_TL                   0
Consumer_TL             0
Gold_TL                 0
Home_TL                 0
PL_TL                   0
Secured_TL              0
Unsecured_TL            0
Other_TL                0
Age_Oldest_TL           0
Age_Newest_TL           0
dtype: int64

In [10]:
df2.isna().sum()

PROSPECTID                    0
time_since_recent_payment     0
num_times_delinquent          0
max_recent_level_of_deliq     0
num_deliq_6mts                0
num_deliq_12mts               0
num_deliq_6_12mts             0
num_times_30p_dpd             0
num_times_60p_dpd             0
num_std                       0
num_std_6mts                  0
num_std_12mts                 0
num_sub                       0
num_sub_6mts                  0
num_sub_12mts                 0
num_dbt                       0
num_dbt_6mts                  0
num_dbt_12mts                 0
num_lss                       0
num_lss_6mts                  0
num_lss_12mts                 0
recent_level_of_deliq         0
tot_enq                       0
CC_enq                        0
CC_enq_L6m                    0
CC_enq_L12m                   0
PL_enq                        0
PL_enq_L6m                    0
PL_enq_L12m                   0
time_since_recent_enq         0
enq_L12m                      0
enq_L6m 

In [11]:
# Finding which columns are common between the datasets.
for i in list(df1.columns):
    if i in list(df2.columns):
        print(i)

PROSPECTID


In [12]:
df = pd.merge(df1 , df2 , how="inner" , left_on = ["PROSPECTID"] , right_on = ["PROSPECTID"])

In [13]:
df.shape

(42064, 79)

In [14]:
#df.to_excel('cibil_and_bank.xlsx', index=False)

2. **Feature Selection**:

In [15]:
# Columns in df that contain categorical data
for i in df.columns:
    if df[i].dtype == 'object':
        print(i)

MARITALSTATUS
EDUCATION
GENDER
last_prod_enq2
first_prod_enq2
Approved_Flag


In [16]:
from scipy.stats import chi2_contingency

In [17]:
# Every feature tested shows Association with the 'Approved_Flag', so we will retain all of them.
for i in ['MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2', 'first_prod_enq2']:
    chi2, pval, _, _ = chi2_contingency(pd.crosstab(df[i], df['Approved_Flag']))
    print(i, '---', pval)


MARITALSTATUS --- 3.578180861038862e-233
EDUCATION --- 2.6942265249737532e-30
GENDER --- 1.907936100186563e-05
last_prod_enq2 --- 0.0
first_prod_enq2 --- 7.84997610555419e-287


In [18]:
numeric_columns = []
for i in df.columns:
    if df[i].dtype != 'object' and i not in ['PROSPECTID','Approved_Flag']:
        numeric_columns.append(i)

In [19]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [20]:
vif_data = df[numeric_columns]
total_columns = vif_data.shape[1]
columns_to_be_kept = []
column_index = 0

In [21]:
for i in range (0,total_columns):
    
    vif_value = variance_inflation_factor(vif_data, column_index)
    print (column_index,'---',vif_value)
    
    
    if vif_value <= 6:
        columns_to_be_kept.append( numeric_columns[i] )
        column_index = column_index+1
    
    else:
        vif_data = vif_data.drop([ numeric_columns[i] ] , axis=1)

  vif = 1. / (1. - r_squared_i)


0 --- inf


  vif = 1. / (1. - r_squared_i)


0 --- inf
0 --- 11.320180023967996
0 --- 8.363698035000336
0 --- 6.520647877790928
0 --- 5.149501618212625
1 --- 2.611111040579735


  vif = 1. / (1. - r_squared_i)


2 --- inf
2 --- 1788.7926256209232
2 --- 8.601028256477228
2 --- 3.8328007921530785
3 --- 6.0996533816467355
3 --- 5.581352009642762
4 --- 1.985584353098778


  vif = 1. / (1. - r_squared_i)


5 --- inf
5 --- 4.809538302819343
6 --- 23.270628983464636
6 --- 30.595522588100053
6 --- 4.3843464059655854
7 --- 3.0646584155234238
8 --- 2.898639771299252
9 --- 4.377876915347322
10 --- 2.207853583695844
11 --- 4.916914200506864
12 --- 5.214702030064725
13 --- 3.3861625024231476
14 --- 7.840583309478997
14 --- 5.255034641721434


  vif = 1. / (1. - r_squared_i)


15 --- inf
15 --- 7.380634506427232
15 --- 1.4210050015175733
16 --- 8.083255010190323
16 --- 1.6241227524040114
17 --- 7.257811920140003
17 --- 15.59624383268298
17 --- 1.825857047132431
18 --- 1.5080839450032661
19 --- 2.172088834824578
20 --- 2.6233975535272305
21 --- 2.2959970812106167
22 --- 7.360578319196446
22 --- 2.1602387773102554
23 --- 2.8686288267891467
24 --- 6.458218003637272
24 --- 2.8474118865638265
25 --- 4.7531981562840855
26 --- 16.227354755948223
26 --- 6.424377256363877
26 --- 8.887080381808687
26 --- 2.3804746142952653
27 --- 8.609513476514548
27 --- 13.06755093547673
27 --- 3.5000400566546555
28 --- 1.908795587481377
29 --- 17.006562234161628
29 --- 10.730485153719197
29 --- 2.3538497522950275
30 --- 22.104855915136433
30 --- 2.7971639638512906
31 --- 3.424171203217696
32 --- 10.175021454450935
32 --- 6.408710354561301
32 --- 1.001151196262562
33 --- 3.069197305397274
34 --- 2.8091261600643724
35 --- 20.249538381980678
35 --- 15.864576541593745
35 --- 1.833164974

In [22]:
columns_to_be_kept

['pct_tl_open_L6M',
 'pct_tl_closed_L6M',
 'Tot_TL_closed_L12M',
 'pct_tl_closed_L12M',
 'Tot_Missed_Pmnt',
 'CC_TL',
 'Home_TL',
 'PL_TL',
 'Secured_TL',
 'Unsecured_TL',
 'Other_TL',
 'Age_Oldest_TL',
 'Age_Newest_TL',
 'time_since_recent_payment',
 'max_recent_level_of_deliq',
 'num_deliq_6_12mts',
 'num_times_60p_dpd',
 'num_std_12mts',
 'num_sub',
 'num_sub_6mts',
 'num_sub_12mts',
 'num_dbt',
 'num_dbt_12mts',
 'num_lss',
 'num_lss_12mts',
 'recent_level_of_deliq',
 'CC_enq_L12m',
 'PL_enq_L12m',
 'time_since_recent_enq',
 'enq_L3m',
 'NETMONTHLYINCOME',
 'Time_With_Curr_Empr',
 'pct_currentBal_all_TL',
 'CC_Flag',
 'PL_Flag',
 'pct_PL_enq_L6m_of_ever',
 'pct_CC_enq_L6m_of_ever',
 'HL_Flag',
 'GL_Flag']

In [23]:
# Final features
features = columns_to_be_kept + ['MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2', 'first_prod_enq2']
df = df[features + ['Approved_Flag']]

3. **Data Encoding**:

In [24]:
# Label encoding for the categorical features
['MARITALSTATUS', 'EDUCATION', 'GENDER' , 'last_prod_enq2' ,'first_prod_enq2']

['MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2', 'first_prod_enq2']

In [25]:

df['MARITALSTATUS'].unique()    
df['EDUCATION'].unique()
df['GENDER'].unique()
df['last_prod_enq2'].unique()
df['first_prod_enq2'].unique()


array(['PL', 'ConsumerLoan', 'others', 'AL', 'HL', 'CC'], dtype=object)

In [26]:
df.loc[df['EDUCATION'] == 'SSC',['EDUCATION']]              = 1
df.loc[df['EDUCATION'] == '12TH',['EDUCATION']]             = 2
df.loc[df['EDUCATION'] == 'GRADUATE',['EDUCATION']]         = 3
df.loc[df['EDUCATION'] == 'UNDER GRADUATE',['EDUCATION']]   = 3
df.loc[df['EDUCATION'] == 'POST-GRADUATE',['EDUCATION']]    = 4
df.loc[df['EDUCATION'] == 'OTHERS',['EDUCATION']]           = 1
df.loc[df['EDUCATION'] == 'PROFESSIONAL',['EDUCATION']]     = 3

In [27]:
df['EDUCATION'].value_counts()
df['EDUCATION'] = df['EDUCATION'].astype(int)

In [28]:
df_encoded = pd.get_dummies(df, columns=['MARITALSTATUS','GENDER', 'last_prod_enq2' ,'first_prod_enq2'])



df_encoded.info()
k = df_encoded.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42064 entries, 0 to 42063
Data columns (total 57 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   pct_tl_open_L6M               42064 non-null  float64
 1   pct_tl_closed_L6M             42064 non-null  float64
 2   Tot_TL_closed_L12M            42064 non-null  int64  
 3   pct_tl_closed_L12M            42064 non-null  float64
 4   Tot_Missed_Pmnt               42064 non-null  int64  
 5   CC_TL                         42064 non-null  int64  
 6   Home_TL                       42064 non-null  int64  
 7   PL_TL                         42064 non-null  int64  
 8   Secured_TL                    42064 non-null  int64  
 9   Unsecured_TL                  42064 non-null  int64  
 10  Other_TL                      42064 non-null  int64  
 11  Age_Oldest_TL                 42064 non-null  int64  
 12  Age_Newest_TL                 42064 non-null  int64  
 13  t

In [29]:
k

Unnamed: 0,pct_tl_open_L6M,pct_tl_closed_L6M,Tot_TL_closed_L12M,pct_tl_closed_L12M,Tot_Missed_Pmnt,CC_TL,Home_TL,PL_TL,Secured_TL,Unsecured_TL,...,NETMONTHLYINCOME,Time_With_Curr_Empr,pct_currentBal_all_TL,CC_Flag,PL_Flag,pct_PL_enq_L6m_of_ever,pct_CC_enq_L6m_of_ever,HL_Flag,GL_Flag,EDUCATION
count,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0,...,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0,42064.0
mean,0.179032,0.097783,0.825504,0.160365,0.525746,0.145921,0.076241,0.328,2.921334,2.341646,...,26929.9,110.345783,0.883693,0.102962,0.193063,0.195497,0.064186,0.252235,0.05658,2.313689
std,0.278043,0.210957,1.537208,0.258831,1.106442,0.549314,0.358582,0.916368,6.379764,3.405397,...,20843.0,75.629967,40.622275,0.303913,0.394707,0.367414,0.225989,0.4343,0.231042,0.87107
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,18000.0,61.0,0.152,0.0,0.0,0.0,0.0,0.0,0.0,2.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,24000.0,92.0,0.6,0.0,0.0,0.0,0.0,0.0,0.0,2.0
75%,0.333,0.1,1.0,0.25,1.0,0.0,0.0,0.0,3.0,3.0,...,31000.0,131.0,0.86,0.0,0.0,0.0,0.0,1.0,0.0,3.0
max,1.0,1.0,33.0,1.0,34.0,27.0,10.0,29.0,235.0,55.0,...,2500000.0,1020.0,6327.5,1.0,1.0,1.0,1.0,1.0,1.0,4.0


In [30]:
#df_encoded.to_excel('only_imp_final_features.xlsx', index=False)

4. **Machine Learning Model Fitting**:

In [31]:
from sklearn.preprocessing import StandardScaler

In [32]:
y = df_encoded['Approved_Flag']
x = df_encoded.drop(['Approved_Flag'], axis=1)

In [33]:
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

In [34]:
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=42)

In [35]:
# 1. Random Forest
rf_classifier = RandomForestClassifier(n_estimators = 200, random_state=42)
rf_classifier.fit(x_train, y_train)

y_pred = rf_classifier.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)

print ()
print(f'Accuracy: {accuracy}')
print ()
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()


Accuracy: 0.7626292642339237

Class p1:
Precision: 0.8385167464114832
Recall: 0.6913214990138067
F1 Score: 0.7578378378378378

Class p2:
Precision: 0.7924464677120215
Recall: 0.931615460852329
F1 Score: 0.8564139941690961

Class p3:
Precision: 0.4448051948051948
Recall: 0.20679245283018868
F1 Score: 0.28232869654817105

Class p4:
Precision: 0.7194174757281553
Recall: 0.7201166180758017
F1 Score: 0.7197668771248178



In [36]:
# 2.Xgboost
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

In [37]:
xgb_classifier = xgb.XGBClassifier(objective='multi:softmax',  num_class=4)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.2, random_state=42)

In [38]:
xgb_classifier.fit(x_train, y_train)
y_pred = xgb_classifier.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print ()
print(f'Accuracy: {accuracy:.2f}')
print ()

precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()


Accuracy: 0.77

Class p1:
Precision: 0.8275862068965517
Recall: 0.757396449704142
F1 Score: 0.7909371781668384

Class p2:
Precision: 0.8219885958660014
Recall: 0.914370664023786
F1 Score: 0.8657220606174346

Class p3:
Precision: 0.4447004608294931
Recall: 0.29132075471698116
F1 Score: 0.35202918376652986

Class p4:
Precision: 0.7283582089552239
Recall: 0.7113702623906706
F1 Score: 0.7197640117994101



In [39]:
# 3.decision tree C.
from sklearn.tree import DecisionTreeClassifier



In [40]:
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=10)
dt_model.fit(x_train, y_train)
y_pred = dt_model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print ()
print(f"Accuracy: {accuracy:.2f}")
print ()

precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()


Accuracy: 0.71

Class p1:
Precision: 0.7217741935483871
Recall: 0.7061143984220908
F1 Score: 0.7138584247258226

Class p2:
Precision: 0.8062645011600929
Recall: 0.8265609514370664
F1 Score: 0.8162865811882157

Class p3:
Precision: 0.34755134281200634
Recall: 0.3320754716981132
F1 Score: 0.3396372057120803

Class p4:
Precision: 0.6449643947100712
Recall: 0.6161321671525753
F1 Score: 0.6302186878727635



## Since XGBoost is working the best among our options, let's just tweak its parameter to make it work even better.

5. **Model Improvement**:

In [41]:
# Hyperparameter tuning for xgboost (Used in the session)

# Define the hyperparameter grid
param_grid = {
'colsample_bytree': [0.1, 0.3, 0.5, 0.7, 0.9],
'learning_rate'   : [0.001, 0.01, 0.1,0.5, 1],
'max_depth'       : [3, 5, 8, 10],
'alpha'           : [1, 10, 100],
'n_estimators'    : [10,50,100]
}

index = 0

answers_grid = {
 'combination'       :[],
 'train_Accuracy'    :[],
 'test_Accuracy'     :[],
 'colsample_bytree'  :[],
 'learning_rate'     :[],
 'max_depth'         :[],
 'alpha'             :[],
 'n_estimators'      :[]

 }

In [42]:
for colsample_bytree in param_grid['colsample_bytree']:
   for learning_rate in param_grid['learning_rate']:
     for max_depth in param_grid['max_depth']:
       for alpha in param_grid['alpha']:
           for n_estimators in param_grid['n_estimators']:

               index = index + 1

               # Define and train the XGBoost model
               model = xgb.XGBClassifier(objective='multi:softmax',  
                                        num_class=4,
                                        colsample_bytree = colsample_bytree,
                                        learning_rate = learning_rate,
                                        max_depth = max_depth,
                                        alpha = alpha,
                                        n_estimators = n_estimators)



               y = df_encoded['Approved_Flag']
               x = df_encoded. drop ( ['Approved_Flag'], axis = 1 )

               label_encoder = LabelEncoder()
               y_encoded = label_encoder.fit_transform(y)


               x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.2, random_state=42)


               model.fit(x_train, y_train)



               # Predict on training and testing sets
               y_pred_train = model.predict(x_train)
               y_pred_test = model.predict(x_test)


               # Calculate train and test results
               train_accuracy =  accuracy_score (y_train, y_pred_train)
               test_accuracy  =  accuracy_score (y_test , y_pred_test)



               # Include into the lists
               answers_grid ['combination']   .append(index)
               answers_grid ['train_Accuracy']    .append(train_accuracy)
               answers_grid ['test_Accuracy']     .append(test_accuracy)
               answers_grid ['colsample_bytree']   .append(colsample_bytree)
               answers_grid ['learning_rate']      .append(learning_rate)
               answers_grid ['max_depth']          .append(max_depth)
               answers_grid ['alpha']              .append(alpha)
               answers_grid ['n_estimators']       .append(n_estimators)


               # Print results for this combination
               print(f"Combination {index}")
               print(f"colsample_bytree: {colsample_bytree}, learning_rate: {learning_rate}, max_depth: {max_depth}, alpha: {alpha}, n_estimators: {n_estimators}")
               print(f"Train Accuracy: {train_accuracy:.2f}")
               print(f"Test Accuracy : {test_accuracy :.2f}")
               print("-" * 30)

Combination 1
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 10
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 2
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 50
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 3
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 100
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 4
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 10
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 5
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 50
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 6
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 100
Train Accuracy: 0.61
Test Accu