### IMPORTING ALL REQUIRE LIBRARIES

In [291]:
import pandas as pd
import numpy as np
import sklearn
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from feature_engine.encoding import RareLabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import FunctionTransformer
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support



sklearn.set_config(transform_output='pandas')

### IMPORTING DATASET

In [249]:
df = pd.read_csv("Data//FINAL.csv")

In [250]:
df.shape

(42064, 44)

In [251]:
df = df.drop(columns=['Unnamed: 0'])

In [252]:
# PIPELINE OVERVIEW :-

# MARITALSTATUS     -->"Married"->1 or "Single"->0
# EDUCATION         -->LevelEncoding
# GENDER            -->OneHotEncoding
# last_prod_enq2    -->rare_encoding, onehotencoding
# first_prod_enq2   -->rare_encoding, onehotencoding
# pct_tl_open_L6M, pct_tl_closed_L6M, Tot_TL_closed_L12M, pct_CC_enq_L6m_of_ever, pct_PL_enq_L6m_of_ever, PL_Flag, CC_Flag, HL_Flag, GL_Flag --> No Scaling
# all other numerical Columns -->Log Transformation

### DATA SPLITTING

In [253]:
X = df.drop(columns=['Approved_Flag'])
y = df['Approved_Flag']

In [254]:
print("X-shape :-", X.shape)
print("X-shape :-", y.shape)

X-shape :- (42064, 42)
X-shape :- (42064,)


In [255]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [256]:
print("X-TRAIN SHAPE :- ", x_train.shape)
print("X-TEST SHAPE :- ", x_test.shape)
print("Y-TRAIN SHAPE :- ", y_train.shape)
print("Y-TEST SHAPE :- ", y_test.shape)

X-TRAIN SHAPE :-  (33651, 42)
X-TEST SHAPE :-  (8413, 42)
Y-TRAIN SHAPE :-  (33651,)
Y-TEST SHAPE :-  (8413,)


### TRANSFORMATION OPERATIONS

In [257]:
y_test = y_test.map({"P1":0, "P2":1, "P3":2, "P4":3})
y_train = y_train.map({"P1":0, "P2":1, "P3":2, "P4":3})

#### 1. MARITALSTATUS

In [258]:
x_train['MARITALSTATUS'] = x_train['MARITALSTATUS'].map({"Married":1, "Single":0})

In [259]:
x_train['MARITALSTATUS'].unique()

array([1, 0], dtype=int64)

#### 2. EDUCATION

In [260]:
# Ordinal feature -- EDUCATION
# SSC            : 1
# 12TH           : 2
# GRADUATE       : 3
# UNDER GRADUATE : 3
# POST-GRADUATE  : 4
# OTHERS         : 1
# PROFESSIONAL   : 4

In [261]:
x_train['EDUCATION'].unique()

array(['GRADUATE', '12TH', 'POST-GRADUATE', 'UNDER GRADUATE', 'SSC',
       'OTHERS', 'PROFESSIONAL'], dtype=object)

In [262]:
x_train['EDUCATION'].value_counts(normalize=True)

EDUCATION
GRADUATE          0.334463
12TH              0.279100
SSC               0.173160
UNDER GRADUATE    0.108972
OTHERS            0.054085
POST-GRADUATE     0.045080
PROFESSIONAL      0.005141
Name: proportion, dtype: float64

In [263]:
x_train['EDUCATION'] = x_train['EDUCATION'].map({"GRADUATE":3, "12TH":2, "POST-GRADUATE":4, "UNDER GRADUATE":3,
                                                 "SSC":1, "OTHERS":4, "PROFESSIONAL":4})

In [264]:
x_train['EDUCATION'].unique()

array([3, 2, 4, 1], dtype=int64)

#### GENDER

In [265]:
x_train['GENDER'].unique()

array(['M', 'F'], dtype=object)

In [266]:
Gender_Transformer = Pipeline(steps=[
    ("OneHotEncoder", OneHotEncoder(handle_unknown='ignore',
                                   sparse_output=False))
])
Gender_Transformer.fit_transform(x_train.loc[:, ['GENDER']])

Unnamed: 0,GENDER_F,GENDER_M
29847,0.0,1.0
32223,0.0,1.0
28558,0.0,1.0
33720,0.0,1.0
41167,0.0,1.0
...,...,...
6265,0.0,1.0
11284,0.0,1.0
38158,0.0,1.0
860,0.0,1.0


#### LAST PRODUCT ENQUIRY and FIRST PRODUCT ENQUIRY

In [267]:
x_train['last_prod_enq2'].unique()

array(['ConsumerLoan', 'HL', 'PL', 'others', 'CC', 'AL'], dtype=object)

In [268]:
x_train['last_prod_enq2'].value_counts(normalize=True)

last_prod_enq2
ConsumerLoan    0.392470
others          0.323557
PL              0.179519
CC              0.052688
AL              0.032154
HL              0.019613
Name: proportion, dtype: float64

In [269]:
Product_Enquiry_Transformer = Pipeline(steps=[
    ("OneHotEncoder", OneHotEncoder(handle_unknown="ignore",
                                   sparse_output=False))
])
Product_Enquiry_Transformer.fit_transform(x_train.loc[:, ['last_prod_enq2', 'first_prod_enq2']])

Unnamed: 0,last_prod_enq2_AL,last_prod_enq2_CC,last_prod_enq2_ConsumerLoan,last_prod_enq2_HL,last_prod_enq2_PL,last_prod_enq2_others,first_prod_enq2_AL,first_prod_enq2_CC,first_prod_enq2_ConsumerLoan,first_prod_enq2_HL,first_prod_enq2_PL,first_prod_enq2_others
29847,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
32223,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
28558,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
33720,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
41167,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
6265,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
11284,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
38158,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
860,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


#### NUMERICAL COLUMN TRANSFORMATION

In [270]:
not_require = ["pct_tl_open_L6M", "pct_tl_closed_L6M", "Tot_TL_closed_L12M", "pct_CC_enq_L6m_of_ever", "pct_PL_enq_L6m_of_ever", "PL_Flag", "CC_Flag", "HL_Flag", "GL_Flag", "MARITALSTATUS", "EDUCATION", "GENDER", "last_prod_enq2", "first_prod_enq2"]

In [271]:
require = []
for i in x_train.columns:
    
    if i not in list(not_require):
        require.append(i)

In [272]:
len(require)

28

In [273]:
require = []
for i in x_train.columns:
    
    if i not in list(not_require):
        require.append(i)
        
Log_Transformer = Pipeline(steps=[
    ("LogTransformer", FunctionTransformer(np.log1p, validate=True))
])
Log_Transformer.fit_transform(x_train.loc[:, require])

array([[ 0.        ,  0.        ,  1.09861229, ...,  1.09861229,
        10.30898599,  4.87519732],
       [ 0.06015392,  0.69314718,  0.        , ...,  0.        ,
        10.85901823,  4.27666612],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
        10.1266711 ,  5.52942909],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.69314718,
         9.43356392,  4.85203026],
       [ 0.28743204,  0.69314718,  0.        , ...,  1.60943791,
        10.1266711 ,  5.5174529 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
        10.46313191,  4.81218436]])

In [274]:
pd.set_option("display.max_columns", None)

### COLUMN TRANSFORMATION

In [275]:
Column_Transformer = ColumnTransformer(transformers=[
    ("GENDER", Gender_Transformer, ['GENDER']),
    ("PRODUCT_ENQUIRY", Product_Enquiry_Transformer, ['last_prod_enq2', 'first_prod_enq2']),
    ("NUMERICAL", Log_Transformer, require)
])
Column_Transformer.fit_transform(x_train)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41
29847,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,1.098612,1.098612,0.000000,1.386294,1.386294,0.693147,5.111988,3.178054,3.988984,3.433987,0.000000,0.0,2.397895,0.0,0.0,0.0,0.0,0.0,0.0,3.433987,0.000000,0.000000,1.098612,1.098612,10.308986,4.875197
32223,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.060154,0.693147,0.000000,1.791759,0.693147,3.465736,0.693147,2.995732,5.247024,1.386294,3.713572,3.433987,0.000000,0.0,2.197225,0.0,0.0,0.0,0.0,0.0,0.0,3.433987,0.000000,0.000000,5.634790,0.000000,10.859018,4.276666
28558,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.693147,0.693147,0.693147,4.204693,2.564949,4.499810,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.693147,5.866468,0.000000,10.126671,5.529429
33720,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.693147,0.000000,0.000000,0.000000,0.693147,0.693147,0.000000,2.639057,1.098612,5.099866,2.484907,1.098612,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,2.484907,0.000000,1.098612,3.465736,1.098612,9.680406,3.295837
41167,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.693147,0.000000,2.995732,2.995732,5.894403,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,3.806662,1.098612,10.043293,4.262680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6265,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.693147,1.098612,0.693147,0.693147,4.430817,1.386294,3.891820,0.000000,0.000000,0.0,1.098612,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.693147,0.693147,4.653960,0.000000,10.463132,5.043425
11284,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000000,0.000000,0.693147,0.000000,0.000000,1.098612,0.693147,1.098612,2.484907,2.197225,3.737670,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,3.465736,1.386294,10.308986,4.859812
38158,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.693147,0.000000,0.693147,1.791759,1.791759,4.304065,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,1.791759,0.693147,9.433564,4.852030
860,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.287432,0.693147,0.000000,0.000000,0.000000,0.000000,2.302585,1.098612,3.135494,1.609438,4.276666,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.693147,1.386294,3.044522,1.609438,10.126671,5.517453


In [276]:
preprocessor = Pipeline(steps=[
    ("ct", Column_Transformer)
])

X_Train_transformed = preprocessor.fit_transform(x_train)
X_Test_Transformed = preprocessor.fit_transform(x_test)

In [277]:
X_Train_transformed.shape

(33651, 42)

In [280]:
X_Train_transformed

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41
29847,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,1.098612,1.098612,0.000000,1.386294,1.386294,0.693147,5.111988,3.178054,3.988984,3.433987,0.000000,0.0,2.397895,0.0,0.0,0.0,0.0,0.0,0.0,3.433987,0.000000,0.000000,1.098612,1.098612,10.308986,4.875197
32223,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.060154,0.693147,0.000000,1.791759,0.693147,3.465736,0.693147,2.995732,5.247024,1.386294,3.713572,3.433987,0.000000,0.0,2.197225,0.0,0.0,0.0,0.0,0.0,0.0,3.433987,0.000000,0.000000,5.634790,0.000000,10.859018,4.276666
28558,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.693147,0.693147,0.693147,4.204693,2.564949,4.499810,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.693147,5.866468,0.000000,10.126671,5.529429
33720,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.693147,0.000000,0.000000,0.000000,0.693147,0.693147,0.000000,2.639057,1.098612,5.099866,2.484907,1.098612,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,2.484907,0.000000,1.098612,3.465736,1.098612,9.680406,3.295837
41167,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.693147,0.000000,2.995732,2.995732,5.894403,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,3.806662,1.098612,10.043293,4.262680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6265,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.693147,1.098612,0.693147,0.693147,4.430817,1.386294,3.891820,0.000000,0.000000,0.0,1.098612,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.693147,0.693147,4.653960,0.000000,10.463132,5.043425
11284,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000000,0.000000,0.693147,0.000000,0.000000,1.098612,0.693147,1.098612,2.484907,2.197225,3.737670,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,3.465736,1.386294,10.308986,4.859812
38158,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.693147,0.000000,0.693147,1.791759,1.791759,4.304065,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,1.791759,0.693147,9.433564,4.852030
860,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.287432,0.693147,0.000000,0.000000,0.000000,0.000000,2.302585,1.098612,3.135494,1.609438,4.276666,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.693147,1.386294,3.044522,1.609438,10.126671,5.517453


### MODEL TRAINING

#### RANDOM FOREST CLASSIFIER

In [313]:
model_Random_forest = Pipeline(steps=[
    ("FEATURE ENGINEERING", preprocessor),
    ("RANDOM FOREST", RandomForestClassifier(n_estimators = 200, random_state=42))
])

model_Random_forest.fit(x_train, y_train)

In [314]:
y_pred = model_Random_forest.predict(x_test)
XGB_accuracy = accuracy_score(y_test, y_pred)
print ()
print(f'Accuracy: {XGB_accuracy:.2f}')
print ()


Accuracy: 0.77



In [301]:
for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()

Class p1:
Precision: 0.8282485875706215
Recall: 0.722879684418146
F1 Score: 0.7719852553975777

Class p2:
Precision: 0.7969733038598877
Recall: 0.9290386521308226
F1 Score: 0.8579535053999634

Class p3:
Precision: 0.4444444444444444
Recall: 0.20528301886792452
F1 Score: 0.2808466701084151

Class p4:
Precision: 0.7333333333333333
Recall: 0.7376093294460642
F1 Score: 0.7354651162790697



#### XG-BOOST

In [302]:
model = Pipeline(steps=[
    ("pre", preprocessor),
    ("XG Boost", xgb.XGBClassifier(objective='multi:softmax',  num_class=4))
])

model.fit(x_train, y_train)

In [304]:
y_pred = model.predict(x_test)
XGB_accuracy = accuracy_score(y_test, y_pred)
print ()
print(f'Accuracy: {XGB_accuracy:.2f}')
print ()


Accuracy: 0.77



In [305]:
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()

Class p1:
Precision: 0.8164556962025317
Recall: 0.7633136094674556
F1 Score: 0.7889908256880734

Class p2:
Precision: 0.8210151380231523
Recall: 0.9137760158572844
F1 Score: 0.8649155722326454

Class p3:
Precision: 0.4531059683313033
Recall: 0.2807547169811321
F1 Score: 0.3466915191053122

Class p4:
Precision: 0.7327502429543246
Recall: 0.7327502429543246
F1 Score: 0.7327502429543246



#### DECISION TREE

In [307]:

model = Pipeline(steps=[
    ("FEATURE ENGINEERING", preprocessor),
    ("DECISION TREE", DecisionTreeClassifier(max_depth=20, min_samples_split=10))
])

model.fit(x_train, y_train)

In [308]:
y_pred = model.predict(x_test)

DT_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {DT_accuracy:.2f}")

Accuracy: 0.71


In [309]:
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

In [310]:
for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()

Class p1:
Precision: 0.7277085330776606
Recall: 0.7485207100591716
F1 Score: 0.7379679144385026

Class p2:
Precision: 0.8089778468713564
Recall: 0.8251734390485629
F1 Score: 0.8169953880875283

Class p3:
Precision: 0.34580960130187144
Recall: 0.32075471698113206
F1 Score: 0.3328112764291308

Class p4:
Precision: 0.6472361809045226
Recall: 0.6258503401360545
F1 Score: 0.6363636363636364



In [315]:
from sklearn.ensemble import GradientBoostingClassifier

In [316]:

model_GBC = Pipeline(steps=[
    ("FEATURE ENGINEERING", preprocessor),
    ("DECISION TREE", GradientBoostingClassifier())
])

model_GBC.fit(x_train, y_train)

In [317]:
y_pred = model_GBC.predict(x_test)

DT_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {DT_accuracy:.2f}")

Accuracy: 0.78


In [323]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier

# Initialize the model
model_GBC = GradientBoostingClassifier()

# Define the hyperparameter distribution
param_dist = {
    'n_estimators': [100, 200, 300, 400],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5, 6],
    'subsample': [0.6, 0.8, 1.0],  
    'min_samples_split': [2, 3, 4],  
    'min_samples_leaf': [1, 2, 3],  
    'max_features': ['auto', 'sqrt', 'log2'],  
}

# Create the randomized search
random_search = RandomizedSearchCV(
    estimator=model_GBC,
    param_distributions=param_dist,
    n_iter=50,  
    scoring='accuracy',  
    cv=5,  
    verbose=3,
    n_jobs=-1,
    random_state=42
)

# Fit the random search
random_search.fit(X_Train_transformed, y_train)

# Output the best hyperparameters
print("Best Hyperparameters: ", random_search.best_params_)


Fitting 5 folds for each of 50 candidates, totalling 250 fits
Best Hyperparameters:  {'subsample': 0.6, 'n_estimators': 400, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 5, 'learning_rate': 0.05}


In [326]:

model_GBC = Pipeline(steps=[
    ("FEATURE ENGINEERING", preprocessor),
    ("DECISION TREE", GradientBoostingClassifier(subsample= 0.6,
                                                 n_estimators= 400,
                                                 min_samples_split= 2,
                                                 min_samples_leaf= 1,
                                                 max_features= 'sqrt',
                                                 max_depth= 5,
                                                 learning_rate= 0.05)
    )
])

model_GBC.fit(x_train, y_train)

In [327]:
y_pred = model_GBC.predict(x_test)

DT_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {DT_accuracy:.2f}")

Accuracy: 0.77


In [329]:
model = Pipeline(steps=[
    ("pre", preprocessor),
    ("XG Boost", xgb.XGBClassifier(objective='multi:softmax',  num_class=4,learning_rate=0.2, max_depth=3, n_estimators=200 ))
])

model.fit(x_train, y_train)            

In [330]:
y_pred = model.predict(x_test)

DT_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {DT_accuracy:.2f}")

Accuracy: 0.78
