# Credit Card Default Prediction

* We are creating this file for understanding the steps need to be implemented in our End to End ML project.

### Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder,LabelEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score,classification_report,roc_auc_score,confusion_matrix,precision_score,recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.base import BaseEstimator,TransformerMixin
from sklearn.cluster import KMeans
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
import pickle
from sklearn.model_selection import cross_val_score
warnings.filterwarnings("ignore")

##Displaying all the columns of the dataframes
pd.pandas.set_option('display.max_columns',None)

In [2]:
#Loading the dataset
dataset=pd.read_csv("K:\\DATA SCIENCE Reference\\Projects\\Credit-Card-Default-Prediction\\Data\\UCI_Credit_Card.csv")

In [3]:
dataset

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,1,20000.0,2,2,1,24,2,2,-1,-1,-2,-2,3913.0,3102.0,689.0,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,2,120000.0,2,2,2,26,-1,2,0,0,0,2,2682.0,1725.0,2682.0,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,3,90000.0,2,2,2,34,0,0,0,0,0,0,29239.0,14027.0,13559.0,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,4,50000.0,2,2,1,37,0,0,0,0,0,0,46990.0,48233.0,49291.0,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,5,50000.0,1,2,1,57,-1,0,-1,0,0,0,8617.0,5670.0,35835.0,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,29996,220000.0,1,3,1,39,0,0,0,0,0,0,188948.0,192815.0,208365.0,88004.0,31237.0,15980.0,8500.0,20000.0,5003.0,3047.0,5000.0,1000.0,0
29996,29997,150000.0,1,3,2,43,-1,-1,-1,-1,0,0,1683.0,1828.0,3502.0,8979.0,5190.0,0.0,1837.0,3526.0,8998.0,129.0,0.0,0.0,0
29997,29998,30000.0,1,2,2,37,4,3,2,-1,0,0,3565.0,3356.0,2758.0,20878.0,20582.0,19357.0,0.0,0.0,22000.0,4200.0,2000.0,3100.0,1
29998,29999,80000.0,1,3,1,41,1,-1,0,0,0,-1,-1645.0,78379.0,76304.0,52774.0,11855.0,48944.0,85900.0,3409.0,1178.0,1926.0,52964.0,1804.0,1


In [4]:
dataset.shape

(30000, 25)

In [5]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ID                          30000 non-null  int64  
 1   LIMIT_BAL                   30000 non-null  float64
 2   SEX                         30000 non-null  int64  
 3   EDUCATION                   30000 non-null  int64  
 4   MARRIAGE                    30000 non-null  int64  
 5   AGE                         30000 non-null  int64  
 6   PAY_0                       30000 non-null  int64  
 7   PAY_2                       30000 non-null  int64  
 8   PAY_3                       30000 non-null  int64  
 9   PAY_4                       30000 non-null  int64  
 10  PAY_5                       30000 non-null  int64  
 11  PAY_6                       30000 non-null  int64  
 12  BILL_AMT1                   30000 non-null  float64
 13  BILL_AMT2                   300

#### Renaming the PAY_0 with PAY_1 and Output feature(default.payment.next.month) with Default_Prediction

In [6]:
dataset.rename(columns={'PAY_0':'PAY_1'},inplace=True)


dataset.rename(columns={'default.payment.next.month':'Default_Prediction'},inplace=True)

In [7]:
dataset.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_1,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,Default_Prediction
0,1,20000.0,2,2,1,24,2,2,-1,-1,-2,-2,3913.0,3102.0,689.0,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,2,120000.0,2,2,2,26,-1,2,0,0,0,2,2682.0,1725.0,2682.0,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,3,90000.0,2,2,2,34,0,0,0,0,0,0,29239.0,14027.0,13559.0,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,4,50000.0,2,2,1,37,0,0,0,0,0,0,46990.0,48233.0,49291.0,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,5,50000.0,1,2,1,57,-1,0,-1,0,0,0,8617.0,5670.0,35835.0,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0


In [8]:
dataset.columns

Index(['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_1',
       'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
       'Default_Prediction'],
      dtype='object')

In [9]:
#Replacing values in the features with their Actual names
dataset['SEX']=dataset['SEX'].replace({1:'male', 2:'female'})

#### Here, we have some other values in Education like {0,4,5,6} which are not in first 3 categories. So, we are replacing all with section 4

In [10]:
dataset['EDUCATION']=dataset['EDUCATION'].replace({0:4,5:4,6:4})
dataset['EDUCATION']=dataset['EDUCATION'].replace({1:'graduate school',2:'university',3:'high school',4:'others'})

In [11]:
dataset['MARRIAGE']=dataset['MARRIAGE'].replace({0:3})
dataset['MARRIAGE']=dataset['MARRIAGE'].replace({1:'married',2:'single',3:'others'})

#### We are replacing the values of all PAY_X features -1,-2 with 0.

In [12]:
for i in range(1,7):
    field='PAY_'+str(i)
    dataset[field]=dataset[field].replace({-1:0})
    dataset[field]=dataset[field].replace({-2:0})

In [13]:
#Dropping the unique ID column
dataset=dataset.drop(columns='ID')

#### Finding Outliers in LIMIT_BAL

In [14]:
outliers = []
def Finding_outliers(data):
    data = sorted(data)
    q1 = np.percentile(data, 25)
    q3 = np.percentile(data, 75)
    IQR = q3-q1
    lwr_bound = q1-(1.5*IQR)
    upr_bound = q3+(1.5*IQR)
    for i in data: 
        if (i<lwr_bound or i>upr_bound):
            outliers.append(i)
    return outliers

In [15]:
outliers = Finding_outliers(dataset['LIMIT_BAL'])

#### Handling Outliers

Replacing them with mean/median

In [16]:
median = np.median(dataset['LIMIT_BAL'])# Replace with median
for i in outliers:
    c = np.where(dataset['LIMIT_BAL']==i, median, dataset['LIMIT_BAL'])
dataset['LIMIT_BAL']=c

####  There is high correlation between the BILL_AMTX features. So, dropping the last 5 BILL_AMT features.

In [17]:
dataset=dataset.drop(columns=['BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6'])

In [18]:
X=dataset.iloc[:,:-1]
y=dataset.iloc[:,-1]
#------------

In [19]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42,stratify=y)

In [20]:
X_train

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_1,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
22788,160000.0,female,university,single,33,2,2,3,2,0,0,161771.0,15000.0,0.0,0.0,6100.0,12300.0,6100.0
29006,150000.0,female,graduate school,single,34,1,0,0,0,0,0,0.0,53.0,0.0,0.0,0.0,0.0,0.0
16950,10000.0,male,university,married,50,1,2,0,0,0,0,10171.0,2.0,1281.0,1134.0,294.0,305.0,1000.0
22280,220000.0,female,graduate school,single,29,0,0,0,0,0,0,206879.0,9100.0,9000.0,7887.0,4800.0,4900.0,6000.0
11346,310000.0,female,graduate school,single,32,1,0,0,0,0,0,0.0,0.0,326.0,0.0,0.0,0.0,1200.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23479,100000.0,female,university,single,31,0,0,0,0,0,0,91344.0,3309.0,4421.0,5486.0,3400.0,3541.0,3894.0
13921,120000.0,female,university,single,24,0,0,0,0,0,2,112336.0,4200.0,4100.0,4100.0,10000.0,4560.0,0.0
3794,120000.0,female,graduate school,single,24,0,0,0,0,0,0,75796.0,3700.0,2023.0,2016.0,2000.0,1200.0,1000.0
27565,360000.0,male,graduate school,married,57,1,0,0,0,0,0,0.0,0.0,860.0,246.0,0.0,0.0,0.0


In [21]:
numerical_columns=X_train.select_dtypes(include=['int64','float64']).columns

In [22]:
numerical_columns

Index(['LIMIT_BAL', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5',
       'PAY_6', 'BILL_AMT1', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4',
       'PAY_AMT5', 'PAY_AMT6'],
      dtype='object')

In [23]:
#numerical pipeline

numerical_pipeline=Pipeline([
    ('feature_scaling',StandardScaler())
])

In [24]:
numerical_pipeline

In [25]:
categorical_columns=X_train.select_dtypes(include=['object','category']).columns

In [26]:
categorical_columns

Index(['SEX', 'EDUCATION', 'MARRIAGE'], dtype='object')

In [27]:
'''class MyLabelBinarizer(TransformerMixin):
    def __init__(self, *args, **kwargs):
        self.encoder = LabelEncoder(*args, **kwargs)
    def fit(self, x, y=0):
        self.encoder.fit(x)
        return self
    def transform(self, x, y=0):
        return self.encoder.transform(x)'''

'class MyLabelBinarizer(TransformerMixin):\n    def __init__(self, *args, **kwargs):\n        self.encoder = LabelEncoder(*args, **kwargs)\n    def fit(self, x, y=0):\n        self.encoder.fit(x)\n        return self\n    def transform(self, x, y=0):\n        return self.encoder.transform(x)'

In [41]:
#Categorical pipeline
categorical_pipeline=Pipeline([
    ('categorical_encoder', OrdinalEncoder())
])

In [42]:
categorical_pipeline

In [43]:
column_pipeline=ColumnTransformer([
    ("numerical_pipeline",numerical_pipeline,numerical_columns),
    ("categorical_pipeline",categorical_pipeline,categorical_columns)
])
column_pipeline

In [44]:
X_train

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_1,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
22788,160000.0,female,university,single,33,2,2,3,2,0,0,161771.0,15000.0,0.0,0.0,6100.0,12300.0,6100.0
29006,150000.0,female,graduate school,single,34,1,0,0,0,0,0,0.0,53.0,0.0,0.0,0.0,0.0,0.0
16950,10000.0,male,university,married,50,1,2,0,0,0,0,10171.0,2.0,1281.0,1134.0,294.0,305.0,1000.0
22280,220000.0,female,graduate school,single,29,0,0,0,0,0,0,206879.0,9100.0,9000.0,7887.0,4800.0,4900.0,6000.0
11346,310000.0,female,graduate school,single,32,1,0,0,0,0,0,0.0,0.0,326.0,0.0,0.0,0.0,1200.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23479,100000.0,female,university,single,31,0,0,0,0,0,0,91344.0,3309.0,4421.0,5486.0,3400.0,3541.0,3894.0
13921,120000.0,female,university,single,24,0,0,0,0,0,2,112336.0,4200.0,4100.0,4100.0,10000.0,4560.0,0.0
3794,120000.0,female,graduate school,single,24,0,0,0,0,0,0,75796.0,3700.0,2023.0,2016.0,2000.0,1200.0,1000.0
27565,360000.0,male,graduate school,married,57,1,0,0,0,0,0,0.0,0.0,860.0,246.0,0.0,0.0,0.0


In [45]:
# Preprocessing the X_train set through pipeline

X_train_processed=column_pipeline.fit_transform(X_train)

TypeError: fit_transform() takes 2 positional arguments but 3 were given

In [36]:
X_train.shape,X_train_processed.shape,y_train.shape

((24000, 18), (24000, 18), (24000,))

In [40]:
X_train_processed[0:5]

array([[-5.66382866e-02, -2.64557688e-01,  2.15883325e+00,
         2.09047442e+00,  3.42561767e+00,  2.31949489e+00,
        -3.09985898e-01, -3.19563251e-01,  1.50554693e+00,
         5.80657369e-01, -2.90332410e-01, -2.97819969e-01,
         8.69611645e-02,  5.00397378e-01,  4.87448579e-02,
         0.00000000e+00,  3.00000000e+00,  2.00000000e+00],
       [-1.33919737e-01, -1.55803693e-01,  8.44341725e-01,
        -3.98617737e-01, -3.84563725e-01, -3.42320557e-01,
        -3.09985898e-01, -3.19563251e-01, -6.95164530e-01,
        -3.44969229e-01, -2.90332410e-01, -2.97819969e-01,
        -3.18075101e-01, -3.16978470e-01, -2.92992314e-01,
         0.00000000e+00,  0.00000000e+00,  2.00000000e+00],
       [-1.21586004e+00,  1.58426024e+00,  8.44341725e-01,
         2.09047442e+00, -3.84563725e-01, -3.42320557e-01,
        -3.09985898e-01, -3.19563251e-01, -5.56799580e-01,
        -3.48127519e-01, -2.27081151e-01, -2.33068771e-01,
        -2.98553681e-01, -2.96710207e-01, -2.36969827e

In [38]:
X_train.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_1,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
22788,160000.0,female,university,single,33,2,2,3,2,0,0,161771.0,15000.0,0.0,0.0,6100.0,12300.0,6100.0
29006,150000.0,female,graduate school,single,34,1,0,0,0,0,0,0.0,53.0,0.0,0.0,0.0,0.0,0.0
16950,10000.0,male,university,married,50,1,2,0,0,0,0,10171.0,2.0,1281.0,1134.0,294.0,305.0,1000.0
22280,220000.0,female,graduate school,single,29,0,0,0,0,0,0,206879.0,9100.0,9000.0,7887.0,4800.0,4900.0,6000.0
11346,310000.0,female,graduate school,single,32,1,0,0,0,0,0,0.0,0.0,326.0,0.0,0.0,0.0,1200.0


In [45]:
smote = SMOTE(sampling_strategy=0.5)
X_train1,y_train1 = smote.fit_resample(X_train_processed,y_train)  

In [46]:
xgc = XGBClassifier(n_estimators=500,max_depth=3,n_jobs=-1, use_label_encoder =False)
xgc.fit(X_train1,y_train1)

In [47]:
X_test=column_pipeline.transform(X_test)
print(X_test.shape)

#X_test,y_test = smote.fit_resample(X_test,y_test)
predict=xgc.predict(X_test)

(6000, 18)


In [49]:
print('Accuracy --> ',accuracy_score(y_test,predict))
print('F1 Score --> ',f1_score(y_test,predict))
print('Classification Report  --> \n',classification_report(y_test,predict))
print(pd.crosstab(y_test,predict))

#ROC-AUC score
auc_score = roc_auc_score(y_test, predict)
print('ROC-AUC Score:',auc_score)

Accuracy -->  0.8115
F1 Score -->  0.46423495973472284
Classification Report  --> 
               precision    recall  f1-score   support

           0       0.84      0.94      0.89      4673
           1       0.62      0.37      0.46      1327

    accuracy                           0.81      6000
   macro avg       0.73      0.65      0.67      6000
weighted avg       0.79      0.81      0.79      6000

col_0                  0    1
Default_Prediction           
0                   4379  294
1                    837  490
ROC-AUC Score: 0.653169670206969


In [54]:
gboost=GradientBoostingClassifier(n_estimators=500,learning_rate=0.05,random_state=100,max_features=5 )
gboost.fit(X_train1,y_train1)
predict=gboost.predict(X_test)

In [55]:
print('Accuracy --> ',accuracy_score(y_test,predict))
print('F1 Score --> ',f1_score(y_test,predict))
print('Classification Report  --> \n',classification_report(y_test,predict))
print(pd.crosstab(y_test,predict))

#ROC-AUC score
auc_score = roc_auc_score(y_test, predict)
print('ROC-AUC Score:',auc_score)

Accuracy -->  0.8165
F1 Score -->  0.5015844273426889
Classification Report  --> 
               precision    recall  f1-score   support

           0       0.85      0.93      0.89      4673
           1       0.63      0.42      0.50      1327

    accuracy                           0.82      6000
   macro avg       0.74      0.67      0.69      6000
weighted avg       0.80      0.82      0.80      6000

col_0                  0    1
Default_Prediction           
0                   4345  328
1                    773  554
ROC-AUC Score: 0.6736462943256092


In [53]:
print(gboost)

GradientBoostingClassifier()


In [61]:
pickle.dump(gboost,open('model.pkl','wb'))

In [None]:
# load saved model
with open('model.pkl' , 'rb') as f:
    lr = pickle.load(f)

In [62]:
pickled_model = pickle.load(open('model.pkl', 'rb'))

In [66]:

l=pickled_model.predict([[-0.67265992, -1.2446578 , -0.46819953, -0.39977279, -0.38414798,
       -0.34067364, -0.30904877, -0.31852141,  0.32966974, -0.12689188,
       -0.03953698, -0.17356677, -0.22465419, -0.21600791, -0.24524018,
        0.        ,  3.        ,  2.        ]]) # similar

In [67]:
print(l)

[0]


In [None]:

l=pickled_model.predict([[310000.0,2,1,2,32,1,0,0,0,0,0,0.0,0.0,326.0,0.0,0.0,0.0,1200.0]])

In [None]:
l

In [None]:
-----------------------------------------------------------------------

### Encoding the categorical features

In [None]:
le = LabelEncoder()
X['SEX'] = le.fit_transform(X['SEX'])
X['EDUCATION'] = le.fit_transform(X['EDUCATION'])
X['MARRIAGE']=le.fit_transform(X['MARRIAGE'])

### Splitting the dataset into Train set and Test set

In [None]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

### Feature Scaling

In [None]:
sc=StandardScaler()
X_train[['LIMIT_BAL', 'AGE', 'PAY_1', 'PAY_2',
       'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']] = sc.fit_transform(X_train[['LIMIT_BAL', 'AGE', 'PAY_1', 'PAY_2',
       'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1','PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']])

In [None]:
X_test[['LIMIT_BAL', 'AGE', 'PAY_1', 'PAY_2',
       'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']] = sc.transform(X_test[['LIMIT_BAL', 'AGE', 'PAY_1', 'PAY_2',
       'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1','PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']])

### Balancing the datset using sampling techniques

In [None]:
smote = SMOTE(sampling_strategy=0.5)
X_train1,y_train1 = smote.fit_resample(X_train,y_train)

In [None]:
xgc = XGBClassifier(n_estimators=500,max_depth=3,n_jobs=-1, use_label_encoder =False)
xgc.fit(X_train1,y_train1)
predict = xgc.predict(X_test)

In [None]:
print('Accuracy --> ',accuracy_score(y_test,predict))
print('F1 Score --> ',f1_score(y_test,predict))
print('Classification Report  --> \n',classification_report(y_test,predict))
print(pd.crosstab(y_test,predict))

#ROC-AUC score
auc_score = roc_auc_score(y_test, predict)
print('ROC-AUC Score:',auc_score)

In [None]:
rforest=RandomForestClassifier(n_estimators=10, max_depth=3, criterion='entropy',random_state=0)
rforest.fit(X_train1,y_train1)
predict=rforest.predict(X_test)

In [None]:
print('Accuracy --> ',accuracy_score(y_test,predict))
print('F1 Score --> ',f1_score(y_test,predict))
print('Classification Report  --> \n',classification_report(y_test,predict))
print(pd.crosstab(y_test,predict))

#ROC-AUC score
auc_score = roc_auc_score(y_test, predict)
print('ROC-AUC Score:',auc_score)

In [None]:
gboost=GradientBoostingClassifier()
gboost.fit(X_train1,y_train1)
predict=gboost.predict(X_test)

In [None]:
print('Accuracy --> ',accuracy_score(y_test,predict))
print('F1 Score --> ',f1_score(y_test,predict))
print('Classification Report  --> \n',classification_report(y_test,predict))
print(pd.crosstab(y_test,predict))

#ROC-AUC score
auc_score = roc_auc_score(y_test, predict)
print('ROC-AUC Score:',auc_score)