In [5]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
df=pd.read_csv('credi_card.csv')
df.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,50000,1,2,1,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0
1,50000,1,1,2,37,0,0,0,0,0,...,19394,19619,20024,2500,1815,657,1000,1000,800,0
2,500000,1,1,2,29,0,0,0,0,0,...,542653,483003,473944,55000,40000,38000,20239,13750,13770,0
3,100000,2,2,2,23,0,-1,-1,0,0,...,221,-159,567,380,601,0,581,1687,1542,0
4,140000,2,3,1,28,0,0,2,0,0,...,12211,11793,3719,3329,0,432,1000,1000,1000,0


In [7]:
X=df.drop('default payment next month',axis=1)
y=df['default payment next month']

In [8]:
from sklearn.model_selection import train_test_split

Xtrain,Xtest,ytrain,ytest=train_test_split(X,y,test_size=0.30,random_state=17)

In [9]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier
from sklearn.impute import KNNImputer
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.preprocessing import StandardScaler,RobustScaler,MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import confusion_matrix,classification_report

In [14]:
def model_performance(preprocessor,xtrain,xtest,ytrain,ytest):
    models={
        'DecisionTreeClassifier':DecisionTreeClassifier(),
        'GaussianNB':GaussianNB(),
        'GradientBoostingClassifier':GradientBoostingClassifier(),
        'RandomForestClassifier':RandomForestClassifier(),
        'SVC':SVC(),
        'KNeighborsClassifier':KNeighborsClassifier(),
        'RidgeClassifier': RidgeClassifier()
    }
    for name,model in models.items():
        pipeline=make_pipeline(preprocessor,model)
        pipeline.fit(xtrain,ytrain)
        ypred=pipeline.predict(xtest)

        #checking the metrics of model

        matrix=confusion_matrix(ytest,ypred)
        report=classification_report(ytest,ypred)

        print(f'{name}---confusion_matrix---')
        print(f' {matrix}')
        print(f'{name}---classification_report---')
        print(f"{report}\n")

In [15]:
preprocessor_01=make_pipeline(KNNImputer(n_neighbors=3),StandardScaler())
preprocessor_02=make_pipeline(KNNImputer(n_neighbors=3),RobustScaler())
preprocessor_03=make_pipeline(KNNImputer(n_neighbors=3),MinMaxScaler())

print("=============== StandardScaler================")
model_performance(preprocessor_01,Xtrain,Xtest,ytrain,ytest)
print("=============== RobustScaler================")
model_performance(preprocessor_02,Xtrain,Xtest,ytrain,ytest)
print("=============== MinmaxdScaler================")
model_performance(preprocessor_03,Xtrain,Xtest,ytrain,ytest)

DecisionTreeClassifier---confusion_matrix---
 [[202  26]
 [ 46  27]]
DecisionTreeClassifier---classification_report---
              precision    recall  f1-score   support

           0       0.81      0.89      0.85       228
           1       0.51      0.37      0.43        73

    accuracy                           0.76       301
   macro avg       0.66      0.63      0.64       301
weighted avg       0.74      0.76      0.75       301


GaussianNB---confusion_matrix---
 [[176  52]
 [ 30  43]]
GaussianNB---classification_report---
              precision    recall  f1-score   support

           0       0.85      0.77      0.81       228
           1       0.45      0.59      0.51        73

    accuracy                           0.73       301
   macro avg       0.65      0.68      0.66       301
weighted avg       0.76      0.73      0.74       301


GradientBoostingClassifier---confusion_matrix---
 [[211  17]
 [ 56  17]]
GradientBoostingClassifier---classification_report---
   

from the above report all models are baising to  0 except Decesiontreeclasifier 
so we are going to use that algorithem for aprediction


In [29]:
from sklearn.model_selection import GridSearchCV

classifier=DecisionTreeClassifier(random_state=17)

param_gris={
    'criterion':['gini', 'entropy', 'log_loss'],
    'splitter':['best','random'],
    'max_depth':[3,4,5,6,7,8,9]
}

grid=GridSearchCV(classifier,param_gris,cv=5,verbose=1)
grid.fit(Xtrain,ytrain)

Fitting 5 folds for each of 42 candidates, totalling 210 fits


In [30]:
grid.best_params_

{'criterion': 'gini', 'max_depth': 3, 'splitter': 'best'}

In [31]:
grid.best_estimator_.score(Xtest,ytest)

0.7342192691029901