In [60]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from collections import OrderedDict
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.datasets import make_classification
from sklearn.svm import SVC

In [61]:
df = pd.read_csv("https://raw.githubusercontent.com/AchmadFanyFadheli200411100088/dataminingweb/main/credit_score.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,kode_kontrak,pendapatan_setahun_juta,kpr_aktif,durasi_pinjaman_bulan,jumlah_tanggungan,rata_rata_overdue,risk_rating
0,1,AGR-000001,295,YA,48,5,61 - 90 days,4
1,2,AGR-000011,271,YA,36,5,61 - 90 days,4
2,3,AGR-000030,159,TIDAK,12,0,0 - 30 days,1
3,4,AGR-000043,210,YA,12,3,46 - 60 days,3
4,5,AGR-000049,165,TIDAK,36,0,31 - 45 days,2


In [62]:
df_without_column_for_convert = pd.DataFrame(df, columns = ['kode_kontrak','pendapatan_setahun_juta','durasi_pinjaman_bulan','jumlah_tanggungan','risk_rating'])
df_without_column_for_convert.head()

Unnamed: 0,kode_kontrak,pendapatan_setahun_juta,durasi_pinjaman_bulan,jumlah_tanggungan,risk_rating
0,AGR-000001,295,48,5,4
1,AGR-000011,271,36,5,4
2,AGR-000030,159,12,0,1
3,AGR-000043,210,12,3,3
4,AGR-000049,165,36,0,2


In [63]:
df_ratarata=pd.get_dummies(df['rata_rata_overdue'])
df_ratarata.head()

Unnamed: 0,0 - 30 days,31 - 45 days,46 - 60 days,61 - 90 days,> 90 days
0,0,0,0,1,0
1,0,0,0,1,0
2,1,0,0,0,0
3,0,0,1,0,0
4,0,1,0,0,0


In [64]:
df_kpr_aktif=pd.get_dummies(df['kpr_aktif'])
df_kpr_aktif

Unnamed: 0,TIDAK,YA
0,0,1
1,0,1
2,1,0
3,0,1
4,1,0
...,...,...
895,0,1
896,0,1
897,1,0
898,1,0


In [65]:
df_new = pd.concat([df_without_column_for_convert, df_kpr_aktif,df_ratarata], axis=1)
df_new.head()

Unnamed: 0,kode_kontrak,pendapatan_setahun_juta,durasi_pinjaman_bulan,jumlah_tanggungan,risk_rating,TIDAK,YA,0 - 30 days,31 - 45 days,46 - 60 days,61 - 90 days,> 90 days
0,AGR-000001,295,48,5,4,0,1,0,0,0,1,0
1,AGR-000011,271,36,5,4,0,1,0,0,0,1,0
2,AGR-000030,159,12,0,1,1,0,1,0,0,0,0
3,AGR-000043,210,12,3,3,0,1,0,0,1,0,0
4,AGR-000049,165,36,0,2,1,0,0,1,0,0,0


In [66]:
df_risk_rating = pd.DataFrame(df, columns = ['risk_rating'])
df_risk_rating.head()

Unnamed: 0,risk_rating
0,4
1,4
2,1
3,3
4,2


In [67]:
df_without_risk_rating=df_new.drop(['risk_rating'], axis=1)

In [68]:
df_new2 = pd.concat([df_without_risk_rating,df_risk_rating], axis=1)
df_new2

Unnamed: 0,kode_kontrak,pendapatan_setahun_juta,durasi_pinjaman_bulan,jumlah_tanggungan,TIDAK,YA,0 - 30 days,31 - 45 days,46 - 60 days,61 - 90 days,> 90 days,risk_rating
0,AGR-000001,295,48,5,0,1,0,0,0,1,0,4
1,AGR-000011,271,36,5,0,1,0,0,0,1,0,4
2,AGR-000030,159,12,0,1,0,1,0,0,0,0,1
3,AGR-000043,210,12,3,0,1,0,0,1,0,0,3
4,AGR-000049,165,36,0,1,0,0,1,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...
895,AGR-010739,112,48,5,0,1,0,0,0,0,1,5
896,AGR-010744,120,48,2,0,1,0,0,1,0,0,3
897,AGR-010758,166,24,2,1,0,1,0,0,0,0,1
898,AGR-010775,196,48,0,1,0,0,1,0,0,0,2


In [69]:
df_drop_column_for_df_new3=df_new2.drop(['kode_kontrak'], axis=1)
df_drop_column_for_df_new3
df_new3=df_drop_column_for_df_new3
df_new3.head()

Unnamed: 0,pendapatan_setahun_juta,durasi_pinjaman_bulan,jumlah_tanggungan,TIDAK,YA,0 - 30 days,31 - 45 days,46 - 60 days,61 - 90 days,> 90 days,risk_rating
0,295,48,5,0,1,0,0,0,1,0,4
1,271,36,5,0,1,0,0,0,1,0,4
2,159,12,0,1,0,1,0,0,0,0,1
3,210,12,3,0,1,0,0,1,0,0,3
4,165,36,0,1,0,0,1,0,0,0,2


In [70]:
scalerData = MinMaxScaler()
scalerPendapatan = MinMaxScaler()
scalerDurasi = MinMaxScaler()
scalerTanggungan = MinMaxScaler()

In [71]:
dataNormalisasi = ['pendapatan_setahun_juta', 'durasi_pinjaman_bulan', 'jumlah_tanggungan']
FrameNormalisasi = df_new3[dataNormalisasi]
FrameNormalisasi

Unnamed: 0,pendapatan_setahun_juta,durasi_pinjaman_bulan,jumlah_tanggungan
0,295,48,5
1,271,36,5
2,159,12,0
3,210,12,3
4,165,36,0
...,...,...,...
895,112,48,5
896,120,48,2
897,166,24,2
898,196,48,0


In [72]:
dataPendapatan = df_new3[['pendapatan_setahun_juta']]
dataDurasi = df_new3[['durasi_pinjaman_bulan']]
dataTanggungan = df_new3[['jumlah_tanggungan']]

In [73]:
FramehasilNormalisasi = scalerData.fit_transform(FrameNormalisasi.values)
FramehasilNormalisasi

array([[0.97826087, 1.        , 0.83333333],
       [0.87391304, 0.66666667, 0.83333333],
       [0.38695652, 0.        , 0.        ],
       ...,
       [0.4173913 , 0.33333333, 0.33333333],
       [0.54782609, 1.        , 0.        ],
       [0.5826087 , 0.33333333, 0.33333333]])

In [74]:
FramePendapatan = scalerPendapatan.fit_transform(dataPendapatan.values)
hasilPendapatan = scalerPendapatan.transform(FramePendapatan)
hasilPendapatan = scalerPendapatan.transform(np.array([[295]]))
hasilPendapatan

array([[0.97826087]])

In [75]:
FrameDurasi = scalerDurasi.fit_transform(dataDurasi.values)
hasilDurasi = scalerDurasi.transform(FrameDurasi)
hasilDurasi = scalerDurasi.transform(np.array([[48]]))
hasilDurasi

array([[1.]])

In [76]:
FrameTanggungan = scalerTanggungan.fit_transform(dataTanggungan.values)
hasilTanggungan = scalerTanggungan.transform(FrameTanggungan)
hasilTanggungan = scalerTanggungan.transform(np.array([[5]]))
hasilTanggungan

array([[0.83333333]])

In [77]:
from sklearn.metrics import make_scorer, accuracy_score,precision_score
from sklearn.metrics import accuracy_score ,precision_score,recall_score,f1_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold,train_test_split,cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

In [78]:
X = df_new3[['pendapatan_setahun_juta','durasi_pinjaman_bulan','jumlah_tanggungan','TIDAK','YA','0 - 30 days','31 - 45 days','46 - 60 days','61 - 90 days','> 90 days']]
y = df_new3['risk_rating']

In [79]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)

In [80]:
gaussian = GaussianNB()
gaussian.fit(X_train, y_train)
Y_pred = gaussian.predict(X_test) 
accuracy_nb=round(accuracy_score(y_test,Y_pred)* 100, 2)
acc_gaussian = round(gaussian.score(X_train, y_train) * 100, 2)

cm = confusion_matrix(y_test, Y_pred)
accuracy = accuracy_score(y_test,Y_pred)
precision =precision_score(y_test, Y_pred,average='micro')
recall =  recall_score(y_test, Y_pred,average='micro')
f1 = f1_score(y_test,Y_pred,average='micro')
print('Confusion matrix for Naive Bayes\n',cm)
print('accuracy_Naive Bayes: %.3f' %accuracy)
print('precision_Naive Bayes: %.3f' %precision)
print('recall_Naive Bayes: %.3f' %recall)
print('f1-score_Naive Bayes : %.3f' %f1)

Confusion matrix for Naive Bayes
 [[69  0  0  0  0]
 [ 0 49  0  0  0]
 [ 0  0 84  0  0]
 [ 0  0  0 36  0]
 [ 0  0  0  0 32]]
accuracy_Naive Bayes: 1.000
precision_Naive Bayes: 1.000
recall_Naive Bayes: 1.000
f1-score_Naive Bayes : 1.000


In [81]:
FramehasilNormalisasi = pd.DataFrame(FramehasilNormalisasi,columns = dataNormalisasi)
FramehasilNormalisasi

Unnamed: 0,pendapatan_setahun_juta,durasi_pinjaman_bulan,jumlah_tanggungan
0,0.978261,1.000000,0.833333
1,0.873913,0.666667,0.833333
2,0.386957,0.000000,0.000000
3,0.608696,0.000000,0.500000
4,0.413043,0.666667,0.000000
...,...,...,...
895,0.182609,1.000000,0.833333
896,0.217391,1.000000,0.333333
897,0.417391,0.333333,0.333333
898,0.547826,1.000000,0.000000


In [82]:
df_new3 = df_new3.drop(columns = dataNormalisasi)
df_new3 = pd.concat([df_new3, FramehasilNormalisasi], axis=1)
df_new3

Unnamed: 0,TIDAK,YA,0 - 30 days,31 - 45 days,46 - 60 days,61 - 90 days,> 90 days,risk_rating,pendapatan_setahun_juta,durasi_pinjaman_bulan,jumlah_tanggungan
0,0,1,0,0,0,1,0,4,0.978261,1.000000,0.833333
1,0,1,0,0,0,1,0,4,0.873913,0.666667,0.833333
2,1,0,1,0,0,0,0,1,0.386957,0.000000,0.000000
3,0,1,0,0,1,0,0,3,0.608696,0.000000,0.500000
4,1,0,0,1,0,0,0,2,0.413043,0.666667,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
895,0,1,0,0,0,0,1,5,0.182609,1.000000,0.833333
896,0,1,0,0,1,0,0,3,0.217391,1.000000,0.333333
897,1,0,1,0,0,0,0,1,0.417391,0.333333,0.333333
898,1,0,0,1,0,0,0,2,0.547826,1.000000,0.000000


In [83]:
percent_amount_of_test_data = 0.3

In [84]:
matrices_X = df_new3.iloc[:,1:11].values
matrices_Y = df_new3.iloc[:,0].values

In [85]:
X1 = df_new3.iloc[:,1:11].values
Y1 = df_new3.iloc[:, 0].values

In [86]:
X_train, X_test, y_train, y_test = train_test_split(X1, Y1, test_size = percent_amount_of_test_data, random_state=0)

In [87]:
X_train.shape

(630, 10)

In [88]:
Y1

array([0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1,
       1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0,

In [89]:
model_accuracy = OrderedDict()

In [90]:
model_precision = OrderedDict()

In [91]:
model_recall = OrderedDict()

In [92]:
df_new3

Unnamed: 0,TIDAK,YA,0 - 30 days,31 - 45 days,46 - 60 days,61 - 90 days,> 90 days,risk_rating,pendapatan_setahun_juta,durasi_pinjaman_bulan,jumlah_tanggungan
0,0,1,0,0,0,1,0,4,0.978261,1.000000,0.833333
1,0,1,0,0,0,1,0,4,0.873913,0.666667,0.833333
2,1,0,1,0,0,0,0,1,0.386957,0.000000,0.000000
3,0,1,0,0,1,0,0,3,0.608696,0.000000,0.500000
4,1,0,0,1,0,0,0,2,0.413043,0.666667,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
895,0,1,0,0,0,0,1,5,0.182609,1.000000,0.833333
896,0,1,0,0,1,0,0,3,0.217391,1.000000,0.333333
897,1,0,1,0,0,0,0,1,0.417391,0.333333,0.333333
898,1,0,0,1,0,0,0,2,0.547826,1.000000,0.000000


In [93]:
naive_bayes_classifier = GaussianNB()
naive_bayes_classifier.fit(X_train, y_train)
Y_pred_nb = naive_bayes_classifier.predict(X_test)

cm = confusion_matrix(y_test, Y_pred_nb)

print('Confusion matrix for Gaussian Naive Bayes\n',cm)

naive_bayes_accuracy = round(100 * accuracy_score(y_test, Y_pred_nb), 2)
model_accuracy['Gaussian Naive Bayes'] = naive_bayes_accuracy

naive_bayes_precision = round(100 * precision_score(y_test, Y_pred_nb, average = 'weighted'), 2)
model_precision['Gaussian Naive Bayes'] = naive_bayes_precision

naive_bayes_recall = round(100 * recall_score(y_test, Y_pred_nb, average = 'weighted'), 2)
model_recall['Gaussian Naive Bayes'] = naive_bayes_recall

print('The accuracy of this model is {} %.'.format(naive_bayes_accuracy))
print('The precision of this model is {} %.'.format(naive_bayes_precision))
print('The recall of this model is {} %.'.format(naive_bayes_recall))

Confusion matrix for Gaussian Naive Bayes
 [[150   0]
 [  0 120]]
The accuracy of this model is 100.0 %.
The precision of this model is 100.0 %.
The recall of this model is 100.0 %.


In [94]:
X_test

array([[0.        , 0.        , 1.        , ..., 0.38695652, 1.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.63043478, 0.        ,
        0.        ],
       [1.        , 0.        , 0.        , ..., 0.1173913 , 1.        ,
        0.5       ],
       ...,
       [0.        , 0.        , 1.        , ..., 0.25217391, 1.        ,
        0.        ],
       [1.        , 0.        , 0.        , ..., 0.44782609, 1.        ,
        0.83333333],
       [1.        , 0.        , 0.        , ..., 0.62173913, 0.33333333,
        0.5       ]])

In [95]:
model = naive_bayes_classifier.predict(np.array([[0,1,0,0,0,1,0,0.978261,1.000000	,0.833333]]))
model

array([1], dtype=uint8)

In [96]:
Y_pred_nb

array([1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 0], dtype=uint8)

In [97]:
clf = GaussianNB()
clf.fit(matrices_X, matrices_Y)
clf_pf = GaussianNB()
clf_pf.partial_fit(matrices_X, matrices_Y, np.unique(matrices_Y))

GaussianNB()

In [98]:
FIRST_IDX = 0

In [99]:
# try with value [0,	0,	0,	0,	0,	0,	1,	0.582609,	0.666667,	0]
result_test_naive_bayes = clf_pf.predict([[0,	1,	0,	0,	0,	0,	0,	0.582609,	0.666667,	0]])[FIRST_IDX]
print(f"Customer : Putra Memiliki risk rating {result_test_naive_bayes} Pada metode Gaussian Naive Bayes model")

Customer : Putra Memiliki risk rating 1 Pada metode Gaussian Naive Bayes model


In [100]:
K = 15
knn = KNeighborsClassifier(n_neighbors = K)

knn.fit(X_train,y_train)

KNeighborsClassifier(n_neighbors=15)

In [101]:
Y_pred_knn = knn.predict(X_test)

In [102]:
knn.score(X_test, y_test)

0.9925925925925926

In [103]:
# Custom value to predict
result_test_knn = knn.predict([[0,	1,	0,	0,	0,	0,	0,	0.582609,	0.666667,	0]])
print(f"Customer : Putra Memiliki risk rating {result_test_knn[FIRST_IDX]} Pada metode KNN model")

Customer : Putra Memiliki risk rating 1 Pada metode KNN model


In [104]:
### Making the confusion matrix
cm = confusion_matrix(y_test, Y_pred_knn)

### Printing the accuracy, precision, and recall of the model
print('Confusion matrix for K - Nearest Neighbors\n',cm)
nn1_accuracy = round(100 * accuracy_score(y_test, Y_pred_knn), 2)
model_accuracy['1 - Nearest Neighbors'] = nn1_accuracy

nn1_precision = round(100 * precision_score(y_test, Y_pred_knn, average = 'weighted'), 2)
model_precision['1 - Nearest Neighbors'] = nn1_precision

nn1_recall = round(100 * recall_score(y_test, Y_pred_knn, average = 'weighted'), 2)
model_recall['1 - Nearest Neighbors'] = nn1_recall

print('The accuracy of this model is {} %.'.format(nn1_accuracy))
print('The precision of this model is {} %.'.format(nn1_precision))
print('The recall of this model is {} %.'.format(nn1_recall))

Confusion matrix for K - Nearest Neighbors
 [[150   0]
 [  2 118]]
The accuracy of this model is 99.26 %.
The precision of this model is 99.27 %.
The recall of this model is 99.26 %.


In [105]:
### Applying Decision Tree Classification model

decision_tree_classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 15)
decision_tree_classifier.fit(X_train, y_train)


### Predicting the Test set results

Y_pred_dc = decision_tree_classifier.predict(X_test)

In [106]:
### Making the confusion matrix
cm = confusion_matrix(y_test, Y_pred_dc)

### Printing the accuracy, precision, and recall of the model
print('Confusion matrix for Decision Tree\n',cm)

decision_tree_accuracy = round(100 * accuracy_score(y_test, Y_pred_dc), 2)
model_accuracy['Decision Tree'] = decision_tree_accuracy

decision_tree_precision = round(100 * precision_score(y_test, Y_pred_dc, average = 'weighted'), 2)
model_precision['Decision Tree'] = decision_tree_precision

decision_tree_recall = round(100 * recall_score(y_test, Y_pred_dc, average = 'weighted'), 2)
model_recall['Decision Tree'] = decision_tree_recall

print('The accuracy of this model is {} %.'.format(decision_tree_accuracy))
print('The precision of this model is {} %.'.format(decision_tree_precision))
print('The recall of this model is {} %.'.format(decision_tree_recall))

Confusion matrix for Decision Tree
 [[150   0]
 [  0 120]]
The accuracy of this model is 100.0 %.
The precision of this model is 100.0 %.
The recall of this model is 100.0 %.


In [107]:
# Custom value to predict
result_test_knn = decision_tree_classifier.predict([[0,	1,	0,	0,	0,	0,	0,	0.582609,	0.666667,	0]])
print(f"Customer : Putra Memiliki risk rating {result_test_knn[FIRST_IDX]} Pada metode Decision Tree model")

Customer : Putra Memiliki risk rating 1 Pada metode Decision Tree model


In [108]:
clf = BaggingClassifier(base_estimator=DecisionTreeClassifier(),n_estimators=10, random_state=0).fit(X_train, y_train)
rsb = clf.predict(X_test)
b = ['Decision Tree']
Tree = pd.DataFrame(rsb,columns = b)

In [109]:
X_test.shape

(270, 10)

In [110]:
clf = BaggingClassifier(base_estimator=KNeighborsClassifier(n_neighbors = K),n_estimators=10, random_state=0).fit(X_train, y_train)
rsa = clf.predict(X_test)
a = ['KNN']
KNN = pd.DataFrame(rsa,columns = a)

In [111]:
clf = BaggingClassifier(base_estimator=GaussianNB(),n_estimators=10, random_state=0).fit(X_train, y_train)
rsc = clf.predict(X_test)
c = ['Naive Bayes']
Bayes = pd.DataFrame(rsc,columns = c)

In [112]:
Result = pd.concat([Tree, KNN,Bayes], axis=1)
Result

Unnamed: 0,Decision Tree,KNN,Naive Bayes
0,1,1,1
1,1,1,1
2,0,0,0
3,1,1,1
4,0,0,0
...,...,...,...
265,1,1,1
266,0,0,0
267,1,1,1
268,0,0,0


In [113]:
bagging_accuracy1 = round(100 * accuracy_score(y_test, Bayes), 2)
bagging_accuracy2 = round(100 * accuracy_score(y_test, Tree), 2)
bagging_accuracy3 = round(100 * accuracy_score(y_test, KNN), 2)
print('The accuracy of this model is Bagging Naive Bayes {} %.'.format(bagging_accuracy1))
print('The accuracy of this model is Bagging Decision Tree {} %.'.format(bagging_accuracy2))
print('The accuracy of this model is Bagging kNN {} %.'.format(bagging_accuracy3))

The accuracy of this model is Bagging Naive Bayes 100.0 %.
The accuracy of this model is Bagging Decision Tree 100.0 %.
The accuracy of this model is Bagging kNN 99.26 %.


In [114]:
import pickle

with open('normalisasiPendapatan','wb') as r:
  pickle.dump(scalerPendapatan,r)

with open('normalisasiDurasi','wb') as r:
  pickle.dump(scalerDurasi,r)

with open('normalisasiTanggungan','wb') as r:
  pickle.dump(scalerTanggungan,r)

with open('bayes.pickle','wb') as r:
  pickle.dump(naive_bayes_classifier,r)

with open('knn.pickle','wb') as r:
  pickle.dump(knn,r)

with open('decisiontree.pickle','wb') as r:
  pickle.dump(decision_tree_classifier,r)

In [115]:
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle
model = GaussianNB()
model.fit(X_train, y_train)
# save the model to disk
filename = 'modelCreditScore.pkl'
pickle.dump(model, open(filename, 'wb'))