In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the loan dataset from the CSV file.
csv_path = 'pinjaman.csv'
df = pd.read_csv(csv_path)

# Preview the first 5 rows.
df.head(n=5)

Unnamed: 0,Usia,Pendapatan,Status_Perkawinan,Jumlah_Pinjaman,Durasi_Pinjaman,Status_Pekerjaan,Lulus_Kredit
0,25,40,Belum Menikah,50,5,Karyawan Tetap,Tidak Layak
1,45,100,Menikah,80,10,Wirausaha,Layak
2,35,60,Menikah,100,15,Karyawan Kontrak,Tidak Layak
3,28,75,Belum Menikah,60,7,Karyawan Tetap,Layak
4,50,120,Menikah,150,20,Wirausaha,Layak


In [3]:
# Work on an independent copy of the raw frame. `pd.DataFrame(df)` does not
# copy DataFrame input by default, so edits made through `df_encode` could
# leak back into `df`; `.copy()` keeps the raw data intact for re-runs.
df_encode = df.copy()

In [4]:
# Integer codes assigned to each label of the categorical columns.
category_codes = {
    'Status_Perkawinan': {'Menikah': 1, 'Belum Menikah': 0},
    'Status_Pekerjaan': {'Karyawan Tetap': 0, 'Wirausaha': 1, 'Karyawan Kontrak': 2, 'Pensiunan': 3},
    'Lulus_Kredit': {'Layak': 1, 'Tidak Layak': 0},
}

# Encode from the raw frame `df` rather than from `df_encode` itself, so that
# re-running this cell gives the same result (mapping already-encoded integers
# a second time would turn every value into NaN).
encoded_columns = {}
for column, codes in category_codes.items():
    encoded = df[column].map(codes)
    # `.map` yields NaN for any label missing from the dictionary; fail loudly
    # instead of letting NaN flow silently into the model.
    unmapped = df.loc[encoded.isna(), column].unique()
    if len(unmapped) > 0:
        raise ValueError(f"Unmapped values in column '{column}': {list(unmapped)}")
    encoded_columns[column] = encoded

# `assign` returns a new frame with the three columns replaced in place
# (same column order as `df`).
df_encode = df.assign(**encoded_columns)

In [5]:
# Preview the first 5 rows of the encoded frame.
df_encode.head(n=5)

Unnamed: 0,Usia,Pendapatan,Status_Perkawinan,Jumlah_Pinjaman,Durasi_Pinjaman,Status_Pekerjaan,Lulus_Kredit
0,25,40,0,50,5,0,0
1,45,100,1,80,10,1,1
2,35,60,1,100,15,2,0
3,28,75,0,60,7,0,1
4,50,120,1,150,20,1,1


In [6]:
# Columns used as model inputs, and the column to predict.
feature_columns = [
    'Usia',
    'Pendapatan',
    'Status_Perkawinan',
    'Jumlah_Pinjaman',
    'Durasi_Pinjaman',
    'Status_Pekerjaan',
]
target_column = 'Lulus_Kredit'

X_encode = df_encode[feature_columns]
y_encode = df_encode[target_column]

In [7]:
# Hold out 30% of the rows for testing; stratify on the target so both splits
# keep the same class proportions, and fix the seed for reproducibility.
split_parts = train_test_split(
    X_encode,
    y_encode,
    test_size=0.3,
    random_state=42,
    stratify=y_encode,
)
X_train_cat, X_test_cat, y_train_cat, y_test_cat = split_parts

In [8]:
# For every feature, list the distinct values present in each split so that
# values appearing only in the test split are easy to spot.
for feature in X_encode.columns:
    train_values = sorted(X_train_cat[feature].unique())
    test_values = sorted(X_test_cat[feature].unique())
    # The trailing empty string emits the blank separator line after each feature.
    print(
        f"{feature}:",
        f"Train: {train_values}",
        f"Test : {test_values}",
        "",
        sep="\n",
    )


Usia:
Train: [22, 25, 26, 27, 28, 29, 30, 34, 35, 37, 39, 40, 42, 45, 47, 48, 49, 50, 53, 55, 60]
Test : [23, 31, 32, 33, 36, 38, 41, 44, 46]

Pendapatan:
Train: [30, 40, 45, 50, 52, 60, 65, 75, 78, 85, 92, 95, 100, 105, 110, 120, 125, 130, 140]
Test : [35, 55, 60, 70, 80, 88, 90, 95, 110]

Status_Perkawinan:
Train: [0, 1]
Test : [0, 1]

Jumlah_Pinjaman:
Train: [20, 35, 40, 48, 50, 55, 60, 65, 75, 80, 85, 95, 100, 115, 120, 125, 140, 150, 160, 180, 200]
Test : [30, 45, 50, 70, 90, 100, 105, 110, 120]

Durasi_Pinjaman:
Train: [2, 4, 5, 6, 7, 8, 10, 11, 12, 15, 17, 18, 20, 25]
Test : [3, 5, 6, 9, 10, 12, 14, 15]

Status_Pekerjaan:
Train: [0, 1, 2, 3]
Test : [0, 1, 2]



In [9]:
# CategoricalNB treats every feature value as a category code 0..n-1 and, by
# default, sizes n from the largest code seen in the training split only.
# A row whose code exceeds that training maximum then fails at predict time.
# Size the per-feature category tables from the whole encoded feature frame so
# every code present in the dataset is valid regardless of how the split falls.
# (Only the cardinality is taken from the full data, not any labels.)
# NOTE(review): Usia, Pendapatan, Jumlah_Pinjaman and Durasi_Pinjaman look like
# numeric quantities rather than categories - consider binning them or using a
# Gaussian model for those columns; confirm intent.
min_categories = X_encode.max().to_numpy() + 1

cat_nb_model = CategoricalNB(min_categories=min_categories)
cat_nb_model.fit(X_train_cat, y_train_cat)

In [10]:
# Predicted class labels for the held-out test rows.
y_pred_cat = cat_nb_model.predict(X=X_test_cat)

In [11]:
# One new applicant, values in the same order as the training columns:
# Usia, Pendapatan, Status_Perkawinan, Jumlah_Pinjaman, Durasi_Pinjaman, Status_Pekerjaan
new_data = np.array([[20, 101, 0, 80, 7, 0]])

# The model was fitted on a DataFrame, so pass a DataFrame with the same
# column names; this makes the feature order explicit and avoids the
# feature-name mismatch warning that a bare array triggers.
new_data_df = pd.DataFrame(new_data, columns=X_encode.columns)
new_prediction = cat_nb_model.predict(new_data_df)



In [12]:
# Show the predicted label array for the new sample.
new_prediction

array([0], dtype=int64)

In [13]:
# Translate the predicted code into a readable verdict:
# code 0 prints the rejection message, any other code the approval message.
verdict = 'Tidak Lulus Kredit' if new_prediction[0] == 0 else 'Lulus Kredit'
print(verdict)

Tidak Lulus Kredit


In [14]:
# Fraction of test rows whose predicted label matches the true label.
cat_accuracy = accuracy_score(y_true=y_test_cat, y_pred=y_pred_cat)
cat_accuracy

1.0

In [15]:
# Confusion matrix of true labels (rows) against predicted labels (columns).
cat_conf_matrix = confusion_matrix(y_true=y_test_cat, y_pred=y_pred_cat)

In [16]:
# Display both evaluation results together as a tuple.
cat_accuracy, cat_conf_matrix

(1.0,
 array([[3, 0],
        [0, 6]], dtype=int64))

In [17]:
# Persist the fitted model to disk. The `with` block guarantees the file
# handle is flushed and closed even if pickling raises.
filename = 'naive_bayes_models.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(cat_nb_model, model_file)