# 0. Import Library

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns

import scipy.stats

![](https://onlinelibrary.wiley.com/cms/asset/ea1d3bd8-afd7-4914-b645-74d424b6690d/advs3654-fig-0002-m.jpg)

# 1. Input Data

In [None]:
# Load the loan dataset; the relative path is resolved against the working directory.
df = pd.read_csv('simple_loan.csv')

# 2. Data Preprocessing

## 2.1 Exploratory Data Analysis (EDA)

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Summary statistics for the columns of the frame.
df.describe()

In [None]:
# Data type of every column.
df.dtypes

## 2.2 Data Cleaning

### - Missing Value

In [None]:
# Number of missing values in each column.
df.isnull().sum()

### - Inconsistent Data

In [None]:
# Re-check the column types before inspecting each column's values.
df.dtypes

#### age

In [None]:
# Row count per distinct 'age' value, to spot inconsistent labels.
df.groupby('age')['age'].count()

#### own_house

In [None]:
# Row count per distinct 'own_house' value, to spot inconsistent labels.
df.groupby('own_house')['own_house'].count()

#### credit

In [None]:
# Row count per distinct 'credit' value, to spot inconsistent labels.
df.groupby('credit')['credit'].count()

#### target

In [None]:
# Row count per distinct 'target' value; also shows the class balance.
df.groupby('target')['target'].count()

### - Outliers

In [None]:
# None (no outlier handling is performed for this dataset)

## 2.3 Data Transformation

![](https://images.datacamp.com/image/upload/v1677149248/label_encoding_d4ae789503.png?updated_at=2023-02-23T10:47:28.618Z)

In [None]:
# Column types before encoding.
df.dtypes

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
def labelEncode(data, columns):
    """Label-encode the given columns of ``data`` in place.

    For every name in ``columns`` a fresh ``LabelEncoder`` is fitted on that
    column's values cast to ``str``, and the resulting codes are stored in a
    new column named ``<name>_Encoded``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to encode. It is mutated: one ``*_Encoded`` column is added
        (or overwritten) per entry in ``columns``; the original columns are
        left untouched.
    columns : iterable of str
        Names of the columns to encode.

    Returns
    -------
    tuple
        ``(encoded_names, last_encoder)``. ``encoded_names`` lists the new
        column names in input order. ``last_encoder`` is the encoder fitted
        on the LAST column only, or ``None`` when ``columns`` is empty.
        Callers that need ``inverse_transform`` for a particular column must
        pass that column last.
    """
    columns_ = []
    # Bound up front so an empty ``columns`` returns None instead of raising
    # UnboundLocalError at the return statement.
    lb = None
    for i in columns:
        # A new encoder per column: each column gets its own code mapping.
        lb = LabelEncoder()
        # Values are cast to str before fitting.
        lb_ = lb.fit_transform(data[i].astype(str))
        encoded_name = i + '_Encoded'
        data[encoded_name] = lb_
        columns_.append(encoded_name)
    return columns_, lb

In [None]:
# Columns to label-encode. 'target' is deliberately last: labelEncode returns
# only the encoder fitted on the final column, and that encoder is used later
# to decode predictions.
columns = ['age', 'employed', 'own_house', 'credit', 'target']
# Adds one '<name>_Encoded' column per entry to df (in place).
enc_columns, lb=labelEncode(df, columns)

In [None]:
# Show the first 10 rows, now including the added *_Encoded columns.
df.head(10)

In [None]:
# For every encoded column, print the source column name followed by the
# distinct original values and the distinct encoded values.
suffix_len = len('_Encoded')
for encoded_col in enc_columns:
    source_col = encoded_col[:-suffix_len]  # strip the '_Encoded' suffix
    print(source_col)
    print(' ', df[source_col].unique())
    print(' ', df[encoded_col].unique())
    print('  -------')

## [Optional] Export เป็นไฟล์เมื่อทำ Data Cleaning เสร็จแล้ว

In [None]:
# Write the frame (original plus *_Encoded columns) to CSV and Excel, without
# the index column. The file names are placeholders to be replaced by the user.
# NOTE: to_excel needs an Excel writer engine (e.g. openpyxl) to be installed.
df.to_csv('รหัส.csv',index=False)
df.to_excel('รหัส.xlsx',index=False)

## 2.4 ตรวจสอบค่าสหสัมพันธ์ของตัวแปร x, y

In [None]:
# Pairwise correlation of the numeric columns only. The original (pre-encoding)
# columns are still in the frame and are presumably strings; a bare df.corr()
# raises on non-numeric columns in pandas >= 2.0, so restrict to numeric ones.
# (numeric_only is available from pandas 1.5.)
df.corr(numeric_only=True)

In [None]:
# Heatmap of the correlation matrix with the coefficient printed in each cell.
plt.rcParams['figure.figsize'] = 10, 7
# numeric_only=True: the frame still contains the original, presumably string,
# columns, which make a bare df.corr() raise in pandas >= 2.0.
sns.heatmap(df.corr(numeric_only=True), annot=True);

## 2.5 การกำหนด Feature / Target

In [None]:
# Whole frame as a NumPy array (original and encoded columns together).
# NOTE(review): DataMatrix is not referenced again in the visible notebook.
DataMatrix = df.values
DataMatrix.shape

In [None]:
# Encoded predictor columns used as model input.
feature_cols = [
    'age_Encoded',
    'employed_Encoded',
    'own_house_Encoded',
    'credit_Encoded',
]
# X: feature matrix, Y: encoded target vector (both NumPy arrays).
X = df[feature_cols].values
Y = df['target_Encoded'].values

## 2.6 Data Preparation (แบ่งข้อมูลสำหรับ Training / Testing)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; random_state pins the shuffle so the
# split is reproducible.
X_Train, X_Test, Y_Train, Y_Test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=0,
)

# 3. Modelling: k-Nearest Neighbors

![](https://miro.medium.com/v2/resize:fit:1151/0*ItVKiyx2F3ZU8zV5)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

## 3.1 Training Data

In [None]:
# k-NN classifier that uses the 3 nearest neighbours.
model = KNeighborsClassifier(n_neighbors=3)

In [None]:
# Fit the classifier on the training split.
model.fit(X_Train, Y_Train)

In [None]:
# Mean accuracy on the TRAINING split (not a measure of generalisation).
model.score(X_Train, Y_Train)

## 3.2 Predict Data

In [None]:
# Predicted encoded class for every row of the test split.
y_predict = model.predict(X_Test)

In [None]:
# Decode the predictions back to the original labels. `lb` is the encoder that
# labelEncode returned, i.e. the one fitted on the last entry of `columns`
# ('target'); this breaks if 'target' is not last in that list.
y_predict_class = lb.inverse_transform(y_predict)

In [None]:
# Print each encoded prediction next to its decoded label.
for res, label in zip(y_predict, y_predict_class):
    print('Result: {} - {}'.format(res, label))

# 4. Model Evaluation

![](https://2.bp.blogspot.com/-EvSXDotTOwc/XMfeOGZ-CVI/AAAAAAAAEiE/oePFfvhfOQM11dgRn9FkPxlegCXbgOF4QCLcBGAs/s1600/confusionMatrxiUpdated.jpg)

![](https://4.bp.blogspot.com/-jNSCuV2OtOY/XMfoQka3qaI/AAAAAAAAEjY/TiFX8HALafAVzu_zp5njDqOoGCqzyVouwCLcBGAs/s640/1_001%2Bnew.jpg)

In [None]:
from sklearn import metrics

In [None]:
# Confusion matrix on the test split (rows: true class, columns: predicted class).
cnf_matrix = metrics.confusion_matrix(Y_Test, y_predict)
cnf_matrix

In [None]:
# Draw the confusion matrix, labelling both axes with the encoded classes 0 and 1.
plt.rcParams['figure.figsize'] = (10, 7)
cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cnf_matrix,
    display_labels=[0, 1],
)
cm_display.plot()
plt.show()

In [None]:
from sklearn.metrics import classification_report
# Display names for encoded classes 0 and 1, in that order.
# NOTE(review): assumes the target encoder mapped 'No' -> 0 and 'Yes' -> 1;
# confirm against lb.classes_.
target_names = ['No', 'Yes']
# Per-class precision, recall, F1 and support on the test split.
print(classification_report(Y_Test, y_predict, target_names=target_names))

# [Optional] Save Machine Learning Models

In [None]:
import pickle

# Placeholder file name for the serialized model; replace as needed.
filename = 'pickle_รหัสนศ.sav'
# The context manager guarantees the file is flushed and closed even if
# pickle.dump raises; the bare open() used before never closed the handle.
# NOTE: only unpickle this file from a trusted source — pickle.load can
# execute arbitrary code.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)