In [None]:
# 1. Imports and DataFrame construction
# Home-ownership prediction example
import pandas as pd

# The two class labels used in the 'status' column.
MEMILIKI = 'memiliki rumah'
TIDAK_MEMILIKI = 'tidak memiliki rumah'

# Toy home-ownership dataset: one entry per person, aligned by position
# across the three lists (age, salary, ownership status).
rumah = {
    'usia': [28, 35, 42, 21, 29, 45, 39, 26, 32, 37, 20],
    'gaji': [
        5000000, 7500000, 10000000, 2500000, 5600000, 8000000,
        6200000, 4500000, 6900000, 7200000, 3800000,
    ],
    'status': [
        MEMILIKI, MEMILIKI, MEMILIKI, TIDAK_MEMILIKI, TIDAK_MEMILIKI, MEMILIKI,
        MEMILIKI, TIDAK_MEMILIKI, TIDAK_MEMILIKI, MEMILIKI, TIDAK_MEMILIKI,
    ],
}

rumah_df = pd.DataFrame(data=rumah)
rumah_df

In [None]:
# 2. Data visualisation
import matplotlib.pyplot as plt

# One scatter series per ownership status so each class gets its own colour
# and legend entry.
fig, ax = plt.subplots()
for label_status, kelompok in rumah_df.groupby('status'):
    ax.scatter(kelompok['usia'], kelompok['gaji'], label=label_status)

ax.legend(loc='upper left')
ax.set_title("Sebaran data Kepemilikan Rumah, Usia dan Gaji")
ax.set_xlabel('Usia')
ax.set_ylabel('Gaji')
ax.grid(True)
plt.show()

In [None]:
# 3. Dataset preprocessing
import numpy as np

# Split the frame into a feature matrix (age, salary) and a target vector
# (ownership status), both as plain NumPy arrays.
kolom_fitur = ['usia', 'gaji']
kolom_target = 'status'

x_train = np.array(rumah_df[kolom_fitur])
y_train = np.array(rumah_df[kolom_target])

print(f'x_train : \n {x_train}\n')
print(f'y_train: \n {y_train}\n')

In [None]:
# 4. Label Binarizer
from sklearn.preprocessing import LabelBinarizer

# Encode the two status strings as 0/1.
# The binarizer is fitted on the raw 'status' column rather than on y_train
# itself: y_train is overwritten by this cell (and flattened in step 5), so
# fitting on it would make a re-run of this cell fit on already-encoded 0/1
# values and break lb.inverse_transform later on.
# NOTE(review): which string maps to 1 is decided by the binarizer's class
# ordering - check lb.classes_ before interpreting precision/recall.
lb = LabelBinarizer()
y_train = lb.fit_transform(rumah_df['status'].to_numpy())
print(f'y_train : {y_train}')

In [None]:
# 5. Flatten y_train
# Collapse the binarized labels into a 1-D array before they are handed to
# model.fit in step 6.
# NOTE(review): presumably y_train is column-shaped (n_samples, 1) coming out
# of the LabelBinarizer step - confirm from the output of step 4.
y_train = y_train.flatten()
print(f'y_train : {y_train}')

In [None]:
# 6. KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier

# Classification task: K is the number of nearest neighbours that vote.
K = 3

# NOTE(review): the features are used unscaled; 'gaji' is in the millions while
# 'usia' is in the tens, so a distance-based model will presumably be driven
# almost entirely by salary - consider standardising the features.
model = KNeighborsClassifier(n_neighbors=K).fit(x_train, y_train)
model

In [None]:
# 7. New sample to classify
usia = 23
gaji = 7_000_000

# Wrap the sample as a single row of shape (1, 2): one sample, two features,
# matching the column order (usia, gaji) of x_train.
x_new = np.array([[usia, gaji]])
x_new

In [None]:
# 8. Predict the status
# Ask the classifier fitted in step 6 for the encoded (0/1) class of the
# single-row sample built in step 7.
y_new = model.predict(x_new)
y_new

In [None]:
# 9. Inverse-transform the prediction
# Map the encoded prediction back to its original status string using the
# LabelBinarizer from step 4.
lb.inverse_transform(y_new)

In [None]:
# 10. Visualisation including the new point
fig, ax = plt.subplots()
for label_status, kelompok in rumah_df.groupby('status'):
    ax.scatter(kelompok['usia'], kelompok['gaji'], label=label_status)

# The unlabelled sample from step 7, drawn as a red diamond so it stands out
# from the training points.
ax.scatter(usia, gaji, marker='D', color='red', label='misterius')

ax.legend(loc='upper left')
ax.set_title("Sebaran data Kepemilikan Rumah, Usia dan Gaji")
ax.set_xlabel('Usia')
ax.set_ylabel('Gaji')
ax.grid(True)
plt.show()

In [None]:
# 11. Euclidean distance calculation
# The new sample as a flat 1-D vector (usia, gaji), the form needed for the
# point-to-point distance computation in step 13.
misterius = np.array((usia, gaji))
misterius

In [None]:
# 12. Inspect x_train
# Display the training feature matrix (columns: usia, gaji) that the new
# sample is compared against.
x_train

In [None]:
# 13. Compute the distances
from scipy.spatial.distance import euclidean

# Distance from the new sample to every training row, kept in the same order
# as x_train so it can be attached to rumah_df as a column.
jarak_data = [euclidean(misterius, baris_latih) for baris_latih in x_train]
jarak_data

In [None]:
# 14. Add the distance column and sort
# Attach each row's distance to the new sample, then list the rows from
# nearest to farthest (the sorted frame is only displayed, not stored).
rumah_df['jarak'] = jarak_data
rumah_df.sort_values(by='jarak')

In [None]:
# 15. Model evaluation with a testing set
# Evaluation of the KNN classification model: hand-written testing set.

# Test features, one row per sample in (usia, gaji) order.
x_test = np.array([
    [30, 5500000],
    [40, 9000000],
    [25, 3000000],
    [38, 7000000],
])

# Ground-truth labels, encoded with the binarizer from step 4 so they are
# comparable with the model's 0/1 predictions.
label_test = np.array([
    'memiliki rumah',
    'memiliki rumah',
    'tidak memiliki rumah',
    'tidak memiliki rumah',
])
y_test = lb.transform(label_test).flatten()

print(f'x_test : {x_test}\n')
print(f'y_test : {y_test}\n')

In [None]:
# 16. Prediction on the testing set
# x_test and y_test come from step 15. They are deliberately not re-declared
# here: a second copy of the literals could silently drift from step 15 and
# make the metrics below describe a different test set than the one shown.
y_prediction = model.predict(x_test)
y_prediction

In [None]:
# 17. Accuracy
# Evaluation metric: share of all predictions that are correct.
# accuracy = (tp + tn) / (tp + tn + fp + fn)

from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_true=y_test, y_pred=y_prediction)
print(f'Accuracy : {accuracy}')

In [None]:
# 18. Precision
# Of the samples predicted positive, the share that really are positive.
# precision = tp / (tp + fp)

from sklearn.metrics import precision_score

precission = precision_score(y_true=y_test, y_pred=y_prediction)

print(f'Precission : {precission}')

In [None]:
# 19. Recall
# Of the samples that really are positive, the share the model found.
# recall = tp / (tp + fn)

from sklearn.metrics import recall_score

recall = recall_score(y_true=y_test, y_pred=y_prediction)

print(f'Recall : {recall}')

In [None]:
# 20. F1 score
# Harmonic mean of precision and recall.
# f1 = 2 * (precision * recall) / (precision + recall)

from sklearn.metrics import f1_score

f1 = f1_score(y_true=y_test, y_pred=y_prediction)

print(f'F1-Score : {f1}')

In [None]:
# 21. Classification report
# Text summary of the per-class metrics for the test-set predictions.

from sklearn.metrics import classification_report

cls_report = classification_report(y_true=y_test, y_pred=y_prediction)

print(f'Classification Report :\n {cls_report}')

In [None]:
# 22. Matthews Correlation Coefficient (MCC)
# MCC = (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
# Range: -1 (worst) to 1 (best).

from sklearn.metrics import matthews_corrcoef

MCC = matthews_corrcoef(y_true=y_test, y_pred=y_prediction)
print(f'MCC : {MCC}')