In [1]:
# Sample dataset

import pandas as pd

# Nine hand-entered records, stored column-wise:
# 'Tinggi' and 'Berat' are numeric, 'JK' is a 'Pria'/'Wanita' label.
sensus = {
    'Tinggi': [155, 163, 170, 180, 190, 165, 175, 185, 158],
    'Berat': [40, 55, 70, 80, 35, 48, 60, 65, 50],
    'JK': ['Pria'] * 4 + ['Wanita'] * 5,
}

# Each dict key becomes a column; the bare name on the last line displays the frame.
sensus_df = pd.DataFrame(data=sensus)
sensus_df

Unnamed: 0,Tinggi,Berat,JK
0,155,40,Pria
1,163,55,Pria
2,170,70,Pria
3,180,80,Pria
4,190,35,Wanita
5,165,48,Wanita
6,175,60,Wanita
7,185,65,Wanita
8,158,50,Wanita


In [2]:
# Features & target

import numpy as np

# Features are the 'Tinggi' and 'JK' columns; the target is the 'Berat' column.
X_train = np.array(sensus_df.loc[:, ['Tinggi', 'JK']])
y_train = np.array(sensus_df.loc[:, 'Berat'])

print(f'X_train:\n{X_train}\n')
print(f'y_train: {y_train}')

X_train:
[[155 'Pria']
 [163 'Pria']
 [170 'Pria']
 [180 'Pria']
 [190 'Wanita']
 [165 'Wanita']
 [175 'Wanita']
 [185 'Wanita']
 [158 'Wanita']]

y_train: [40 55 70 80 35 48 60 65 50]


In [3]:
# Preprocess dataset: convert the label column to binary numeric values

# Transposing swaps rows and columns, so every feature ends up on its own row.
X_train_transposed = X_train.T

print(f'X_train:\n{X_train}\n')
print(f'X_train_transposed:\n{X_train_transposed}')

X_train:
[[155 'Pria']
 [163 'Pria']
 [170 'Pria']
 [180 'Pria']
 [190 'Wanita']
 [165 'Wanita']
 [175 'Wanita']
 [185 'Wanita']
 [158 'Wanita']]

X_train_transposed:
[[155 163 170 180 190 165 175 185 158]
 ['Pria' 'Pria' 'Pria' 'Pria' 'Wanita' 'Wanita' 'Wanita' 'Wanita'
  'Wanita']]


In [4]:
from sklearn.preprocessing import LabelBinarizer

# Row 1 of the transposed array holds the JK labels.
# Learn the label set first, then encode that same row with the fitted binarizer.
lb = LabelBinarizer()
lb.fit(X_train_transposed[1])
jk_binarised = lb.transform(X_train_transposed[1])

print(f'JK: {X_train_transposed[1]}\n')
print(f'JK_binarised:\n{jk_binarised}')

JK: ['Pria' 'Pria' 'Pria' 'Pria' 'Wanita' 'Wanita' 'Wanita' 'Wanita' 'Wanita']

JK_binarised:
[[0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]]


In [5]:
jk_binarised = jk_binarised.flatten() # flatten() converts a multi-dimensional array into a one-dimensional array
jk_binarised

array([0, 0, 0, 0, 1, 1, 1, 1, 1])

In [6]:
# Overwrite the label row with its 0/1 codes (assignment into the existing array),
# then transpose back so that each row is one sample again.
# NOTE(review): the array previously held strings, so it presumably still has
# object dtype after this step; confirm, and consider casting to a numeric dtype.
X_train_transposed[1, :] = jk_binarised
X_train = X_train_transposed.T

print(f'X_train_transposed:\n{X_train_transposed}\n')
print(f'X_train:\n{X_train}')

X_train_transposed:
[[155 163 170 180 190 165 175 185 158]
 [0 0 0 0 1 1 1 1 1]]

X_train:
[[155 0]
 [163 0]
 [170 0]
 [180 0]
 [190 1]
 [165 1]
 [175 1]
 [185 1]
 [158 1]]


In [7]:
# Train the KNN regression model

from sklearn.neighbors import KNeighborsRegressor

K = 3  # value passed as n_neighbors
model = KNeighborsRegressor(n_neighbors=K)
# Kept as the last bare expression so the fitted estimator is displayed.
model.fit(X=X_train, y=y_train)

KNeighborsRegressor(n_neighbors=3)

In [10]:
# Predict body weight

# A single query sample, shaped (1, 2) to match the two-column layout of X_train.
X_new = np.array([168, 1]).reshape(1, -1)
X_new

array([[168,   1]])

In [11]:
# Prediction of the fitted model for the single sample in X_new.
y_pred = model.predict(X=X_new)
y_pred

array([57.66666667])

In [14]:
# Evaluate the KNN regression model

# Test samples: row i of X_test pairs with element i of y_test.
X_test = np.array([
    [170, 0],
    [178, 1],
    [165, 0],
    [182, 1],
])  # features
y_test = np.array([60, 85, 55, 70])  # target

print(f'X_test:\n{X_test}\n')
print(f'y_test: {y_test}')

X_test:
[[170   0]
 [178   1]
 [165   0]
 [182   1]]

y_test: [60 85 55 70]


In [15]:
# Predictions for every row of the test set; used by the metric cells below.
y_pred = model.predict(X=X_test)
y_pred

array([59.33333333, 68.33333333, 57.66666667, 68.33333333])

In [16]:
# Coefficient of determination (R-squared)

from sklearn.metrics import r2_score

r_squared = r2_score(y_true=y_test, y_pred=y_pred)

print(f'R-squared: {r_squared}')  # the closer R-squared is to 1, the better

R-squared: 0.45121693121693096


In [17]:
# Mean Absolute Error (MAE), also called Mean Absolute Deviation (MAD)

from sklearn.metrics import mean_absolute_error

MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)

print(f'MAE: {MAE}')  # a smaller MAE indicates a better model

MAE: 5.416666666666668


In [18]:
from sklearn.metrics import mean_squared_error

MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)

# MSE averages the squared errors, so large errors weigh more than in MAE.
# It is larger than MAE here, but not in general: absolute errors
# below 1 shrink when squared, which can make MSE smaller than MAE.
print(f'MSE: {MSE}')

MSE: 72.02777777777781


In [19]:
# The feature-scaling problem

from scipy.spatial.distance import euclidean

# height in millimetres
X_train = np.array([
    [1750, 1],
    [1650, 0],
])  # features of the training set
X_new = np.array([[1620, 1]])  # features of the data point to predict

# Distance from the query point to each training row, in row order.
[euclidean(X_new[0], row) for row in X_train]

[130.0, 30.01666203960727]

In [20]:
# height in metres

X_train = np.array([
    [1.75, 1],
    [1.65, 0],
])
X_new = np.array([[1.62, 1]])

# Same distance computation as before; only the unit of the first column differs.
[euclidean(X_new[0], row) for row in X_train]

[0.1299999999999999, 1.0004498987955368]

In [21]:
# Apply the Standard Scaler (standard score, a.k.a. z-score)

from sklearn.preprocessing import StandardScaler

# One scaler instance bound to `ss`; later cells call fit_transform / transform on it.
ss = StandardScaler()

In [24]:
# height in millimetres

# Fit the scaler on the training rows and scale them in the same call.
X_train = np.array([
    [1750, 1],
    [1650, 0],
])
X_train_scaled = ss.fit_transform(X_train)
print(f'X_train_scaled:\n{X_train_scaled}\n')

# The new point is only transformed, using the scaler fitted on X_train above.
X_new = np.array([[1620, 1]])
X_new_scaled = ss.transform(X_new)
print(f'X_new_scaled: {X_new_scaled}\n')

# Distances measured in the scaled feature space.
jarak = [euclidean(X_new_scaled[0], row) for row in X_train_scaled]
print(f'jarak: {jarak}')

X_train_scaled:
[[ 1.  1.]
 [-1. -1.]]

X_new_scaled: [[-1.6  1. ]]

jarak: [2.6, 2.08806130178211]


In [25]:
# height in metres

# Re-fit the scaler on the training rows expressed in metres.
X_train = np.array([
    [1.75, 1],
    [1.65, 0],
])
X_train_scaled = ss.fit_transform(X_train)
print(f'X_train_scaled:\n{X_train_scaled}\n')

# The new point is only transformed, using the scaler fitted on X_train above.
X_new = np.array([[1.62, 1]])
X_new_scaled = ss.transform(X_new)
print(f'X_new_scaled: {X_new_scaled}\n')

# Distances measured in the scaled feature space.
jarak = [euclidean(X_new_scaled[0], row) for row in X_train_scaled]
print(f'jarak: {jarak}')

X_train_scaled:
[[ 1.  1.]
 [-1. -1.]]

X_new_scaled: [[-1.6  1. ]]

jarak: [2.5999999999999956, 2.088061301782109]


In [27]:
# Apply feature scaling to KNN

# Training set: two feature columns per row, one target value per row.
X_train = np.array([
    [155, 0],
    [163, 0],
    [170, 0],
    [180, 0],
    [190, 1],
    [165, 1],
    [175, 1],
    [185, 1],
    [158, 1],
])
y_train = np.array([40, 55, 70, 80, 35, 48, 60, 65, 50])

# Test set
X_test = np.array([
    [170, 0],
    [178, 1],
    [165, 0],
    [182, 1],
])
y_test = np.array([60, 85, 55, 70])

In [28]:
# Feature scaling (Standard Scaler)

# Fit the scaler on the training features only, then apply that same
# fitted scaler to both the training and the test features.
X_train_scaled = ss.fit(X_train).transform(X_train)
X_test_scaled = ss.transform(X_test)

print(f'X_train_scaled:\n{X_train_scaled}\n')
print(f'X_test_scaled:\n{X_test_scaled}\n')

X_train_scaled:
[[-1.41700832 -1.11803399]
 [-0.7182097  -1.11803399]
 [-0.1067609  -1.11803399]
 [ 0.76673738 -1.11803399]
 [ 1.64023566  0.89442719]
 [-0.54351004  0.89442719]
 [ 0.32998824  0.89442719]
 [ 1.20348652  0.89442719]
 [-1.15495884  0.89442719]]

X_test_scaled:
[[-0.1067609  -1.11803399]
 [ 0.59203772  0.89442719]
 [-0.54351004 -1.11803399]
 [ 0.94143704  0.89442719]]



In [31]:
# Train & evaluate the model

# Re-fit the existing `model` object on the scaled training features,
# then predict on the scaled test features.
model.fit(X=X_train_scaled, y=y_train)
y_pred = model.predict(X=X_test_scaled)

# Same two error metrics as in the earlier evaluation cells.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f'MAE: {MAE}')
print(f'MSE: {MSE}')

MAE: 14.166666666666664
MSE: 337.4999999999999
