# Sample Dataset

In [25]:
import pandas as pd  # pandas provides the DataFrame that holds the sample data

# Sample census records: 'tinggi' (height), 'jk' (jenis kelamin, i.e. sex) and
# 'berat' (weight). The first four rows are 'pria', the remaining five 'wanita'.
sensus = {
    'tinggi': [158, 170, 183, 191, 155, 163, 180, 158, 170],
    'jk': ['pria'] * 4 + ['wanita'] * 5,
    'berat': [64, 86, 84, 80, 49, 59, 67, 54, 67],
}

sensus_df = pd.DataFrame(data=sensus)  # build the DataFrame from the column dict
sensus_df

Unnamed: 0,tinggi,jk,berat
0,158,pria,64
1,170,pria,86
2,183,pria,84
3,191,pria,80
4,155,wanita,49
5,163,wanita,59
6,180,wanita,67
7,158,wanita,54
8,170,wanita,67


# Regression dengan KNN

# Features & Target

In [26]:
import numpy as np  # numpy supplies the array type used for features and target

# Features are the 'tinggi' and 'jk' columns; the target is the 'berat' column.
features_df = sensus_df[['tinggi', 'jk']]
target_series = sensus_df['berat']

# Convert both selections to numpy arrays.
X_train = np.array(features_df)
y_train = np.array(target_series)

print(f'X_train:\n{X_train}\n')
print(f'y_train: {y_train}')

X_train:
[[158 'pria']
 [170 'pria']
 [183 'pria']
 [191 'pria']
 [155 'wanita']
 [163 'wanita']
 [180 'wanita']
 [158 'wanita']
 [170 'wanita']]

y_train: [64 86 84 80 49 59 67 54 67]


# Preprocess Dataset: Konversi Label menjadi Numerik Biner

In [27]:
# Transposing swaps rows and columns, so each feature ends up on its own row.
X_train_transposed = X_train.T

print(f'X_train:\n{X_train}\n')
print(f'X_train_transposed:\n{X_train_transposed}')

X_train:
[[158 'pria']
 [170 'pria']
 [183 'pria']
 [191 'pria']
 [155 'wanita']
 [163 'wanita']
 [180 'wanita']
 [158 'wanita']
 [170 'wanita']]

X_train_transposed:
[[158 170 183 191 155 163 180 158 170]
 ['pria' 'pria' 'pria' 'pria' 'wanita' 'wanita' 'wanita' 'wanita'
  'wanita']]


In [28]:
from sklearn.preprocessing import LabelBinarizer  # converts labels to binary values

# Row 1 of the transposed array holds the 'jk' labels.
jk_labels = X_train_transposed[1]

lb = LabelBinarizer()  # create the binarizer object
jk_binarised = lb.fit_transform(jk_labels)  # fit on the labels and encode them

print(f'jk: {jk_labels}\n')
print(f'jk_binarised:\n{jk_binarised}')

jk: ['pria' 'pria' 'pria' 'pria' 'wanita' 'wanita' 'wanita' 'wanita' 'wanita']

jk_binarised:
[[0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]]


In [29]:
jk_binarised = jk_binarised.flatten()  # collapse the multi-dimensional array into a one-dimensional array
jk_binarised

array([0, 0, 0, 0, 1, 1, 1, 1, 1])

In [30]:
# Store the binarised values in row 1 of the transposed array, then transpose
# back so that rows and columns return to their original positions.
X_train_transposed[1] = jk_binarised
X_train = X_train_transposed.T

print(f'X_train_transposed:\n{X_train_transposed}\n')
print(f'X_train:\n{X_train}')

X_train_transposed:
[[158 170 183 191 155 163 180 158 170]
 [0 0 0 0 1 1 1 1 1]]

X_train:
[[158 0]
 [170 0]
 [183 0]
 [191 0]
 [155 1]
 [163 1]
 [180 1]
 [158 1]
 [170 1]]


# Training KNN Regression Model

In [31]:
from sklearn.neighbors import KNeighborsRegressor  # import KNeighborsRegressor

K = 3  # number of neighbours passed to the regressor
model = KNeighborsRegressor(n_neighbors=K)  # create the model object
model.fit(X_train, y_train)  # train the model on the training features and targets

KNeighborsRegressor(n_neighbors=3)

# Prediksi Berat Badan

In [32]:
# One new sample wrapped in a 2-D array: height 155 cm, sex code 1 (wanita).
X_new = np.array([(155, 1)])
X_new

array([[155,   1]])

In [33]:
y_pred = model.predict(X_new)  # predict the target for the new sample with .predict()
y_pred

array([55.66666667])

# Evaluasi KNN Regression Model

In [34]:
# Test samples as (tinggi, jk code, berat) triples, so each feature row stays
# next to its target value.
test_samples = [(168, 0, 65), (180, 0, 96), (160, 1, 52), (169, 1, 67)]

X_test = np.array([[tinggi, jk] for tinggi, jk, _ in test_samples])
y_test = np.array([berat for *_, berat in test_samples])

print(f'X_test:\n{X_test}\n')
print(f'y_test: {y_test}')

X_test:
[[168   0]
 [180   0]
 [160   1]
 [169   1]]

y_test: [65 96 52 67]


In [35]:
y_pred = model.predict(X_test)  # predict the target for every row of the test set
y_pred

array([70.66666667, 79.        , 59.        , 70.66666667])

# Coefficient of Determination

In [36]:
from sklearn.metrics import r2_score  # coefficient of determination metric

r_squared = r2_score(y_true=y_test, y_pred=y_pred)

# The closer the value is to 1 the better the model; values near 0, or even
# negative, indicate a poor model.
print(f'R-squared: {r_squared}')

R-squared: 0.6290565226735438


# Mean Absolute Error (MAE) atau Mean Absolute Deviation (MAD)

In [37]:
from sklearn.metrics import mean_absolute_error  # mean absolute error metric

# Compare the test targets with the model's predictions.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)

print(f'MAE: {MAE}')

MAE: 8.333333333333336


# Mean Squared Error (MSE) atau Mean Squared Deviation (MSD)

In [38]:
from sklearn.metrics import mean_squared_error  # mean squared error metric

# Compare the test targets with the model's predictions.
MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f'MSE: {MSE}')

MSE: 95.8888888888889


# Permasalahan Scaling pada Features

In [39]:
from scipy.spatial.distance import euclidean  # Euclidean distance between two points

# Height in millimetres
X_train = np.array([[1700, 0], [1600, 1]])  # training-set features, height in millimetres
X_new = np.array([[1640, 0]])  # sample to be predicted, height in millimetres

# Distance from the new sample to each training row.
query_point = X_new[0]
[euclidean(query_point, train_point) for train_point in X_train]

[60.0, 40.01249804748511]

In [40]:
# Height in metres
# (A stray list literal that duplicated the previous cell's output was removed
# from the top of this cell; it was a no-op expression statement.)
X_train = np.array([[1.7, 0], [1.6, 1]])  # training-set features, height in metres
X_new = np.array([[1.64, 0]])  # sample to be predicted, height in metres

# Distance from the new sample to each training row.
[euclidean(X_new[0], d) for d in X_train]

[0.06000000000000005, 1.0007996802557444]

# Menerapkan Standard Scaler (Standard Score atau Z-Score)

In [41]:
from sklearn.preprocessing import StandardScaler  # import StandardScaler

ss = StandardScaler()  # create the scaler object

In [42]:
# Height in millimetres
X_train = np.array([[1700, 0], [1600, 1]])  # training-set features
X_train_scaled = ss.fit_transform(X_train)  # fit the scaler on X_train and scale it
print(f'X_train_scaled:\n{X_train_scaled}\n')

X_new = np.array([[1640, 0]])  # sample to be predicted
X_new_scaled = ss.transform(X_new)  # scale X_new with the scaler fitted above
print(f'X_new_scaled: {X_new_scaled}\n')

# Euclidean distance from the scaled new sample to each scaled training row.
query_scaled = X_new_scaled[0]
jarak = [euclidean(query_scaled, train_row) for train_row in X_train_scaled]
print(f'jarak: {jarak}')

X_train_scaled:
[[ 1. -1.]
 [-1.  1.]]

X_new_scaled: [[-0.2 -1. ]]

jarak: [1.2, 2.154065922853802]


In [43]:
# Height in metres
X_train = np.array([[1.7, 0], [1.6, 1]])  # training-set features
X_train_scaled = ss.fit_transform(X_train)  # fit the scaler on X_train and scale it
print(f'X_train_scaled:\n{X_train_scaled}\n')

X_new = np.array([[1.64, 0]])  # sample to be predicted
X_new_scaled = ss.transform(X_new)  # scale X_new with the scaler fitted above
print(f'X_new_scaled: {X_new_scaled}\n')

# Euclidean distance from the scaled new sample to each scaled training row.
query_scaled = X_new_scaled[0]
jarak = [euclidean(query_scaled, train_row) for train_row in X_train_scaled]
print(f'jarak: {jarak}')

X_train_scaled:
[[ 1. -1.]
 [-1.  1.]]

X_new_scaled: [[-0.2 -1. ]]

jarak: [1.2000000000000026, 2.1540659228538006]


# Menerapkan Features Scaling pada KNN

Dataset

In [44]:

# Training set: one [tinggi, jk code] row per sample, assembled column by column.
tinggi_train = [158, 170, 183, 191, 155, 163, 180, 158, 170]
jk_train = [0] * 4 + [1] * 5
X_train = np.column_stack((tinggi_train, jk_train))  # training features

y_train = np.array([64, 86, 84, 80, 49, 59, 67, 54, 67])  # training targets

# Test set, built the same way.
tinggi_test = [168, 180, 160, 169]
jk_test = [0, 0, 1, 1]
X_test = np.column_stack((tinggi_test, jk_test))
y_test = np.array([65, 96, 52, 67])

# Features Scaling (Standard Scaler)

In [45]:
# Fit the scaler on the training features, then apply that same fitted scaler
# to both the training and the test features.
ss.fit(X_train)
X_train_scaled = ss.transform(X_train)
X_test_scaled = ss.transform(X_test)

print(f'X_train_scaled:\n{X_train_scaled}\n')
print(f'X_test_scaled:\n{X_test_scaled}\n')

X_train_scaled:
[[-0.9908706  -1.11803399]
 [ 0.01869567 -1.11803399]
 [ 1.11239246 -1.11803399]
 [ 1.78543664 -1.11803399]
 [-1.24326216  0.89442719]
 [-0.57021798  0.89442719]
 [ 0.86000089  0.89442719]
 [-0.9908706   0.89442719]
 [ 0.01869567  0.89442719]]

X_test_scaled:
[[-0.14956537 -1.11803399]
 [ 0.86000089 -1.11803399]
 [-0.82260955  0.89442719]
 [-0.06543485  0.89442719]]



# Training & Evaluasi Model

In [46]:
# Refit the model on the scaled training features and predict the scaled test set.
y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Error metrics comparing the test targets with the predictions.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f'MAE: {MAE}')
print(f'MSE: {MSE}')

MAE: 7.583333333333336
MSE: 85.13888888888893
