### 5.1) Import modules

In [1]:
import numpy as np
from sklearn import neighbors
from sklearn.datasets import load_iris, load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### 5.2) Load and split Iris flower dataset for classification
https://scikit-learn.org/stable/datasets/index.html#iris-dataset

In [2]:
# Load the Iris data as a (features, targets) pair.
X, y = load_iris(return_X_y=True)

# Keep 80% of the samples for training and 20% for testing; random_state=0
# makes the random split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    train_size=0.8,
    random_state=0,
)

# Report the shape of every partition, one line per partition.
print('\n'.join(
    label + ' ' + str(part.shape)
    for label, part in (
        ('Training features size: ', X_train),
        ('Testing features size: ', X_test),
        ('Training targets size: ', y_train),
        ('Testing targets size: ', y_test),
    )
))

Training features size:  (120, 4)
Testing features size:  (30, 4)
Training targets size:  (120,)
Testing targets size:  (30,)


### 5.3) Standardize training features

In [3]:
# Per-feature statistics before scaling, for comparison with the values below.
print('Original training feature means: ', X_train.mean(axis=0))
print('Original training feature SDs: ', X_train.std(axis=0))

# Fit the scaler to the training features first, then transform those same
# features with the fitted scaler (two explicit steps instead of fit_transform).
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Per-feature statistics after scaling.
print('Scaled training feature means: ', X_train_scaled.mean(axis=0))
print('Scaled training feature SDs: ', X_train_scaled.std(axis=0))

Original training feature means:  [5.88083333 3.05333333 3.81583333 1.23166667]
Original training feature SDs:  [0.84688604 0.43010335 1.77745116 0.7721165 ]
Scaled training feature means:  [ 0.00000000e+00 -7.49863135e-16  4.25585493e-16  2.57201667e-16]
Scaled training feature SDs:  [1. 1. 1. 1.]


### 5.4) Standardize testing features

In [4]:
# Per-feature statistics of the held-out set before scaling.
print('\n'.join(
    label + ' ' + str(stat)
    for label, stat in (
        ('Original testing feature means: ', X_test.mean(axis=0)),
        ('Original testing feature SDs: ', X_test.std(axis=0)),
    )
))

# Only transform here: the scaler is reused as already fitted and is not
# re-fitted on the testing features.
X_test_scaled = scaler.transform(X_test)

# Per-feature statistics of the held-out set after scaling.
print('\n'.join(
    label + ' ' + str(stat)
    for label, stat in (
        ('Scaled testing feature means: ', X_test_scaled.mean(axis=0)),
        ('Scaled testing feature SDs: ', X_test_scaled.std(axis=0)),
    )
))

Original testing feature means:  [5.69333333 3.07333333 3.52666667 1.07      ]
Original testing feature SDs:  [0.71317755 0.45087569 1.66531946 0.69289249]
Scaled testing feature means:  [-0.22139933  0.04650045 -0.16268614 -0.20938118]
Scaled testing feature SDs:  [0.84211749 1.04829617 0.93691432 0.89739371]


### 5.5) Define accuracy function

In [5]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal their true label.

    Parameters
    ----------
    y_true : sequence
        Ground-truth labels.
    y_pred : sequence
        Predicted labels, in the same order and of the same length as
        ``y_true``.

    Returns
    -------
    float
        Number of positions where ``y_pred`` equals ``y_true``, divided by
        the number of labels.

    Raises
    ------
    ValueError
        If the two sequences differ in length, or if they are empty (the
        accuracy of zero predictions is undefined).
    """
    m = len(y_true)

    # Without this check, surplus predictions would be silently ignored.
    if len(y_pred) != m:
        raise ValueError(
            'y_true and y_pred must have the same length, got %d and %d'
            % (m, len(y_pred))
        )

    # Fail with a clear message instead of an opaque ZeroDivisionError.
    if m == 0:
        raise ValueError('accuracy is undefined for empty inputs')

    num_correct = sum(1 for truth, pred in zip(y_true, y_pred) if pred == truth)

    return num_correct / m

### 5.6) Load, train and evaluate K-Nearest Neighbours classifier

In [6]:
# n_neighbors is the "k" of k-nearest neighbours. fit() returns the estimator
# itself, so construction and training are chained into one statement.
knn_clf = neighbors.KNeighborsClassifier(n_neighbors=2).fit(X_train_scaled, y_train)

# Predict labels for the scaled testing features and score them with accuracy().
y_pred = knn_clf.predict(X_test_scaled)
knn_clf_accuracy = accuracy(y_test, y_pred)

print('K-Nearest Neighbours accuracy: ', knn_clf_accuracy)

K-Nearest Neighbours accuracy:  0.9666666666666667


### 5.7) Load and split Boston house price dataset for regression

In [7]:
# Load the Boston house price data as a (features, targets) pair.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell (and the import of load_boston) requires scikit-learn < 1.2.
X, y = load_boston(return_X_y=True)

# Keep 80% of the samples for training and 20% for testing; random_state=0
# makes the random split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    train_size=0.8,
    random_state=0,
)

# Report the shape of every partition, one line per partition.
print('\n'.join(
    label + ' ' + str(part.shape)
    for label, part in (
        ('Training features size: ', X_train),
        ('Testing features size: ', X_test),
        ('Training targets size: ', y_train),
        ('Testing targets size: ', y_test),
    )
))

Training features size:  (404, 13)
Testing features size:  (102, 13)
Training targets size:  (404,)
Testing targets size:  (102,)


### 5.8) Standardize training and testing features

In [8]:
# Fit a fresh scaler on the training features only; fit() returns the scaler
# itself, so it can be chained onto the constructor.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both partitions.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### 5.9) Define error function

In [9]:
def MSE(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    y_true : array-like of numbers
        Ground-truth target values.
    y_pred : array-like of numbers
        Predicted values; must have exactly the same shape as ``y_true``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The squared differences ``(y_true - y_pred) ** 2`` averaged over the
        first axis: a scalar for 1-D inputs, one value per column for 2-D
        inputs.

    Raises
    ------
    ValueError
        If the shapes differ or the inputs are empty.
    """
    # Convert up front so plain lists and tuples work as well as arrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Reject mismatched shapes explicitly: e.g. (m,) minus (m, 1) would
    # otherwise broadcast to (m, m) and silently give a wrong result.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            'y_true and y_pred must have the same shape, got %s and %s'
            % (y_true.shape, y_pred.shape)
        )

    # An empty input previously produced 0, which reads as a perfect score.
    if y_true.size == 0:
        raise ValueError('MSE is undefined for empty inputs')

    return np.mean((y_true - y_pred) ** 2, axis=0)

### 5.10) Load, train and evaluate K-Nearest Neighbours regression model

In [10]:
# Build a 5-nearest-neighbours regressor and train it on the scaled training
# features; fit() returns the estimator itself, so the two steps are chained.
knn_reg = neighbors.KNeighborsRegressor(n_neighbors=5).fit(X_train_scaled, y_train)

# Predict targets for the scaled testing features and score them with MSE().
y_pred = knn_reg.predict(X_test_scaled)
knn_reg_error = MSE(y_test, y_pred)

print('K-Nearest Neighbours mean squared error: ', knn_reg_error)

K-Nearest Neighbours mean squared error:  35.674415686274514
