#### Import libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the training and test splits from CSV files in the working directory.
train_ds, test_ds = (pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv"))

In [3]:
# Preview the first rows of the training frame (features V1..V10 plus OUTCOME).
train_ds.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,OUTCOME
0,3,6,9,9,2,1,8,7,2,10,5.548363
1,4,7,10,5,6,4,7,5,2,10,6.062914
2,6,4,9,2,1,3,7,7,8,3,4.832181
3,10,10,5,4,5,9,1,7,5,2,5.917978
4,3,2,2,9,10,4,10,3,8,1,5.311765


In [4]:
# Preview the first rows of the test frame; it has the same columns as the training frame.
test_ds.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,OUTCOME
0,9,9,3,3,2,6,3,10,9,1,5.498661
1,8,1,2,9,9,4,7,6,3,3,5.289798
2,3,5,6,5,10,6,4,7,1,7,5.314285
3,2,9,5,3,3,3,9,9,8,3,5.076739
4,3,8,6,1,4,1,1,4,5,10,4.285061


In [5]:
# Training feature matrix: every column except the target, as a NumPy array.
train_data = train_ds.drop(columns=["OUTCOME"]).values
print(train_data)

[[ 3  6  9 ...  7  2 10]
 [ 4  7 10 ...  5  2 10]
 [ 6  4  9 ...  7  8  3]
 ...
 [ 1  1 10 ...  9 10  3]
 [ 1  7  4 ...  6  9  4]
 [ 2  5 10 ...  5  2  2]]


In [6]:
# Test feature matrix: every column except the target, as a NumPy array.
test_data = test_ds.drop(columns=["OUTCOME"]).values
print(test_data)

[[ 9  9  3  3  2  6  3 10  9  1]
 [ 8  1  2  9  9  4  7  6  3  3]
 [ 3  5  6  5 10  6  4  7  1  7]
 [ 2  9  5  3  3  3  9  9  8  3]
 [ 3  8  6  1  4  1  1  4  5 10]
 [ 1  4  1  9  6  2  9  9  3  8]
 [ 1  6  6  5 10  9  3  4  4  1]
 [ 2  9  7  6  8  3  5  1  5  4]
 [ 1  5  6  1  8  5  7  5  7  2]
 [ 1  4  7  3  7  9  8  3  2  5]
 [ 6  6  3  5  2  7 10  5  9  5]
 [ 8  5  3  3  5  5  6 10  6  2]
 [ 7  9  7 10  3  7  1  6  3  1]
 [ 1  2  2  7  2  3  4  4  3  3]
 [ 1  7 10  8  1  7  6  8  8  3]
 [ 5  1 10  3  7 10  1  5  4  8]
 [ 2  4  8 10  6  4  3  7  2  4]
 [ 8 10  5  9  2  6  3  5  5  9]
 [ 8  1 10 10  1 10  5  8  3  2]
 [ 9  1  7  4  3  6  6  5  5  6]
 [ 4  3  4  4  9  6  8  4 10  5]
 [10 10  2  3  9  9  9  7  4  3]
 [10  5  8  7  7  5  2  5  4  8]
 [ 1  7 10  4  8  2  1  1  8  4]
 [ 9  1  7  1  8 10  2  9  1  6]
 [ 9  3  9  6  6  7  2  8  9 10]
 [ 8 10  3  9  5  4  5  2  5  4]
 [ 3  8  6  6  8  7  9  3  7  4]
 [ 6  1  3  3 10  1  1  9  7  4]
 [ 5  9  6  2  5  7  8  3  9  2]
 [ 7  2 10

In [7]:
# (rows, columns) of the training frame.
train_ds.shape

(900, 11)

In [8]:
# (rows, columns) of the test frame.
test_ds.shape

(100, 11)

In [9]:
from sklearn.neighbors import KNeighborsRegressor

def predict(train, test, k=5, metric="euclidean", outcome="OUTCOME"):
    """Predict the outcome for each row of ``test`` with k-nearest-neighbours regression.

    Parameters
    ----------
    train : pandas.DataFrame
        Training rows. Must contain the ``outcome`` column; every other
        column is used as a feature.
    test : pandas.DataFrame
        Rows to predict. Must contain every feature column of ``train``.
        An ``outcome`` column and any extra columns are ignored.
    k : int, default 5
        Number of neighbours, passed to ``KNeighborsRegressor`` as ``n_neighbors``.
    metric : str, default "euclidean"
        Distance metric name, passed through to ``KNeighborsRegressor``
        (for example "euclidean" or "manhattan").
    outcome : str, default "OUTCOME"
        Name of the target column in ``train``.

    Returns
    -------
    The result of ``KNeighborsRegressor.predict`` on the test features:
    one prediction per row of ``test``.

    Raises
    ------
    KeyError
        If ``outcome`` is not a column of ``train``, or ``test`` is missing
        one of the training feature columns.
    """
    # Build the feature matrices from the arguments, not from notebook-level
    # globals, so the result depends only on what the caller passes in.
    feature_columns = train.columns.drop(outcome)
    train_features = train[feature_columns].values
    # Select by the training column list so test features line up with the
    # training features even if the test frame orders its columns differently.
    test_features = test[feature_columns].values

    knn = KNeighborsRegressor(n_neighbors=k, metric=metric)
    knn.fit(train_features, train[outcome])

    return knn.predict(test_features)



In [10]:
# Show the k=5 Euclidean predictions for every row of the test frame.
print(
    predict(
        train_ds,
        test_ds,
        k=5,
        metric="euclidean",
        outcome="OUTCOME",
    )
)

[5.95708742 5.48711889 5.83336967 5.26498737 4.83427261 5.07946697
 4.83832583 5.32063987 4.91940552 5.03729078 5.4523583  5.66689433
 5.5551017  3.46033861 5.68027577 5.53431886 4.71541007 5.72545251
 5.75817444 5.09269412 5.75321412 6.30085196 6.09351574 5.29001277
 5.31766601 6.67135636 5.7113609  5.83058767 4.69182012 5.82046444
 4.78644716 6.47826707 4.41666446 4.85776514 5.75234796 6.09946736
 5.68115193 5.12644609 4.73291228 5.63304818 5.0822997  5.78865311
 5.31148822 5.09156613 4.86505222 4.26147057 5.27020875 6.5931899
 4.65601551 5.0605146  4.54836247 4.70608756 5.81895687 6.28484934
 5.00283613 5.06127211 5.5319492  6.02816448 5.52443466 5.36907615
 6.27038284 5.01133356 4.89587848 6.74675994 5.64405009 5.84136191
 6.0089698  4.92898051 5.9236389  7.46082046 6.23366056 5.18168657
 5.53203812 6.65699372 4.9763408  4.81228655 5.03112666 5.67589629
 6.08356808 6.22433893 5.84976123 6.02810086 5.91127713 5.60496179
 4.91867512 3.69022477 5.90836751 3.53432747 7.05308654 6.00029

In [12]:
# Test the result: root-mean-squared error of the k=5 predictions on the test frame.

import math

# NOTE(review): the recorded run of this cell printed 0.3753732795418724, which does
# not match the expected value quoted below — confirm which one is correct.
print(
    math.sqrt(
        (test_ds["OUTCOME"] - predict(train_ds, test_ds, k=5)).pow(2).mean()
    )
)  # Answer should be = 0.3638982

0.3753732795418724
