In [None]:
import numpy as np

In [None]:
class KNN:
    """K-nearest-neighbors regressor based on Euclidean distance.

    A prediction for a query row is the mean of the labels of the `k`
    training rows that are closest to it.
    """

    def __init__(self, k: int):
        self.k = k # number of neighbors to use, don't change this

    def fit(self, X, y):
        """
        Store the training data; no other work happens at fit time.

        This method takes two parameters
            X: features, a two-dimensional array-like object with shape [m, d]
            y: labels, a one-dimensional array-like object with shape [m]

        Returns: the fitted instance itself.
        """
        self.X = np.array(X)
        self.y = np.array(y)
        return self # returning self is convenient when chaining methods

    def predict(self, X):
        """
        Predict a label for every row of `X`.

        This method takes a single parameter
            X: features, a two-dimensional array-like object with shape [n, d],
               where d matches the number of feature columns passed to `fit`

        Output: a numpy array of length n with the KNN prediction for each row of `X`

        Raises: ValueError if a row of `X` does not have the same number of
                features as the training data.
        """
        # The converted input is stored on the instance, as the previous
        # implementation did, in case other cells read it.
        self.X_valid = np.array(X)

        y_hat = []
        for query in self.X_valid:
            # Without this check a one-feature query would silently broadcast
            # against multi-feature training rows.
            if query.shape != self.X.shape[1:]:
                raise ValueError(
                    "X has rows of shape {}, but the model was fitted on rows of shape {}".format(
                        query.shape, self.X.shape[1:]
                    )
                )

            # Euclidean distance from this query to every training row,
            # summed over all d feature columns (not just the first two).
            distances = np.sqrt(((self.X - query) ** 2).sum(axis=1))

            # Indices of the k smallest distances; if k exceeds the number of
            # training rows the slice simply returns all of them.
            nearest = np.argsort(distances)[:self.k]

            # The prediction is the mean label of those neighbors.
            y_hat.append(np.mean(self.y[nearest]))

        return np.array(y_hat)

    def score(self, X, y): # Outputs the R^2 of the prediction. Don't change this method.
        y_pred = self.predict(X) # get the prediction
        y_true = np.array(y)
        return 1 - ((y_true - y_pred)** 2).sum() / ((y_true - y_true.mean()) ** 2).sum()

In [None]:
# Smoke test: fit on the training split, then score on the validation split.
import pandas as pd

# Column layout shared by both CSV files.
FEATURE_COLUMNS = ['lat', 'lon']
TARGET_COLUMN = 'elevation'

# Training split: features and target.
df_train = pd.read_csv('elev_train.csv')
X_train = df_train[FEATURE_COLUMNS]
y_train = df_train[TARGET_COLUMN]

# Build a 5-neighbor model and fit it on the training split.
knn = KNN(5)
knn.fit(X_train, y_train)

# Validation split: features and target.
df_valid = pd.read_csv('elev_valid.csv')
X_valid = df_valid[FEATURE_COLUMNS]
y_valid = df_valid[TARGET_COLUMN]

# R^2 on the validation split.
# Expected output: 0.9527380878623475
knn.score(X_valid, y_valid)

FileNotFoundError: [Errno 2] No such file or directory: 'elev_train.csv'