In [1]:
import numpy as np

#from utils import euclidean_distance, manhattan_distance

In [13]:
def euclidean_distance(x1, x2):
    """
    Computes and returns the Euclidean distance between two vectors.

    Args:
        x1: A numpy array (or array-like) of shape (n_features,).
        x2: A numpy array (or array-like) of shape (n_features,).

    Returns:
        A scalar (numpy float) equal to the square root of the sum of squared
        element-wise differences between x1 and x2.
    """
    # Convert to float first so that unsigned integer inputs cannot wrap
    # around when subtracted.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)

    # The squared differences must be summed over all features before taking
    # the root; taking the root element-wise only yields |x1 - x2|.
    return np.sqrt(np.sum(diff ** 2))

In [14]:
def manhattan_distance(x1, x2):
    """
    Computes and returns the Manhattan distance between two vectors.

    Args:
        x1: A numpy array (or array-like) of shape (n_features,).
        x2: A numpy array (or array-like) of shape (n_features,).

    Returns:
        A scalar (numpy float) equal to the sum of the absolute element-wise
        differences between x1 and x2.
    """
    # Convert to float first so that unsigned integer inputs cannot wrap
    # around when subtracted.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sum(np.abs(diff))


In [15]:
class KNearestNeighbors:
    """
    A class representing the machine learning implementation of a K-Nearest Neighbors classifier from scratch.

    Attributes:
        n_neighbors
            An integer representing the number of neighbors a sample is compared with when predicting target class
            values.

        weights
            A string representing the weight function used when predicting target class values. The possible options are
            {'uniform', 'distance'}. With 'uniform' every neighbor casts one vote; with 'distance' each neighbor's vote
            is weighted by the inverse of its distance to the query sample.

        _X
            A numpy array of shape (n_samples, n_features) representing the input data used when fitting the model and
            predicting target class values. None until fit is called.

        _y
            A numpy array of shape (n_samples,) representing the true class values for each sample in the input data
            used when fitting the model and predicting target class values. None until fit is called.

        _distance
            An attribute representing which distance metric is used to calculate distances between samples. This is set
            when creating the object to either the euclidean_distance or manhattan_distance function based on what
            argument is passed into the metric parameter of the class.

    Methods:
        fit(X, y)
            Fits the model to the provided data matrix X and targets y.

        predict(X)
            Predicts class target values for the given test data matrix X using the fitted classifier model.
    """

    def __init__(self, n_neighbors = 5, weights = 'uniform', metric = 'l2'):
        """
        Creates an unfitted classifier.

        Args:
            n_neighbors: A positive integer, the number of nearest neighbors that vote on each prediction.
            weights: Either 'uniform' or 'distance'.
            metric: Either 'l2' (Euclidean distance) or 'l1' (Manhattan distance).

        Raises:
            ValueError: If any of the arguments is not one of the recognized values.
        """
        # Check if the provided arguments are valid. A non-positive neighbor count can never produce a vote, so it is
        # rejected here instead of failing later inside predict.
        if (weights not in ['uniform', 'distance'] or metric not in ['l1', 'l2']
                or not isinstance(n_neighbors, int) or n_neighbors < 1):
            raise ValueError('The provided class parameter arguments are not recognized.')

        # Define and setup the attributes for the KNearestNeighbors model object
        self.n_neighbors = n_neighbors
        self.weights = weights
        self._X = None
        self._y = None
        self._distance = euclidean_distance if metric == 'l2' else manhattan_distance

    def fit(self, X, y):
        """
        Fits the model to the provided data matrix X and targets y.

        K-Nearest Neighbors is a lazy learner, so fitting only stores the training data.

        Args:
            X: A numpy array of shape (n_samples, n_features) representing the input data.
            y: A numpy array of shape (n_samples,) representing the true class values for each sample in the input data.

        Returns:
            None.

        Raises:
            ValueError: If X and y do not contain the same number of samples.
        """
        train_X = np.asarray(X)
        train_y = np.asarray(y)
        if len(train_X) != len(train_y):
            raise ValueError('X and y must contain the same number of samples.')

        # Store the data under the documented attribute names.
        self._X = train_X
        self._y = train_y

        # Earlier versions of this class exposed the training data as X_train / Y_train; keep those names as aliases
        # so existing code that reads them keeps working.
        self.X_train = self._X
        self.Y_train = self._y

    def _vote(self, labels, distances):
        """
        Returns the winning class label among the selected neighbors.

        Args:
            labels: A numpy array holding the class label of each selected neighbor.
            distances: A numpy array holding the distance of each selected neighbor to the query sample, in the same
                order as labels.

        Returns:
            The label with the largest total vote. Ties are broken in favor of the label that sorts first, which keeps
            the result deterministic.
        """
        if self.weights == 'distance':
            exact_match = distances == 0
            if exact_match.any():
                # A neighbor at distance zero would get an infinite weight; let only the exact matches vote instead.
                votes = exact_match.astype(float)
            else:
                votes = 1.0 / distances
        else:
            votes = np.ones(len(labels))

        # np.unique returns the labels sorted, and np.argmax returns the first maximum, so ties resolve to the
        # smallest label.
        classes, class_index = np.unique(labels, return_inverse=True)
        totals = np.bincount(class_index.ravel(), weights=votes, minlength=len(classes))
        return classes[np.argmax(totals)]

    def predict(self, X):
        """
        Predicts class target values for the given test data matrix X using the fitted classifier model.

        Args:
            X: A numpy array of shape (n_samples, n_features) representing the test data.

        Returns:
            A numpy array of shape (n_samples,) representing the predicted target class values for the given test data.

        Raises:
            AttributeError: If the model has not been fitted yet.
        """
        train_X = getattr(self, '_X', None)
        train_y = getattr(self, '_y', None)
        if train_X is None or train_y is None:
            raise AttributeError('The model must be fitted with fit(X, y) before calling predict.')

        predictions = []
        for row in np.asarray(X):
            # Distance from the query sample to every training sample.
            distances = np.array([self._distance(row, sample) for sample in train_X], dtype=float)

            # Indices of the closest training samples; a stable sort keeps equally distant samples in training order.
            nearest = np.argsort(distances, kind='stable')[:self.n_neighbors]

            predictions.append(self._vote(train_y[nearest], distances[nearest]))

        return np.array(predictions)
