# Brute-force Nearest Neighbors Algorithm
<br>
Code a nearest neighbor algorithm that works for 2-dimensional data. Test it against SKLearn package on music dataset to ensure accuracy. 

In [1]:
# Import modules.
import pandas as pd
import numpy as np
import scipy
import sklearn
import matplotlib.pyplot as plt

# Example data.
music = pd.DataFrame()

# Some data to play with.
music['duration'] = [184, 134, 243, 186, 122, 197, 294, 382, 102, 264, 
                     205, 110, 307, 110, 397, 153, 190, 192, 210, 403,
                     164, 198, 204, 253, 234, 190, 182, 401, 376, 102]
music['loudness'] = [18, 34, 43, 36, 22, 9, 29, 22, 10, 24, 
                     20, 10, 17, 51, 7, 13, 19, 12, 21, 22,
                     16, 18, 4, 23, 34, 19, 14, 11, 37, 42]

# We know whether the songs in our training data are jazz or not.
music['jazz'] = [ 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
                  0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
                  1, 1, 1, 1, 0, 0, 1, 1, 0, 0]

# Euclidean distance.
def eDistance(x1, y1, x2, y2):
    dx = x2 - x1
    dy = y2 - y1
    dSquared = dx ** 2 + dy ** 2
    result = dSquared ** 0.5
    return result

def knn(df):
    
    """
    
    This function classifies the user-inputted musical genre and checks against
    SKLearn's KNeighborsClassifier function for accuracy.
    
    """
    
    num_neighbors = int(input("Please enter number of neighbors."))
    user_duration = int(input("Please enter song duration."))
    user_loudness = int(input("Please enter song loudness."))
    
    feature1 = df.iloc[:, 0]
    feature2 = df.iloc[:, 1]
    
    df['distance'] = eDistance(user_duration, user_loudness, feature1, feature2)
    
    dfSortedDistance = df.sort_values('distance').reset_index(drop=True).loc[0:num_neighbors - 1, :]
    
    p0 = dfSortedDistance[dfSortedDistance.jazz == 0]['jazz'].agg('count') / num_neighbors
    p1 = dfSortedDistance[dfSortedDistance.jazz == 1]['jazz'].agg('count') / num_neighbors
    output = round(dfSortedDistance['jazz'].mean(), 0)
    
    print('\n')
    
    print('Assuming', num_neighbors, 'nearest neighbors...')
    
    if output == 0:
        print('...the predicted song is Rock.')
    else:
        print('...the predicted song is Jazz.')
    
    print('% votes for Rock:', p0)
    print('% votes for Jazz:', p1)
    print('\n')
    
    # Check against sklearn.
    from sklearn.neighbors import KNeighborsClassifier
    neighbors = KNeighborsClassifier(n_neighbors = num_neighbors)
    
    X = music[['loudness', 'duration']]
    Y = music.jazz
    neighbors.fit(X, Y)
    
    print('SKLearn predicted genre:')
    
    sk_pred = neighbors.predict([[user_loudness, user_duration]])
    sk_pred_proba = neighbors.predict_proba([[user_loudness, user_duration]])
    
    if sk_pred == 0:
        print('Rock')
    else:
        print('Jazz')
    
    print('% votes Rock, Jazz:', sk_pred_proba)
    
def main():
    print(knn.__doc__)
    df = music
    print()
    print('Example data:\n', df.head(), '\n')
    knn(df)
    
if __name__ == "__main__":
    main()


    
    This function classifies the user-inputted musical genre and checks against
    SKLearn's KNeighborsClassifier function for accuracy.
    
    

Example data:
    duration  loudness  jazz
0       184        18     1
1       134        34     0
2       243        43     0
3       186        36     0
4       122        22     1 

Please enter number of neighbors.5
Please enter song duration.160
Please enter song loudness.40


Assuming 5 nearest neighbors...
...the predicted song is Jazz.
% votes for Rock: 0.4
% votes for Jazz: 0.6


SKLearn predicted genre:
Jazz
% votes Rock, Jazz: [[0.4 0.6]]
