In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
%matplotlib inline

# Toy data set: 30 songs, one row per song. `bpm` is the regression target
# used below; `duration` and `loudness` are the candidate predictors.
music = pd.DataFrame({
    'duration': [
        184, 134, 243, 186, 122, 197, 294, 382, 102, 264,
        205, 110, 307, 110, 397, 153, 190, 192, 210, 403,
        164, 198, 204, 253, 234, 190, 182, 401, 376, 102,
    ],
    'loudness': [
        18, 34, 43, 36, 22, 9, 29, 22, 10, 24,
        20, 10, 17, 51, 7, 13, 19, 12, 21, 22,
        16, 18, 4, 23, 34, 19, 14, 11, 37, 42,
    ],
    'bpm': [
        105, 90, 78, 75, 120, 110, 80, 100, 105, 60,
        70, 105, 95, 70, 90, 105, 70, 75, 102, 100,
        100, 95, 90, 80, 90, 80, 100, 105, 70, 65,
    ],
})

In [2]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score

## Try simple model

# Predictors (duration, loudness) and target (bpm) shared by the cells below.
X = music[['duration', 'loudness']].copy()
Y = music['bpm']

# Baseline: unweighted 10-nearest-neighbour regression.
knn_1 = KNeighborsRegressor(n_neighbors=10).fit(X, Y)

# With no `scoring` argument cross_val_score uses the estimator's own score,
# which for scikit-learn regressors is R^2 -- so the value printed under the
# "Accuracy" label can be negative.
score_1 = cross_val_score(knn_1, X, Y, cv=5)
print('Accuracy knn_1: %0.2f (+/- %0.2f)' % (score_1.mean(), 2 * score_1.std()))

Accuracy knn_1: -0.42 (+/- 0.78)


In [3]:
## Try including weights

# Same k as the baseline, but neighbours are weighted by inverse distance.
knn_2 = KNeighborsRegressor(n_neighbors=10, weights='distance').fit(X, Y)
score_2 = cross_val_score(knn_2, X, Y, cv=5)
print('Accuracy knn_2: %0.2f (+/- %0.2f)' % (score_2.mean(), 2 * score_2.std()))

Accuracy knn_2: -0.26 (+/- 0.74)


In [4]:
## Try reducing number of neighbors

# Unweighted model again, this time averaging over only 5 neighbours.
knn_3 = KNeighborsRegressor(n_neighbors=5).fit(X, Y)
score_3 = cross_val_score(knn_3, X, Y, cv=5)
print('Accuracy knn_3: %0.2f (+/- %0.2f)' % (score_3.mean(), 2 * score_3.std()))

Accuracy knn_3: -0.75 (+/- 1.16)


In [5]:
# Sanity check: arange excludes the stop value, which is why the loops below
# iterate over np.arange(min, max + 1).
np.arange(1, 5, 1)

array([1, 2, 3, 4])

In [6]:
## Write function to test all neighbors, no weights

def KNN_2var_noWeight(min_n, max_n):
    """Print 5-fold cross-validation results for unweighted KNN regressors.

    For each neighbour count from ``min_n`` to ``max_n`` (both inclusive) a
    ``KNeighborsRegressor`` with default weights is cross-validated on the
    notebook-level ``X`` and ``Y``. One line is printed per neighbour count
    with the mean fold score and twice the standard deviation of the fold
    scores. The value labelled "Accuracy" is the regressor's default score
    (R^2 in scikit-learn), so it may be negative.

    Returns nothing; the results are only printed.
    """
    neighbour_counts = np.arange(min_n, max_n + 1)  # +1 makes max_n inclusive
    for k in neighbour_counts:
        fold_scores = cross_val_score(KNeighborsRegressor(n_neighbors=k), X, Y, cv=5)
        centre = fold_scores.mean()
        spread = 2 * fold_scores.std()
        print('Accuracy knn_test_%0.0f %0.2f (+/- %0.2f)' % (k, centre, spread))


In [7]:
## Results of different n, no weights

KNN_2var_noWeight(1, 20)

Accuracy knn_test_1 -1.54 (+/- 4.67)
Accuracy knn_test_2 -0.52 (+/- 1.10)
Accuracy knn_test_3 -0.69 (+/- 1.03)
Accuracy knn_test_4 -0.84 (+/- 1.25)
Accuracy knn_test_5 -0.75 (+/- 1.16)
Accuracy knn_test_6 -0.51 (+/- 0.81)
Accuracy knn_test_7 -0.46 (+/- 0.76)
Accuracy knn_test_8 -0.49 (+/- 0.79)
Accuracy knn_test_9 -0.42 (+/- 0.60)
Accuracy knn_test_10 -0.42 (+/- 0.78)
Accuracy knn_test_11 -0.38 (+/- 0.73)
Accuracy knn_test_12 -0.38 (+/- 0.63)
Accuracy knn_test_13 -0.38 (+/- 0.73)
Accuracy knn_test_14 -0.34 (+/- 0.75)
Accuracy knn_test_15 -0.38 (+/- 0.65)
Accuracy knn_test_16 -0.32 (+/- 0.52)
Accuracy knn_test_17 -0.31 (+/- 0.42)
Accuracy knn_test_18 -0.36 (+/- 0.57)
Accuracy knn_test_19 -0.34 (+/- 0.61)
Accuracy knn_test_20 -0.32 (+/- 0.59)


In [8]:
## Function to test different n, weighted by distance

def KNN_2var_dist(min_n, max_n):
    """Print 5-fold cross-validation results for distance-weighted KNN regressors.

    For each neighbour count from ``min_n`` to ``max_n`` (both inclusive) a
    ``KNeighborsRegressor(weights='distance')`` is cross-validated on the
    notebook-level ``X`` and ``Y``. One line is printed per neighbour count
    with the mean fold score and twice the standard deviation of the fold
    scores. The value labelled "Accuracy" is the regressor's default score
    (R^2 in scikit-learn), so it may be negative.

    Returns nothing; the results are only printed.
    """
    neighbour_counts = np.arange(min_n, max_n + 1)  # +1 makes max_n inclusive
    for k in neighbour_counts:
        model = KNeighborsRegressor(n_neighbors=k, weights='distance')
        fold_scores = cross_val_score(model, X, Y, cv=5)
        centre = fold_scores.mean()
        spread = 2 * fold_scores.std()
        print('Accuracy knn_test_%0.0f %0.2f (+/- %0.2f)' % (k, centre, spread))

In [9]:
## Results of different n, weighted by distance

KNN_2var_dist(1, 20)

Accuracy knn_test_1 -1.54 (+/- 4.67)
Accuracy knn_test_2 -0.61 (+/- 2.20)
Accuracy knn_test_3 -0.59 (+/- 1.75)
Accuracy knn_test_4 -0.63 (+/- 1.64)
Accuracy knn_test_5 -0.51 (+/- 1.24)
Accuracy knn_test_6 -0.37 (+/- 0.88)
Accuracy knn_test_7 -0.30 (+/- 0.79)
Accuracy knn_test_8 -0.30 (+/- 0.80)
Accuracy knn_test_9 -0.27 (+/- 0.69)
Accuracy knn_test_10 -0.26 (+/- 0.74)
Accuracy knn_test_11 -0.26 (+/- 0.75)
Accuracy knn_test_12 -0.25 (+/- 0.66)
Accuracy knn_test_13 -0.25 (+/- 0.67)
Accuracy knn_test_14 -0.23 (+/- 0.67)
Accuracy knn_test_15 -0.23 (+/- 0.61)
Accuracy knn_test_16 -0.23 (+/- 0.60)
Accuracy knn_test_17 -0.22 (+/- 0.57)
Accuracy knn_test_18 -0.23 (+/- 0.60)
Accuracy knn_test_19 -0.23 (+/- 0.60)
Accuracy knn_test_20 -0.23 (+/- 0.60)


In [10]:
## Try normalizing the inputs

from scipy import stats

# Z-score each predictor so both features contribute on a comparable scale
# to the neighbour distance.
# NOTE(review): the mean/std here are computed over all rows, including the
# rows cross_val_score later holds out -- consider scaling inside each fold.
X_norm = pd.DataFrame({
    column: stats.zscore(music[column]) for column in ('loudness', 'duration')
})


In [11]:
## Function to test all neighbors, normalized, no weights

def KNN_2var_noW_norm(n_min, n_max):
    """Print 5-fold cross-validation results for unweighted KNN on z-scored inputs.

    For each neighbour count from ``n_min`` to ``n_max`` (both inclusive) a
    ``KNeighborsRegressor`` with default weights is cross-validated on the
    notebook-level ``X_norm`` and ``Y``. One line is printed per neighbour
    count with the mean fold score and twice the standard deviation of the
    fold scores. The value labelled "Accuracy" is the regressor's default
    score (R^2 in scikit-learn), so it may be negative.

    Returns nothing; the results are only printed.
    """
    neighbour_counts = np.arange(n_min, n_max + 1)  # +1 makes n_max inclusive
    for k in neighbour_counts:
        fold_scores = cross_val_score(KNeighborsRegressor(n_neighbors=k), X_norm, Y, cv=5)
        centre = fold_scores.mean()
        spread = 2 * fold_scores.std()
        print('Accuracy knn_test_%0.0f: %0.2f (+/- %0.2f)' % (k, centre, spread))

In [12]:
## Result of all neighbors, normalized, no weights

KNN_2var_noW_norm(1, 20)

Accuracy knn_test_1: -1.73 (+/- 4.45)
Accuracy knn_test_2: -1.06 (+/- 2.96)
Accuracy knn_test_3: -0.54 (+/- 1.67)
Accuracy knn_test_4: -0.24 (+/- 1.13)
Accuracy knn_test_5: -0.37 (+/- 1.46)
Accuracy knn_test_6: -0.37 (+/- 1.32)
Accuracy knn_test_7: -0.43 (+/- 1.66)
Accuracy knn_test_8: -0.18 (+/- 0.91)
Accuracy knn_test_9: -0.14 (+/- 0.72)
Accuracy knn_test_10: -0.11 (+/- 0.50)
Accuracy knn_test_11: -0.10 (+/- 0.35)
Accuracy knn_test_12: -0.12 (+/- 0.45)
Accuracy knn_test_13: -0.15 (+/- 0.47)
Accuracy knn_test_14: -0.16 (+/- 0.45)
Accuracy knn_test_15: -0.19 (+/- 0.39)
Accuracy knn_test_16: -0.20 (+/- 0.37)
Accuracy knn_test_17: -0.21 (+/- 0.38)
Accuracy knn_test_18: -0.18 (+/- 0.31)
Accuracy knn_test_19: -0.21 (+/- 0.26)
Accuracy knn_test_20: -0.22 (+/- 0.36)


In [13]:
## Function to test all neighbors, normalized, weighted by distance

def KNN_2var_norm(n_min, n_max):
    """Print 5-fold cross-validation results for distance-weighted KNN on z-scored inputs.

    For each neighbour count from ``n_min`` to ``n_max`` (both inclusive) a
    ``KNeighborsRegressor(weights='distance')`` is cross-validated on the
    notebook-level ``X_norm`` and ``Y``. One line is printed per neighbour
    count with the mean fold score and twice the standard deviation of the
    fold scores. The value labelled "Accuracy" is the regressor's default
    score (R^2 in scikit-learn), so it may be negative.

    Returns nothing; the results are only printed.
    """
    neighbour_counts = np.arange(n_min, n_max + 1)  # +1 makes n_max inclusive
    for k in neighbour_counts:
        model = KNeighborsRegressor(n_neighbors=k, weights='distance')
        fold_scores = cross_val_score(model, X_norm, Y, cv=5)
        centre = fold_scores.mean()
        spread = 2 * fold_scores.std()
        print('Accuracy knn_test_%0.0f: %0.2f (+/- %0.2f)' % (k, centre, spread))

In [14]:
## Results of different n, weighted by distance, normalized inputs

KNN_2var_norm(1, 20)

Accuracy knn_test_1: -1.73 (+/- 4.45)
Accuracy knn_test_2: -1.06 (+/- 3.27)
Accuracy knn_test_3: -0.62 (+/- 2.29)
Accuracy knn_test_4: -0.44 (+/- 1.91)
Accuracy knn_test_5: -0.43 (+/- 1.99)
Accuracy knn_test_6: -0.38 (+/- 1.84)
Accuracy knn_test_7: -0.37 (+/- 1.86)
Accuracy knn_test_8: -0.23 (+/- 1.48)
Accuracy knn_test_9: -0.17 (+/- 1.23)
Accuracy knn_test_10: -0.11 (+/- 1.05)
Accuracy knn_test_11: -0.11 (+/- 0.98)
Accuracy knn_test_12: -0.12 (+/- 0.98)
Accuracy knn_test_13: -0.10 (+/- 0.91)
Accuracy knn_test_14: -0.10 (+/- 0.90)
Accuracy knn_test_15: -0.11 (+/- 0.85)
Accuracy knn_test_16: -0.11 (+/- 0.80)
Accuracy knn_test_17: -0.12 (+/- 0.79)
Accuracy knn_test_18: -0.11 (+/- 0.75)
Accuracy knn_test_19: -0.13 (+/- 0.71)
Accuracy knn_test_20: -0.13 (+/- 0.71)


In [15]:
# scikit-learn expects a 2-D feature matrix, so turn the single loudness
# column into an (n_samples, 1) array.
X_loud = X['loudness'].to_numpy(copy=True).reshape(-1, 1)

In [16]:
## Try just loudness

def KNN_loudness(n_min, n_max):
    """Print 5-fold cross-validation results for unweighted KNN using loudness only.

    For each neighbour count from ``n_min`` to ``n_max`` (both inclusive) a
    ``KNeighborsRegressor`` with default weights is cross-validated on the
    notebook-level ``X_loud`` and ``Y``. One line is printed per neighbour
    count with the mean fold score and twice the standard deviation of the
    fold scores. The value labelled "Accuracy" is the regressor's default
    score (R^2 in scikit-learn), so it may be negative.

    Returns nothing; the results are only printed.
    """
    neighbour_counts = np.arange(n_min, n_max + 1)  # +1 makes n_max inclusive
    for k in neighbour_counts:
        fold_scores = cross_val_score(KNeighborsRegressor(n_neighbors=k), X_loud, Y, cv=5)
        centre = fold_scores.mean()
        spread = 2 * fold_scores.std()
        print('Accuracy knn_test_%0.0f: %0.2f (+/- %0.2f)' % (k, centre, spread))

In [17]:
KNN_loudness(1, 20)

Accuracy knn_test_1: -0.94 (+/- 5.18)
Accuracy knn_test_2: -0.40 (+/- 2.22)
Accuracy knn_test_3: 0.07 (+/- 0.68)
Accuracy knn_test_4: -0.04 (+/- 0.87)
Accuracy knn_test_5: -0.28 (+/- 1.20)
Accuracy knn_test_6: -0.18 (+/- 0.94)
Accuracy knn_test_7: -0.25 (+/- 1.07)
Accuracy knn_test_8: -0.25 (+/- 0.90)
Accuracy knn_test_9: -0.18 (+/- 0.74)
Accuracy knn_test_10: -0.18 (+/- 0.66)
Accuracy knn_test_11: -0.09 (+/- 0.42)
Accuracy knn_test_12: -0.10 (+/- 0.46)
Accuracy knn_test_13: -0.08 (+/- 0.44)
Accuracy knn_test_14: -0.06 (+/- 0.47)
Accuracy knn_test_15: -0.09 (+/- 0.42)
Accuracy knn_test_16: -0.04 (+/- 0.36)
Accuracy knn_test_17: -0.05 (+/- 0.33)
Accuracy knn_test_18: -0.04 (+/- 0.29)
Accuracy knn_test_19: -0.07 (+/- 0.28)
Accuracy knn_test_20: -0.12 (+/- 0.24)


In [18]:
## Try just loudness, weighted by distance

def KNN_w_loudness(n_min, n_max):
    """Print 5-fold cross-validation results for distance-weighted KNN using loudness only.

    For each neighbour count from ``n_min`` to ``n_max`` (both inclusive) a
    ``KNeighborsRegressor(weights='distance')`` is cross-validated on the
    notebook-level ``X_loud`` and ``Y``. One line is printed per neighbour
    count with the mean fold score and twice the standard deviation of the
    fold scores. The value labelled "Accuracy" is the regressor's default
    score (R^2 in scikit-learn), so it may be negative.

    Returns nothing; the results are only printed.
    """
    neighbour_counts = np.arange(n_min, n_max + 1)  # +1 makes n_max inclusive
    for k in neighbour_counts:
        model = KNeighborsRegressor(n_neighbors=k, weights='distance')
        fold_scores = cross_val_score(model, X_loud, Y, cv=5)
        centre = fold_scores.mean()
        spread = 2 * fold_scores.std()
        print('Accuracy knn_test_%0.0f: %0.2f (+/- %0.2f)' % (k, centre, spread))


In [19]:
KNN_w_loudness(1, 20)

Accuracy knn_test_1: -0.94 (+/- 5.18)
Accuracy knn_test_2: -0.27 (+/- 2.36)
Accuracy knn_test_3: 0.14 (+/- 0.84)
Accuracy knn_test_4: 0.16 (+/- 0.86)
Accuracy knn_test_5: 0.12 (+/- 0.93)
Accuracy knn_test_6: 0.17 (+/- 0.82)
Accuracy knn_test_7: 0.10 (+/- 0.98)
Accuracy knn_test_8: 0.10 (+/- 0.98)
Accuracy knn_test_9: 0.12 (+/- 0.93)
Accuracy knn_test_10: 0.11 (+/- 0.94)
Accuracy knn_test_11: 0.13 (+/- 0.87)
Accuracy knn_test_12: 0.13 (+/- 0.83)
Accuracy knn_test_13: 0.14 (+/- 0.80)
Accuracy knn_test_14: 0.14 (+/- 0.78)
Accuracy knn_test_15: 0.13 (+/- 0.79)
Accuracy knn_test_16: 0.14 (+/- 0.76)
Accuracy knn_test_17: 0.14 (+/- 0.76)
Accuracy knn_test_18: 0.14 (+/- 0.74)
Accuracy knn_test_19: 0.13 (+/- 0.75)
Accuracy knn_test_20: 0.13 (+/- 0.73)


In [20]:
## Try just duration

# scikit-learn expects a 2-D feature matrix, so turn the single duration
# column into an (n_samples, 1) array.
X_dur = X['duration'].to_numpy(copy=True).reshape(-1, 1)

def KNN_duration(n_min, n_max):
    """Print 5-fold cross-validation results for unweighted KNN using duration only.

    For each neighbour count from ``n_min`` to ``n_max`` (both inclusive) a
    ``KNeighborsRegressor`` with default weights is cross-validated on the
    notebook-level ``X_dur`` and ``Y``. One line is printed per neighbour
    count with the mean fold score and twice the standard deviation of the
    fold scores. The value labelled "Accuracy" is the regressor's default
    score (R^2 in scikit-learn), so it may be negative.

    Returns nothing; the results are only printed.
    """
    neighbour_counts = np.arange(n_min, n_max + 1)  # +1 makes n_max inclusive
    for k in neighbour_counts:
        fold_scores = cross_val_score(KNeighborsRegressor(n_neighbors=k), X_dur, Y, cv=5)
        centre = fold_scores.mean()
        spread = 2 * fold_scores.std()
        print('Accuracy knn_test_%0.0f: %0.2f (+/- %0.2f)' % (k, centre, spread))

In [21]:
KNN_duration(1, 20)

Accuracy knn_test_1: -1.36 (+/- 2.87)
Accuracy knn_test_2: -0.78 (+/- 1.18)
Accuracy knn_test_3: -0.34 (+/- 0.27)
Accuracy knn_test_4: -0.76 (+/- 0.70)
Accuracy knn_test_5: -0.70 (+/- 1.02)
Accuracy knn_test_6: -0.76 (+/- 1.38)
Accuracy knn_test_7: -0.66 (+/- 1.33)
Accuracy knn_test_8: -0.53 (+/- 0.94)
Accuracy knn_test_9: -0.41 (+/- 0.63)
Accuracy knn_test_10: -0.44 (+/- 0.81)
Accuracy knn_test_11: -0.40 (+/- 0.73)
Accuracy knn_test_12: -0.39 (+/- 0.76)
Accuracy knn_test_13: -0.42 (+/- 0.83)
Accuracy knn_test_14: -0.36 (+/- 0.73)
Accuracy knn_test_15: -0.39 (+/- 0.64)
Accuracy knn_test_16: -0.35 (+/- 0.51)
Accuracy knn_test_17: -0.33 (+/- 0.48)
Accuracy knn_test_18: -0.38 (+/- 0.65)
Accuracy knn_test_19: -0.37 (+/- 0.76)
Accuracy knn_test_20: -0.31 (+/- 0.56)


In [22]:
## Try just duration, weighted by distance

def KNN_w_duration(n_min, n_max):
    """Print 5-fold cross-validation results for distance-weighted KNN using duration only.

    For each neighbour count from ``n_min`` to ``n_max`` (both inclusive) a
    ``KNeighborsRegressor(weights='distance')`` is cross-validated on the
    notebook-level ``X_dur`` and ``Y``. One line is printed per neighbour
    count with the mean fold score and twice the standard deviation of the
    fold scores. The value labelled "Accuracy" is the regressor's default
    score (R^2 in scikit-learn), so it may be negative.

    Returns nothing; the results are only printed.
    """
    neighbour_counts = np.arange(n_min, n_max + 1)  # +1 makes n_max inclusive
    for k in neighbour_counts:
        model = KNeighborsRegressor(n_neighbors=k, weights='distance')
        fold_scores = cross_val_score(model, X_dur, Y, cv=5)
        centre = fold_scores.mean()
        spread = 2 * fold_scores.std()
        print('Accuracy knn_test_%0.0f: %0.2f (+/- %0.2f)' % (k, centre, spread))

In [23]:
KNN_w_duration(1, 20)

Accuracy knn_test_1: -1.36 (+/- 2.87)
Accuracy knn_test_2: -1.01 (+/- 0.99)
Accuracy knn_test_3: -0.88 (+/- 0.98)
Accuracy knn_test_4: -0.94 (+/- 0.86)
Accuracy knn_test_5: -0.94 (+/- 0.90)
Accuracy knn_test_6: -0.94 (+/- 0.93)
Accuracy knn_test_7: -0.89 (+/- 0.90)
Accuracy knn_test_8: -0.82 (+/- 0.81)
Accuracy knn_test_9: -0.78 (+/- 0.79)
Accuracy knn_test_10: -0.78 (+/- 0.77)
Accuracy knn_test_11: -0.78 (+/- 0.77)
Accuracy knn_test_12: -0.76 (+/- 0.75)
Accuracy knn_test_13: -0.76 (+/- 0.74)
Accuracy knn_test_14: -0.75 (+/- 0.76)
Accuracy knn_test_15: -0.75 (+/- 0.76)
Accuracy knn_test_16: -0.74 (+/- 0.76)
Accuracy knn_test_17: -0.74 (+/- 0.75)
Accuracy knn_test_18: -0.75 (+/- 0.74)
Accuracy knn_test_19: -0.76 (+/- 0.74)
Accuracy knn_test_20: -0.75 (+/- 0.75)


In [24]:
## These are all terrible!