# K-Nearest Neighbors

#### Importing Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor

#### Importing dataset

In [2]:
dataset = pd.read_csv('dataset/spotify_features.csv', sep=',')
dataset.head()

Unnamed: 0,track_popularity,instrumentalness,duration_ms,energy,acousticness,album_month,album_year,danceability,loudness,liveness,genre_code,release_month_code,valence,artist_code,speechiness,tempo
0,67.0,0.00421,162600.0,0.815,0.0724,12,2019,0.726,-4.969,0.357,2,2,0.693,4535,0.106983,99.972
1,70.0,2.3e-05,176616.0,0.931,0.0794,7,2019,0.675,-3.432,0.19031,2,5,0.613,7724,0.0742,124.008
2,60.0,9e-06,169093.0,0.93,0.0287,7,2019,0.718,-3.778,0.204,2,5,0.509838,6862,0.102,121.956
3,62.0,0.0,187675.0,0.856,0.187,7,2019,0.449,-4.788,0.176,2,5,0.152,3635,0.0623,112.648
4,58.0,5e-06,207894.0,0.923,0.146,6,2019,0.679,-6.5,0.124,2,6,0.752,2056,0.181,121.984


#### Preprocessing

In [3]:
model = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsRegressor(n_neighbors=8))
])

X = dataset.drop(columns=['track_popularity'])
y = dataset['track_popularity']

X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25)

print(f'Training set: {X_train.shape}, {y_train.shape}')
print(f'Validation set: {X_val.shape}, {y_val.shape}')
print(f'Test set: {X_test.shape}, {y_test.shape}')

Training set: (11908, 15), (11908,)
Validation set: (3970, 15), (3970,)
Test set: (3970, 15), (3970,)


#### Model training

In [4]:
model.fit(X_train, y_train)

#### Model Evaluation

In [6]:
train_score = model.score(X_train, y_train)
val_score = model.score(X_val, y_val)
test_score = model.score(X_test, y_test)

print(f"Train R^2: {train_score:.2f}")
print(f"Validation R^2: {val_score:.2f}")
print(f"Test R^2: {test_score:.2f}")

mae = (y_test - model.predict(X_test)).abs().mean()
print(f'MAE: {mae:.2f}')
mse = ((y_test - model.predict(X_test))**2).mean()

Train R^2: 0.27
Validation R^2: 0.07
Test R^2: 0.07
MAE: 18.92
