## KNN Content based approach - music recommender

KNN is used to associate songs based on the different features of the dataset.

In [2]:
# import dependencies

import pandas as pd
import numpy as np
import sklearn.preprocessing as preprocessing
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split

In [4]:
data_df = pd.read_csv('./output.csv', header=0)

display(data_df.iloc[0])

artist_name                      Baka Beyond
title                  Journey Album Version
artist_location                     Cameroon
release                     The Meeting Pool
hotttness                           0.391484
familiarity                         0.559823
danceability                             0.0
duration                           368.14322
energy                                   0.0
loudness                             -11.482
year                                    1995
tempo                                139.092
analysis_rate                          22050
end_of_fade_in                           0.0
key                                        9
key_confidence                         0.588
mode                                       0
mode_confidence                        0.367
start_of_fade_out                    358.905
time_signature                             4
time_signature_conf                      0.0
song_id                   SOXFJZQ12A6D4F81FB
Name: 0, d

In [6]:
data_df = data_df.drop_duplicates(subset=['song_id', 'hotttness', 'familiarity', 'loudness', 'tempo', 'key', 'key_confidence', 'mode', 'mode_confidence'])
print(len(data_df.index))
data_df = data_df.reset_index(drop=True)

10000
10000


In [13]:
features = ['hotttness', 'familiarity', 'loudness', 'key', 'mode', 'tempo']
train_data = data_df[features]

In [15]:
# to avoid giving priority to features with bigger value range, normalization is done [0, 1]

scaler = preprocessing.MinMaxScaler()
train_data_normalized = scaler.fit_transform(train_data)

In [10]:
nearest_neighbor_model = NearestNeighbors(n_neighbors=10) # K = 10 for 10 closest songs
nearest_neighbor_model.fit(train_data_normalized)

NearestNeighbors(n_neighbors=10)

## Random song test

In [29]:
random_song = data_df.sample(n=1)
rand_song_features = random_song[features]

# normalize this data with scaler as well
rand_song_features = scaler.transform(rand_song_features)

In [33]:
# Run KNN for a random song

similar_song_indexes = nearest_neighbor_model.kneighbors(rand_song_features, return_distance=False).flatten()
print(similar_song_indexes)

[9525  544 7477 6961 6122 8176 6109 4115 3595 7817]


In [38]:
random_song[['artist_name', 'title', ] + features]

Unnamed: 0,artist_name,title,hotttness,familiarity,loudness,key,mode,tempo
9525,Severed Heads,Hot With Fleas,0.394274,0.525483,-7.574,1,1,121.369


## Find closest songs

In [39]:
closest_songs = data_df.iloc[similar_song_indexes, :]
closest_songs[['artist_name', 'title'] + features]

Unnamed: 0,artist_name,title,hotttness,familiarity,loudness,key,mode,tempo
9525,Severed Heads,Hot With Fleas,0.394274,0.525483,-7.574,1,1,121.369
544,Bertine Zetlitz,Midnight,0.379278,0.538402,-7.286,1,1,125.026
7477,DJ Gollum vs Basslovers United,Narcotic,0.40048,0.541764,-7.509,1,1,127.859
6961,Phil Ochs,Thats What I Want To Hear LP Version,0.387197,0.527316,-8.114,1,1,131.268
6122,Pete Philly Perquisite,Last Love Song,0.433781,0.53901,-7.623,1,1,126.057
8176,Pete Philly Perquisite,Paranoid,0.433781,0.53901,-8.558,1,1,121.998
6109,Obk,Último Carnaval,0.423704,0.528587,-8.694,1,1,129.957
4115,Candido,Thousand Finger Man,0.351932,0.544066,-6.418,1,1,121.921
3595,Neema,Indian Queen,0.35579,0.523161,-6.547,1,1,114.05
7817,Dionysus,Tides Will Turn,0.414753,0.537452,-5.475,1,1,127.052
