In [None]:
pip install spotipy



In [None]:
pip install chart_studio



In [None]:
# numpy import for math functions 
# square root, and sum
import numpy as np

# we define this outside of the class as a global function
# to be used inside the class
def euclidean_distance(first, second):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    first, second : array-like
        Coordinates of the two points. Plain Python sequences and NumPy
        arrays are both accepted; the two inputs must be broadcastable
        against each other.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared element-wise differences. The
        sum runs over every element of the broadcast difference, so the
        result is always a single scalar.
    """
    # Coerce to float arrays first: this lets lists/tuples be subtracted and
    # prevents unsigned-integer inputs from wrapping around on subtraction.
    difference = np.asarray(first, dtype=float) - np.asarray(second, dtype=float)
    return np.sqrt(np.sum(difference ** 2))

class KNN():
    """Minimal k-nearest-neighbours model based on Euclidean distance.

    The model is instance-based: ``fit`` only stores the data, and all of
    the work happens at query time by measuring the distance from the query
    point to every stored training sample with the module-level
    ``euclidean_distance`` function.
    """

    # by default our number of neighbors
    # will be set to 3
    def __init__(self, k=3):
        self.k = k

    def fit(self, x, y=None):
        """Store the training samples and, optionally, their labels.

        Parameters
        ----------
        x : sequence of samples
            Training samples; iterated one sample at a time at query time.
        y : sequence, optional
            Label for each training sample, indexable by sample position.
            When omitted an empty list is stored, which is sufficient for
            ``get_neighbors`` but not for ``predict``.
        """
        # KNN does not require a training step, so we just keep the data.
        self.train = x
        # A fresh list per call avoids sharing one mutable default object
        # between every instance that is fitted without labels.
        self.target = [] if y is None else y

    def _nearest_indices(self, a):
        """Return the indices of the ``k`` training samples closest to ``a``."""
        distances = [euclidean_distance(a, x_train) for x_train in self.train]
        # argsort orders the indices by increasing distance; keep the first k.
        return np.argsort(distances)[:self.k]

    def _predict(self, a):
        """Return the majority label among the ``k`` nearest neighbours of ``a``."""
        # labels are listed from nearest neighbour to farthest
        k_label = [self.target[i] for i in self._nearest_indices(a)]
        # Majority vote. Iterating the list (not a set) makes ties
        # deterministic: the label of the nearest tied neighbour wins.
        majority = max(k_label, key=k_label.count)
        return majority

    def predict(self, x):
        """Return an array with the predicted label for every sample in ``x``."""
        predicted = [self._predict(a) for a in x]
        return np.array(predicted)

    def get_neighbors(self, a):
        """Return the indices of the ``k`` nearest training samples to ``a``."""
        return self._nearest_indices(a)



In [None]:
import os

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify API credentials are read from the environment instead of being
# committed to the notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# before starting the kernel.
# NOTE(review): the client id/secret that used to be hardcoded in this cell
# have been published with the notebook and should be revoked and replaced.
cid = os.environ.get("SPOTIPY_CLIENT_ID")
secret = os.environ.get("SPOTIPY_CLIENT_SECRET")
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Load Spotify Dataframe
# Prepare Data for KNN (Scratch, and Sklearn)

df = pd.read_csv("https://raw.githubusercontent.com/Build-Week-Spotify/ds/master/data/SpotifyAudioFeaturesApril2019.csv")

from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, Normalizer
from sklearn.neighbors import NearestNeighbors

# Human-readable identifiers, looked up later by row position
labels = df[['artist_name','track_name']]

# Everything that is not dropped here is used as a feature for the
# neighbor search
data = df.drop(columns = ['artist_name',
                            'duration_ms',
                            'track_id',
                            'mode',
                            'track_name',
                            'popularity',])

# Standardize the features; the same fitted scaler is reused to transform
# the searched song so both live in the same feature space
scaler = StandardScaler()

train = scaler.fit_transform(data)

In [None]:
# Next, fetch a song's audio features from Spotify and scale them so they can be used as the query point for the neighbor search:

def get_audio_features(artist_name, track_name):
    """Look up a track on Spotify and return its scaled audio features.

    Uses the module-level Spotify client ``sp`` to search for the track and
    fetch its audio features, then transforms them with the module-level
    fitted ``scaler``.

    Parameters
    ----------
    artist_name : str
        Artist to search for.
    track_name : str
        Track title to search for.

    Returns
    -------
    The result of ``scaler.transform`` on a one-row DataFrame holding the
    selected audio features of the first search hit.

    Raises
    ------
    IndexError
        If the Spotify search returns no tracks.
    """
    # send request to spotify api searching for the artist name and track
    search_response = sp.search(
        q=f'artist: {artist_name} track: {track_name}')

    matches = search_response['tracks']['items']
    if not matches:
        raise IndexError(
            f"No Spotify track found for artist {artist_name!r} and track {track_name!r}")

    # only the first (best) hit is used
    track_id = matches[0]['id']

    raw_features = sp.audio_features(track_id)[0]

    # Features kept for the model, in the order the DataFrame columns are
    # created. NOTE(review): this presumably has to match the column order
    # the scaler was fitted on -- confirm against the training data.
    feature_order = ('acousticness', 'danceability', 'energy',
                     'instrumentalness', 'key', 'liveness', 'loudness',
                     'speechiness', 'tempo', 'time_signature', 'valence')
    selected = {name: raw_features[name] for name in feature_order}

    features_df = pd.DataFrame(selected, index=[0])

    audio_feats_scaled = scaler.transform(features_df)

    return audio_feats_scaled

In [None]:
# Choose the song to search for, then run this cell:

artist_name = "Ludovico Einaudi"
song_name = "Nuvole Nere"

test_labels = [artist_name, song_name]

# Scaled audio features of the searched song (the query point)
test = get_audio_features(artist_name, song_name)

# Same features with named columns, plus a one-row frame of its labels
test_feats_df = pd.DataFrame(test, columns=data.columns)
test_labels_df = pd.DataFrame().assign(artist_name = [artist_name], song_name = [song_name])

In [None]:
# Find the searched song's nearest neighbors with Sklearn and package the results:

from sklearn.neighbors import NearestNeighbors

sknn = NearestNeighbors(n_neighbors=3)
sknn.fit(train)
# kneighbors returns (distances, indices); keep the index row of the
# single query sample
sknn_neighbors = sknn.kneighbors(test)[1][0]

# Flat list alternating track name, artist name for each neighbor
sknn_matches = labels.iloc[sknn_neighbors]
sknn_results = [
    value
    for pair in zip(sknn_matches['track_name'], sknn_matches['artist_name'])
    for value in pair
]

# Scaled feature rows of the neighbors
sknn_feats = [train[i] for i in sknn_neighbors]

# Even positions hold song names, odd positions hold artist names
sknn_song_names = sknn_results[0::2]
sknn_artist_names = sknn_results[1::2]

sknn_labels_df = pd.DataFrame({
    'artist_names': sknn_artist_names,
    'song_names': sknn_song_names,
})

sknn_feats_df = pd.DataFrame(sknn_feats, columns=data.columns)

sknn_labels_df

Unnamed: 0,artist_names,song_names
0,Christopher Lennertz,Smith / The Forest
1,Johann Sebastian Bach,"Cello Suite No. 5 in C Minor, BWV 1011: IV. Sa..."
2,Johannes Brahms,"Symphony No. 1 in C Minor, Op. 68: II. Andante..."


In [None]:
# Find the searched song's nearest neighbors with My KNN and package the results:

my_knn = KNN(k=3)
my_knn.fit(train)
# indices of the training rows closest to the searched song
myknn_neighbors = my_knn.get_neighbors(test)

# Flat list alternating track name, artist name for each neighbor
myknn_matches = labels.iloc[myknn_neighbors]
myknn_results = [
    value
    for pair in zip(myknn_matches['track_name'], myknn_matches['artist_name'])
    for value in pair
]

# Scaled feature rows of the neighbors
myknn_feats = [train[i] for i in myknn_neighbors]

# Even positions hold song names, odd positions hold artist names
myknn_song_names = myknn_results[0::2]
myknn_artist_names = myknn_results[1::2]

myknn_labels_df = pd.DataFrame({
    'artist_names': myknn_artist_names,
    'song_names': myknn_song_names,
})

myknn_feats_df = pd.DataFrame(myknn_feats, columns=data.columns)

myknn_labels_df

Unnamed: 0,artist_names,song_names
0,Christopher Lennertz,Smith / The Forest
1,Johann Sebastian Bach,"Cello Suite No. 5 in C Minor, BWV 1011: IV. Sa..."
2,Johannes Brahms,"Symphony No. 1 in C Minor, Op. 68: II. Andante..."


In [None]:
# Code for visualizing the comparison between searched song, and the neighbors produced by Sklearn, and My KNN:

import os

import chart_studio
import chart_studio.plotly as py
import chart_studio.tools as tls
import random
import plotly.graph_objects as go


# --- Angular layout -------------------------------------------------------
# Every feature gets an equal sector of the circle. The proportions below
# reproduce the layout that was previously hardcoded for 10 features
# (36 degree sectors, bars 4 degrees apart and 6 degrees wide, the first
# bar starting 13 degrees before the tick label), but are derived from the
# number of feature columns so that every column gets its own slot.
feature_names = list(sknn_feats_df.columns)
sector = 360 / len(feature_names)
step = sector / 9        # angular distance between neighbouring bars
width = 1.5 * step       # angular width of one bar

# tick-label angle for each feature, and the angle of the first bar slot
vals = [sector * (position + 1) for position in range(len(feature_names))]
theta = [tick - 3.25 * step for tick in vals]

# --- Traces ---------------------------------------------------------------
# (legend prefix, feature frame, label frame, song column, artist column,
#  first bar slot, one colour per plotted row)
# Slot 3 is left empty, which separates the My KNN bars from the Sklearn bars.
bar_groups = [
    ("My KNN", myknn_feats_df, myknn_labels_df,
     'song_names', 'artist_names', 0, ['gray', 'aquamarine', 'blue']),
    ("Sklearn", sknn_feats_df, sknn_labels_df,
     'song_names', 'artist_names', 4, ['red', 'orange', 'yellow']),
    ("Your Searched Song", test_feats_df, test_labels_df,
     'song_name', 'artist_name', 7, ['green']),
]

fig = go.Figure()
for group_name, group_feats, group_labels, song_col, artist_col, first_slot, colors in bar_groups:
    for row, color in enumerate(colors):
        song = group_labels[song_col][row]
        artist = group_labels[artist_col][row]
        fig.add_trace(go.Barpolar(
            r=group_feats.iloc[row],
            # shift this song's bars into its own slot within each sector
            theta=[angle + (first_slot + row) * step for angle in theta],
            width=width,
            marker_color=color,
            marker_line_color="black",
            marker_line_width=2,
            opacity=0.9,
            name=f"{group_name}: {song} by {artist}",
            hovertext=song,
            hoverinfo="text",
        ))

# --- Layout ---------------------------------------------------------------
# The radial axis range must be two scalars. Use the overall minimum and
# maximum of everything that is plotted so no bar falls outside the axis.
plotted_frames = [myknn_feats_df, sknn_feats_df, test_feats_df]
radial_min = float(min(frame.to_numpy().min() for frame in plotted_frames))
radial_max = float(max(frame.to_numpy().max() for frame in plotted_frames))

fig.update_layout(
    template='none',
    width=800,
    height=800,
    hoverlabel=dict(
        namelength=-1,
        bgcolor="white",
        bordercolor='black',
        font_size=16,
        font_family="Rockwell",
    ),
    polar = dict(
        radialaxis = dict(range=[radial_min, radial_max], showticklabels=False, ticks=''),
        angularaxis = dict(showticklabels=True, tickmode='array', tickvals=vals, ticktext=feature_names, ticks='')
    ),
    title={
        'text': "<b>Comparing Audio Features</b>",
        'y':0.95,
        'x':0.51,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {'size':20}},
    legend={
        'y':-0.33,
        'x':0.51,
        'xanchor': 'center',
        'yanchor': 'bottom'},
)

# --- Upload ---------------------------------------------------------------
# The Chart Studio API key is read from the environment instead of being
# committed to the notebook. When CHART_STUDIO_API_KEY is not set, no
# credentials are written here.
# NOTE(review): chart_studio presumably falls back to previously stored
# credentials in that case -- confirm.
# NOTE(review): the API key that used to be hardcoded in this cell has been
# published with the notebook and should be regenerated.
plotly_username = os.environ.get("CHART_STUDIO_USERNAME", "kyates2861")
plotly_api_key = os.environ.get("CHART_STUDIO_API_KEY")
if plotly_api_key:
    chart_studio.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)

embed_var = py.plot(fig, filename = 'audio_sim', auto_open=True)
embed_link = tls.get_embed(embed_var)

fig.show()

In [None]:
# Display the value returned by py.plot in the visualization cell (the URL of the uploaded chart)
embed_var

'https://plotly.com/~kyates2861/3/'