In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
import copy
from IPython.display import display
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import plotly
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
#Data Imported From Kaggle Dataset
data = pd.read_csv("data.csv")
df = data.copy()

In [3]:
#Cleaning the Artist Column
def FixArtist(artist):
    artist=artist.replace("'","").replace("'","").replace('[','').replace(']','')
    return artist

df['artists'] = df['artists'].apply(FixArtist)
data['artists'] = data['artists'].apply(FixArtist)

In [4]:
#creating dictionary for lookup after predictions
dictionary = df[["artists",
                 "name", 
                 "key", 
                 "id",
                 "valence",
                 "acousticness",
                 "danceability",
                 "energy",
                 "instrumentalness",
                 "liveness",
                 "speechiness"]]

#Dropping columns not suited for a Standard Scaler fit which is neccesary for NearestNeighbors
df = df.drop(['artists','id','name','release_date'], axis = 1)

#Transforming dataset to standard scaler
scaler = StandardScaler()
df_s = scaler.fit_transform(df)

In [5]:
#The NN Model
nn = NearestNeighbors(n_neighbors=10, algorithm='ball_tree')
nn.fit(df_s)

NearestNeighbors(algorithm='ball_tree', n_neighbors=10)

In [6]:
#Method for importing elsewhere
def PredictNSimilarSongs(song_name, artist, n):
    
    
    #Matches Song with the artist
    index = 0
    indices = []
    
    for i, item in enumerate(data['artists']):
        if(item == artist):
            indices.append(i)
    
    for item in indices:
        if(data['name'][item] == song_name):
            index = item
    
    #Gets Predictions with NN
    neighbor_predictions = nn.kneighbors([df_s[index]])
    
    #Finds the prediction's names and who its by in the dictionary
    list_of_predictions = neighbor_predictions[1][0].tolist()

    ten_similar_tracks_title = []
    for item in list_of_predictions:
        track_hash = dictionary['name'].iloc[item]
        ten_similar_tracks_title.append(track_hash)
    
    ten_similar_tracks_artists = []
    for item in list_of_predictions:
        track_hash = dictionary['artists'].iloc[item]
        ten_similar_tracks_artists.append(track_hash)

    ten_suggestions = []
    for i in range(0,n+1):
        ten_suggestions.append(ten_similar_tracks_title[i] + ' by ' + ten_similar_tracks_artists[i])
        
    
    return ten_suggestions


In [7]:
def PlotSongAgainstSuggestions(song_name, artist, n):

    """Starts with the same code as PredictNSimilarSongs because I want 
    to use the hashes from the Scaled Output of the Nearest Neighbors algorithm to look up the
    numberic descriptions of each song"""
    
    #Matches Song with the artist
    index = 0
    indices = []
    
    for i, item in enumerate(data['artists']):
        if(item == artist):
            indices.append(i)
    
    for item in indices:
        if(data['name'][item] == song_name):
            index = item
    
    #Gets Predictions with NN
    neighbor_predictions = nn.kneighbors([df_s[index]])
    
    #Finds the prediction's names and who its by in the dictionary
    list_of_predictions = neighbor_predictions[1][0].tolist()

    ten_similar_tracks_title = []
    
    for item in list_of_predictions:
        track_hash = dictionary['name'].iloc[item]
        ten_similar_tracks_title.append(track_hash)
    
    ten_similar_tracks_artists = []
    for item in list_of_predictions:
        track_hash = dictionary['artists'].iloc[item]
        ten_similar_tracks_artists.append(track_hash)
    
    #Makes lists of all the song numeric descriptions for plotting later
    ten_similar_tracks_valence = []
    for item in list_of_predictions:
        track_hash = dictionary['valence'].iloc[item]
        ten_similar_tracks_valence.append(track_hash)
        
    ten_similar_tracks_acousticness = []
    for item in list_of_predictions:
        track_hash = dictionary['acousticness'].iloc[item]
        ten_similar_tracks_acousticness.append(track_hash)
        
    ten_similar_tracks_danceability = []
    for item in list_of_predictions:
        track_hash = dictionary['danceability'].iloc[item]
        ten_similar_tracks_danceability.append(track_hash)
        
    ten_similar_tracks_energy = []
    for item in list_of_predictions:
        track_hash = dictionary['energy'].iloc[item]
        ten_similar_tracks_energy.append(track_hash)
        
    ten_similar_tracks_instrumentalness = []
    for item in list_of_predictions:
        track_hash = dictionary['instrumentalness'].iloc[item]
        ten_similar_tracks_instrumentalness.append(track_hash)
        
    ten_similar_tracks_liveness = []
    for item in list_of_predictions:
        track_hash = dictionary['liveness'].iloc[item]
        ten_similar_tracks_liveness.append(track_hash)
        
    ten_similar_tracks_speechiness = []
    for item in list_of_predictions:
        track_hash = dictionary['speechiness'].iloc[item]
        ten_similar_tracks_speechiness.append(track_hash)
        
        
    ten_suggestions = []
    for i in range(0,n+1):
        ten_suggestions.append(ten_similar_tracks_title[i] + ' by ' + ten_similar_tracks_artists[i])
        
    
    ten_suggestions    
    
    #Plotting
    fig = make_subplots(rows=2, cols=2,specs=[[{'type': 'polar'}]*2]*2)

    fig.add_trace(go.Scatterpolar(
        name = "Original Song",
        r = [ten_similar_tracks_valence[0],
            ten_similar_tracks_acousticness[0],
            ten_similar_tracks_danceability[0],
            ten_similar_tracks_energy[0],
            ten_similar_tracks_instrumentalness[0],
            ten_similar_tracks_liveness[0],
            ten_similar_tracks_speechiness[0],
            ten_similar_tracks_valence[0]],
        
        theta = ["valence", "acousticness", "danceability", "energy", "instrumentalness", "liveness", "speechiness", "valence"],
    ), 1, 1)
    
    #Loops through the lists and plots each feature for each song recommendation
    for i in range(1,n):
        fig.add_trace(go.Scatterpolar(
          name = ten_similar_tracks_title[i],
          r = [ten_similar_tracks_valence[i],
               ten_similar_tracks_acousticness[i],
               ten_similar_tracks_danceability[i],
               ten_similar_tracks_energy[i],
               ten_similar_tracks_instrumentalness[i],
               ten_similar_tracks_liveness[i],
               ten_similar_tracks_speechiness[i],
               ten_similar_tracks_valence[i]],

          theta = ["valence", "acousticness", "danceability", "energy", "instrumentalness", "liveness", "speechiness", "valence"],
        ), 1, 1)

    #tidying up the plot
    fig.update_layout(
        polar = dict(
        radialaxis_angle = 90,
        radialaxis = dict(range=[0, 1], showticklabels=False),
        angularaxis = dict(direction = "clockwise", period = 7)))
    
    fig.show()
    
    
    pass

In [8]:
#Used for the autocomplete drop down box when submitting your song
def find_word(word,df,number=10):
    df.drop_duplicates(inplace=True)
    words=df['name'].values
    artists=df['artists'].values
    t=[]
    count=0
    if word[-1]==' ':
        word=word[:-1]
    for i in words:
        if word.lower() in i.lower():
            t.append([len(word)/len(i),count])
        else:
            t.append([0,count])
        count+=1
    t.sort(reverse=True)
    s=[[words[t[i][1]],artists[t[i][1]].strip('][').split(', ')] for i in range(number)]   
    songs=[words[t[i][1]] for i in range(number)]
    artist=[artists[t[i][1]] for i in range(number)]
    x=[]
    for i in s:
        l=''
        by=''
        for j in i[1]:
            by+=j
        l+=i[0]+' by '+by
        x.append(l)
    tup=[]
    for i in range(number):
        tup.append((x[i],i))

    
    return tup,songs,artist

In [13]:
a=input('Please enter The name of the song :')
b=int(input('Please enter the number of recommendations you want: '))
tup,s,ar=find_word(a,data)
ans=widgets.Dropdown(
    options=tup,
    description='Closest Songs To '+a,
    disabled=False,
)
display(ans)

Please enter The name of the song :stronger
Please enter the number of recommendations you want: 4


Dropdown(description='Closest Songs To stronger', options=(('Stronger by TheFatRatSlayditAnjulie', 0), ('Stron…

In [14]:
prediction = PredictNSimilarSongs(s[ans.value], ar[ans.value], b)

In [15]:
prediction

['Stronger by Kanye West',
 'I Love This Bar by Toby Keith',
 'One More Time - Radio Edit [Short Radio Edit] by Daft Punk',
 'I Still Love You by 702',
 'I Love This Bar by Toby Keith']

In [16]:
PlotSongAgainstSuggestions(s[ans.value], ar[ans.value], b)