In [14]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px 
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import pairwise_distances_argmin_min
from sklearn.metrics.pairwise import euclidean_distances
from yellowbrick.target import FeatureCorrelation
import knntools

tracks = pd.read_csv('spotify_data.csv')
tracks['genre'] = tracks['genre'].str.strip().str.lower()
tracks.drop_duplicates(subset=['track_name', 'artist_name'], inplace=True)
tracks.dropna()


Unnamed: 0.1,Unnamed: 0,artist_name,track_name,track_id,popularity,year,genre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0,Jason Mraz,I Won't Give Up,53QF56cjZA9RTuuMZDrSA6,68,2012,acoustic,0.483,0.303,4,-10.058,1,0.0429,0.6940,0.000000,0.1150,0.1390,133.406,240166,3
1,1,Jason Mraz,93 Million Miles,1s8tP3jP4GZcyHDsjvw218,50,2012,acoustic,0.572,0.454,3,-10.286,1,0.0258,0.4770,0.000014,0.0974,0.5150,140.182,216387,4
2,2,Joshua Hyslop,Do Not Let Me Go,7BRCa8MPiyuvr2VU3O9W0F,57,2012,acoustic,0.409,0.234,3,-13.711,1,0.0323,0.3380,0.000050,0.0895,0.1450,139.832,158960,4
3,3,Boyce Avenue,Fast Car,63wsZUhUZLlh1OsyrZq7sz,58,2012,acoustic,0.392,0.251,10,-9.845,1,0.0363,0.8070,0.000000,0.0797,0.5080,204.961,304293,4
4,4,Andrew Belle,Sky's Still Blue,6nXIYClvJAfi6ujLiKqEq8,54,2012,acoustic,0.430,0.791,6,-5.419,0,0.0302,0.0726,0.019300,0.1100,0.2170,171.864,244320,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1159759,1473391,Nicola Conte,Black Spirits,0m27F0IGHLGAWhqd6ccYst,4,2011,trip-hop,0.373,0.742,10,-6.453,0,0.0736,0.3250,0.000141,0.1590,0.5220,107.951,344013,3
1159760,1473392,Nicola Conte,Quiet Dawn,6er9p611eHEcUCU50j7D57,3,2011,trip-hop,0.516,0.675,7,-7.588,0,0.0326,0.7880,0.000129,0.1300,0.2640,119.897,285067,4
1159761,1473393,Amon Tobin,Morning Ms Candis,7jsMMqxy1tt0rH5FzYcZTQ,2,2011,trip-hop,0.491,0.440,5,-8.512,1,0.0274,0.4770,0.003130,0.0936,0.0351,100.076,214253,4
1159762,1473394,Peace Orchestra,Happy Christmas (War Is Over),77lA1InUaXztuRk2vOzD1S,0,2011,trip-hop,0.480,0.405,0,-13.343,1,0.0276,0.4310,0.000063,0.1250,0.2020,133.885,239133,3


In [16]:
features = ['danceability', 'energy', 'key', 'loudness', 'mode',
            'speechiness', 'acousticness', 'instrumentalness', 'liveness',
            'valence', 'tempo', 'duration_ms', 'time_signature']
# standardize feature cols
scaler = StandardScaler()
scaledTracks = scaler.fit_transform(tracks[features])

kmeans = KMeans(n_clusters=10, random_state=42)
tracks['cluster'] = kmeans.fit_predict(scaledTracks)

le_artist = LabelEncoder()
le_track = LabelEncoder()

tracks['artist_name2'] = le_artist.fit_transform(tracks['artist_name'])
tracks['track_name2'] = le_track.fit_transform(tracks['track_name'])

k = 6

knn = NearestNeighbors(n_neighbors=k, algorithm='auto')
knn.fit(scaledTracks)

print(tracks.head())

def recommendSongs(inputTrackName, inputArtistName, tracks, features, scaler, kmeans, n_recommendations=5):
    inputTrackName = inputTrackName.strip().lower()
    inputArtistName = inputArtistName.strip().lower()
    
    # find input track
    inputTrack = tracks[(tracks['track_name'].str.lower() == inputTrackName) & (tracks['artist_name'].str.lower() == inputArtistName)]
    
    if inputTrack.empty:
        print("Track not found in dataset")
        return inputTrack 
    print("Input track found:")
    print(inputTrack)
    
    inputTrackFeatures = inputTrack[features]
    scaledInputTrackFeatures = scaler.transform(inputTrackFeatures)
    
    # predict cluster for the input track
    inputCluster = kmeans.predict(scaledInputTrackFeatures)[0]
    print(f"Predicted cluster: {inputCluster}")
    
    # get tracks from same cluster
    similarTracks = tracks[tracks['cluster'] == inputCluster]
    # exclude input track from recommendations
    similarTracks = similarTracks[(similarTracks['track_name'].str.lower() != inputTrackName) | (similarTracks['artist_name'].str.lower() != inputArtistName)]
    
    # calculate the distances to input track
    similarTracksFeatures = scaler.transform(similarTracks[features])
    distances = euclidean_distances(scaledInputTrackFeatures, similarTracksFeatures).flatten()
    # add distances to the df
    similarTracks['distance'] = distances
    
    # sort by distance and select top 5
    recommendations = similarTracks.sort_values(by=['distance', 'popularity'], ascending=[True, False]).drop_duplicates(subset=['artist_name', 'track_name']).head(n_recommendations)
    if len(recommendations) < n_recommendations:
        # fill remaining recommendations with similar tracks if duplicates
        remaining = similarTracks[~similarTracks.index.isin(recommendations.index)].head(n_recommendations - len(recommendations))
        recommendations = pd.concat([recommendations, remaining])
    
    return recommendations[['artist_name', 'track_name', 'popularity', 'genre', 'year']]


  super()._check_params_vs_input(X, default_n_init=10)


   Unnamed: 0    artist_name        track_name                track_id  \
0           0     Jason Mraz   I Won't Give Up  53QF56cjZA9RTuuMZDrSA6   
1           1     Jason Mraz  93 Million Miles  1s8tP3jP4GZcyHDsjvw218   
2           2  Joshua Hyslop  Do Not Let Me Go  7BRCa8MPiyuvr2VU3O9W0F   
3           3   Boyce Avenue          Fast Car  63wsZUhUZLlh1OsyrZq7sz   
4           4   Andrew Belle  Sky's Still Blue  6nXIYClvJAfi6ujLiKqEq8   

   popularity  year     genre  danceability  energy  key  ...  acousticness  \
0          68  2012  acoustic         0.483   0.303    4  ...        0.6940   
1          50  2012  acoustic         0.572   0.454    3  ...        0.4770   
2          57  2012  acoustic         0.409   0.234    3  ...        0.3380   
3          58  2012  acoustic         0.392   0.251   10  ...        0.8070   
4          54  2012  acoustic         0.430   0.791    6  ...        0.0726   

   instrumentalness  liveness  valence    tempo  duration_ms  time_signature  \


In [18]:
inputTrackName = input("Enter track name: ")
inputArtistName = input("Enters artist name: ")

print("You entered: ")
print(f"Track: {inputTrackName}")
print(f"Artist: {inputArtistName}")

You entered: 
Track: blinding lights
Artist: the weeknd


In [21]:
recommendations = recommendSongs(inputTrackName, inputArtistName, tracks, features, scaler, kmeans)
if recommendations.empty:
    print("No recommendations found")
else:
    print("Here are the top recommendations based on the inputted song:")
    print(recommendations[['artist_name', 'track_name', 'popularity', 'genre', 'year']])

input_song = knntools.find_song(
        inputArtistName.strip().lower(),
        inputTrackName.strip().lower(),
        tracks,
        le_artist,
        le_track
    )

if isinstance(input_song, str):
    input_song_dict = input_song.iloc[0].to_dict()
    similar_songs = knntools.find_similar_songs(input_song_dict, tracks, knn, scaler, le_artist, le_track)
    print("Here's a few songs I recommend you listen to!")
    print("=============================================")
    i = True
    for index, row in similar_songs.iterrows():
        # Lazy way of running first iteration on separate logic, doing this because knn returns original song too.
        if i:
            i = False
        else:
            print(f"'{row['track_name']}', by {row['artist_name']}\n")
else:
    print("No recommendations found")

Input track found:
        Unnamed: 0 artist_name       track_name                track_id  \
461991      461991  The Weeknd  Blinding Lights  0VjIjW4GlUZAMYd2vXMi3b   

        popularity  year genre  danceability  energy  key  ...  acousticness  \
461991          90  2020   pop         0.514    0.73    1  ...       0.00146   

        instrumentalness  liveness  valence    tempo  duration_ms  \
461991          0.000095    0.0897    0.334  171.005       200040   

        time_signature  cluster  artist_name2  track_name2  
461991               4        3         57002        91360  

[1 rows x 23 columns]
Predicted cluster: 3
Here are the top recommendations based on the inputted song:
       artist_name           track_name  popularity          genre  year
545781    ShockOne        Thinkin About          49  drum-and-bass  2022
367737      MISSIO            I See You          52       alt-rock  2019
339008      Tilian           Right Side          22       hardcore  2018
72872     C

AttributeError: 'str' object has no attribute 'iloc'