In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
from pathlib import Path
import pandas as pd

In [2]:
path = Path("C:/Users/ms101/OneDrive/DataScience_ML/projects/spotify_app")

In [3]:
with open(path / "client_s.txt") as f:
    content = f.readlines()
content = [x.strip() for x in content]

In [4]:
client_id = content[0]
client_secret = content[1]

In [5]:
with open(path / "usernames.txt") as f:
    usernames = f.readlines()
usernames = [x.strip() for x in usernames]

In [6]:
username1 = usernames[0]
username2 = usernames[1]

In [7]:
scope = "user-library-read user-read-recently-played user-top-read playlist-modify-public playlist-read-private"

In [8]:
redirect_uri = "https://developer.spotify.com/dashboard/applications/4a4e029d299a4241873db8300038bf0a"


In [9]:
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, 
                                                      client_secret=client_secret)

sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [10]:
token = util.prompt_for_user_token(username1, scope, client_id, client_secret, redirect_uri)

if token:
    sp = spotipy.Spotify(auth=token)
else:
    print("Can't get token for", username1)

In [11]:
results = sp.current_user_top_tracks(limit=50, offset=0,time_range='short_term')

In [12]:
#results

In [13]:
# Convert it to Dataframe
track_name = []
track_id = []
artist = []
album = []
duration = []
popularity = []
for i, items in enumerate(results['items']):
        track_name.append(items['name'])
        track_id.append(items['id'])
        artist.append(items["artists"][0]["name"])
        duration.append(items["duration_ms"])
        album.append(items["album"]["name"])
        popularity.append(items["popularity"])

# Create the final df   
df_favourite = pd.DataFrame({ "track_name": track_name, 
                             "album": album, 
                             "track_id": track_id,
                             "artist": artist, 
                             "duration": duration, 
                             "popularity": popularity})

df_favourite

Unnamed: 0,track_name,album,track_id,artist,duration,popularity
0,10 Feet Down,Perception,68biLwi894rMQPeIiSky2t,NF,217106,63
1,Fall Slowly (feat. Ashanti) - Extended Version,Evolution,7wK4pOTZKVaAJ00rziu901,Joyner Lucas,292056,59
2,Let You Down,Perception,52okn5MNA47tk87PeZJLEL,NF,212120,82
3,Piece Of Your Heart,Piece Of Your Heart,1DFD5Fotzgn6yYXkYsKiGs,MEDUZA,152913,82
4,Berlin Nights,Berlin Nights,6gG1R1bFdJeNc2ERAwXxCb,Vnce Dolanbay,292115,39
5,Evolution,Evolution,2VopDw2GlF3uwD1kihHmTT,Joyner Lucas,153250,62
6,Rolls Royce,Rolls Royce,22XFe65IH0P2RY0uowBqdI,Geegun,143437,68
7,MOSKAU,"REISE, REISE",4L9UGREMQBfYLmGwlACgTV,Rammstein,256306,60
8,Really Really,Islah (Deluxe),10I3CmmwT0BkOVhduDy53o,Kevin Gates,232093,70
9,Give It to Me - Twin & Vane Remix,Give It to Me (Twin & Vane Remix),6vKqPHrfOVJMsmuVNa4aqy,VANE,176587,50


In [14]:
audio_features = sp.audio_features(df_favourite["track_id"][:])

In [15]:
len(audio_features)

50

In [16]:
feature_cols = list(audio_features[0].keys())[:-7]
feature_cols

['danceability',
 'energy',
 'key',
 'loudness',
 'mode',
 'speechiness',
 'acousticness',
 'instrumentalness',
 'liveness',
 'valence',
 'tempo']

In [17]:
features_list = []
for features in audio_features:
        song_features = [features[col] for col in feature_cols]
        features_list.append(song_features)
features_list

[[0.62, 0.835, 5, -6.634, 0, 0.0984, 0.0964, 1.23e-05, 0.269, 0.0535, 96.09],
 [0.542, 0.453, 6, -11.208, 0, 0.24, 0.204, 6.81e-06, 0.137, 0.0375, 88.31],
 [0.662, 0.714, 5, -5.68, 0, 0.121, 0.312, 0, 0.179, 0.464, 147.997],
 [0.677, 0.744, 10, -6.806, 0, 0.0295, 0.0404, 0.00016, 0.074, 0.631, 124.08],
 [0.901, 0.457, 10, -13.238, 0, 0.163, 0.229, 0.424, 0.0977, 0.531, 127.999],
 [0.687, 0.819, 9, -6.67, 0, 0.431, 0.218, 0, 0.392, 0.568, 81.185],
 [0.872, 0.423, 1, -7.337, 1, 0.213, 0.142, 9.5e-06, 0.104, 0.373, 95.98],
 [0.493, 0.963, 11, -3.41, 1, 0.0638, 2.25e-05, 0.667, 0.305, 0.305, 147.992],
 [0.762, 0.666, 0, -6.055, 1, 0.0916, 0.00427, 0, 0.127, 0.278, 118.97],
 [0.777, 0.75, 1, -7.21, 1, 0.105, 0.0431, 0.00767, 0.595, 0.328, 119.992],
 [0.722, 0.814, 3, -9.946, 0, 0.0412, 0.00551, 0.922, 0.0906, 0.163, 121.997],
 [0.878, 0.515, 1, -7.715, 1, 0.188, 0.0292, 0, 0.0835, 0.269, 156.0],
 [0.703, 0.123, 1, -21.849, 1, 0.841, 0.0193, 0, 0.107, 0.287, 132.651],
 [0.671, 0.582, 6, -9.4

In [18]:
df_features_favourite = pd.DataFrame(features_list,columns = feature_cols)

In [19]:
# sourcePlaylistID = '16xXJ6LbpSmdCaM7jVVrtX'
# sourcePlaylist = sp.user_playlist(username1, sourcePlaylistID);
# tracks = sourcePlaylist["tracks"];
# songs = tracks["items"];

# track_ids = []
# track_names = []

# for i in range(0, len(songs)):
#     if songs[i]['track']['id'] != None: # Removes the local tracks in your playlist if there is any
#         track_ids.append(songs[i]['track']['id'])
#         track_names.append(songs[i]['track']['name'])

# features = []
# for i in range(0,len(track_ids)):
#     audio_features = sp.audio_features(track_ids[i])
#     for track in audio_features:
#         features.append(track)
        
# playlist_df = pd.DataFrame(features, index = track_names)

In [19]:
df_favourite = pd.concat([df_favourite,df_features_favourite],axis = 1)
df_favourite

Unnamed: 0,track_name,album,track_id,artist,duration,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,10 Feet Down,Perception,68biLwi894rMQPeIiSky2t,NF,217106,63,0.62,0.835,5,-6.634,0,0.0984,0.0964,1.2e-05,0.269,0.0535,96.09
1,Fall Slowly (feat. Ashanti) - Extended Version,Evolution,7wK4pOTZKVaAJ00rziu901,Joyner Lucas,292056,59,0.542,0.453,6,-11.208,0,0.24,0.204,7e-06,0.137,0.0375,88.31
2,Let You Down,Perception,52okn5MNA47tk87PeZJLEL,NF,212120,82,0.662,0.714,5,-5.68,0,0.121,0.312,0.0,0.179,0.464,147.997
3,Piece Of Your Heart,Piece Of Your Heart,1DFD5Fotzgn6yYXkYsKiGs,MEDUZA,152913,82,0.677,0.744,10,-6.806,0,0.0295,0.0404,0.00016,0.074,0.631,124.08
4,Berlin Nights,Berlin Nights,6gG1R1bFdJeNc2ERAwXxCb,Vnce Dolanbay,292115,39,0.901,0.457,10,-13.238,0,0.163,0.229,0.424,0.0977,0.531,127.999
5,Evolution,Evolution,2VopDw2GlF3uwD1kihHmTT,Joyner Lucas,153250,62,0.687,0.819,9,-6.67,0,0.431,0.218,0.0,0.392,0.568,81.185
6,Rolls Royce,Rolls Royce,22XFe65IH0P2RY0uowBqdI,Geegun,143437,68,0.872,0.423,1,-7.337,1,0.213,0.142,1e-05,0.104,0.373,95.98
7,MOSKAU,"REISE, REISE",4L9UGREMQBfYLmGwlACgTV,Rammstein,256306,60,0.493,0.963,11,-3.41,1,0.0638,2.3e-05,0.667,0.305,0.305,147.992
8,Really Really,Islah (Deluxe),10I3CmmwT0BkOVhduDy53o,Kevin Gates,232093,70,0.762,0.666,0,-6.055,1,0.0916,0.00427,0.0,0.127,0.278,118.97
9,Give It to Me - Twin & Vane Remix,Give It to Me (Twin & Vane Remix),6vKqPHrfOVJMsmuVNa4aqy,VANE,176587,50,0.777,0.75,1,-7.21,1,0.105,0.0431,0.00767,0.595,0.328,119.992


In [20]:
from pandas_profiling import ProfileReport
prof = ProfileReport(df_favourite)
#prof.to_file(output_file='output.html')

In [21]:
prof

HBox(children=(HTML(value='Summarize dataset'), FloatProgress(value=0.0, max=31.0), HTML(value='')))




HBox(children=(HTML(value='Generate report structure'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(HTML(value='Render HTML'), FloatProgress(value=0.0, max=1.0), HTML(value='')))






In [22]:
#get cosine similarity for all songs within the playlist get songs that are similar
from sklearn.metrics.pairwise import linear_kernel

In [23]:
df_features_favourite.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0.62,0.835,5,-6.634,0,0.0984,0.0964,1.2e-05,0.269,0.0535,96.09
1,0.542,0.453,6,-11.208,0,0.24,0.204,7e-06,0.137,0.0375,88.31
2,0.662,0.714,5,-5.68,0,0.121,0.312,0.0,0.179,0.464,147.997
3,0.677,0.744,10,-6.806,0,0.0295,0.0404,0.00016,0.074,0.631,124.08
4,0.901,0.457,10,-13.238,0,0.163,0.229,0.424,0.0977,0.531,127.999


In [24]:
cosine_sim = linear_kernel(df_features_favourite, df_features_favourite)
cosine_sim

array([[ 9303.47387977,  8590.85820785, 14284.8344382 , ...,
        13694.25832572, 15434.65463544,  9488.1209845 ],
       [ 8590.85820785,  7960.89372825, 13164.093367  , ...,
        12628.21761451, 14225.94274221,  8759.6953465 ],
       [14284.8344382 , 13164.093367  , 21961.681771  , ...,
        21051.916669  , 23731.1349471 , 14571.809914  ],
       ...,
       [13694.25832572, 12628.21761451, 21051.916669  , ...,
        20202.4455795 , 22760.05004035, 13973.310919  ],
       [15434.65463544, 14225.94274221, 23731.1349471 , ...,
        22760.05004035, 25650.33754621, 15747.4922354 ],
       [ 9488.1209845 ,  8759.6953465 , 14571.809914  , ...,
        13973.310919  , 15747.4922354 ,  9678.083162  ]])

In [39]:
indices = pd.Series(df_favourite.index, index = df_favourite['track_name']).drop_duplicates()
indices

track_name
10 Feet Down                                               0
Fall Slowly (feat. Ashanti) - Extended Version             1
Let You Down                                               2
Piece Of Your Heart                                        3
Berlin Nights                                              4
Evolution                                                  5
Rolls Royce                                                6
MOSKAU                                                     7
Really Really                                              8
Give It to Me - Twin & Vane Remix                          9
Octave                                                    10
Oh Junge                                                  11
Create                                                    12
Creep - R3HAB Chill Remix                                 13
Sex, Relationships & Terrorism                            14
Dark Room                                                 15
Blue (Da Ba D

- Get the index of the song given its title.

- Get the list of cosine similarity scores for that particular song with all song. Convert it into a list of tuples where the first element is its position, and the second is the similarity score.

- Sort the aforementioned list of tuples based on the similarity scores; that is, the second element.

- Get the top 5 elements of this list. Ignore the first element as it refers to self (the song most similar to a particular song is the song itself).

- Return the titles corresponding to the indices of the top elements.

In [115]:
feature_cols = [x for x in df_favourite.columns[6:]]
feature_cols

['danceability',
 'energy',
 'key',
 'loudness',
 'mode',
 'speechiness',
 'acousticness',
 'instrumentalness',
 'liveness',
 'valence',
 'tempo']

In [105]:
from scipy.spatial.distance import cdist

euclid_dist = cdist(df_favourite[feature_cols], df_favourite[feature_cols], 'euclid')

In [107]:
def get_recommendations(song_title, similarity_score = euclid_dist):
    idx = indices[song_title]
    sim_scores = list(enumerate(similarity_score[idx]))
    sim_scores = sorted(sim_scores, key = lambda x: x[1],reverse = True)
    top_scores = sim_scores[0:6]
    song_indices = [i[0] for i in top_scores]
    return df_favourite["track_name"].iloc[song_indices]

In [108]:
euclid_dist.shape

(50, 50)

In [109]:
get_recommendations("Evolution")

24                         Only
19               Slap the Ghost
48    Aretha Franklin Freestyle
32      Movin' Bass - GTA Remix
11                     Oh Junge
34                    Zim Zimma
Name: track_name, dtype: object

In [110]:
get_recommendations("Herr Mannelig")

24                         Only
19               Slap the Ghost
32      Movin' Bass - GTA Remix
48    Aretha Franklin Freestyle
11                     Oh Junge
34                    Zim Zimma
Name: track_name, dtype: object

In [111]:
get_recommendations("Lambo Lambo")

14                    Sex, Relationships & Terrorism
35                                           Calling
38                                     Herr Mannelig
5                                          Evolution
1     Fall Slowly (feat. Ashanti) - Extended Version
39                                       Вид из окна
Name: track_name, dtype: object

In [112]:
for title in df_favourite["track_name"]:
    print(title, "\n")
    print(get_recommendations(title))
    print("------")

10 Feet Down 

24                         Only
19               Slap the Ghost
32      Movin' Bass - GTA Remix
48    Aretha Franklin Freestyle
11                     Oh Junge
34                    Zim Zimma
Name: track_name, dtype: object
------
Fall Slowly (feat. Ashanti) - Extended Version 

24                         Only
19               Slap the Ghost
32      Movin' Bass - GTA Remix
48    Aretha Franklin Freestyle
11                     Oh Junge
34                    Zim Zimma
Name: track_name, dtype: object
------
Let You Down 

14                    Sex, Relationships & Terrorism
35                                           Calling
38                                     Herr Mannelig
5                                          Evolution
1     Fall Slowly (feat. Ashanti) - Extended Version
39                                       Вид из окна
Name: track_name, dtype: object
------
Piece Of Your Heart 

14    Sex, Relationships & Terrorism
24                              Only
19    

In [24]:
def get_featured_playlists(sp):
    id = []
    name = []
    num_tracks = []
 # For looping through the API request  
    response = sp.featured_playlists()
    playlists = response['playlists']
    for i, items in enumerate(playlists['items']):
        id.append(items['id'])
        name.append(items['name'])
        num_tracks.append(items['tracks']['total'])

# Create the final df   
    df_playlists = pd.DataFrame({"id":id, "name": name, "#tracks": num_tracks})
    return df_playlists

In [25]:
df_feat_playlist = get_featured_playlists(sp)

In [26]:
df_feat_playlist

Unnamed: 0,id,name,#tracks
0,37i9dQZF1DX8TvdyVZSYFY,Acoustic Throwbacks,110
1,37i9dQZF1DX0XUsuxWHRQd,RapCaviar,50
2,37i9dQZF1DWXRqgorJj26U,Rock Classics,145
3,37i9dQZF1DWU6Vfrf87eTG,Indie Folk for Focus,116
4,37i9dQZF1DX8Kgdykz6OKj,Jazz Rap,100
5,37i9dQZF1DX0BcQWzuB7ZO,Dance Hits,76
6,37i9dQZF1DWXLeA8Omikj7,Brain Food,100
7,37i9dQZF1DX4UtSsGT1Sbe,All Out 80s,105
8,37i9dQZF1DX4MTfCb9IRyO,Summertime Blues,50
9,37i9dQZF1DX5CdVP4rz81C,Fresh & Chill,77
