In [5]:
import pandas as pd
import numpy
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
data = pd.read_csv('processed_data.csv')
data.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'pos', 'artist_name', 'track_uri',
       'artist_uri', 'track_name', 'album_uri', 'duration_ms_x', 'album_name',
       'name', 'danceability', 'energy', 'key', 'loudness', 'mode',
       'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'type', 'id', 'uri', 'track_href', 'analysis_url',
       'duration_ms_y', 'time_signature', 'artist_pop', 'genres', 'track_pop'],
      dtype='object')

In [7]:
data.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms_x,album_name,...,type,id,uri,track_href,analysis_url,duration_ms_y,time_signature,artist_pop,genres,track_pop
0,0,0,0,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69
1,1,7734,73,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69
2,2,14037,14,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69
3,3,21536,42,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69
4,4,24404,1,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69


In [8]:
def drop_cols(df):
    df.drop(columns=['pos', "Unnamed: 0",'Unnamed: 0.1', 'track_uri', 'artist_uri', 'album_uri', 'duration_ms_x', 'album_name', 'name', 'uri', 'track_href', 'analysis_url', 'duration_ms_y', 'time_signature', 'type'], inplace = True)

In [9]:
def prep_genres(df):
    df['genres'] = df['genres'].apply(lambda x: x.split(' '))
    return df

In [10]:
def prep_pipeline(df):
    drop_cols(df)
    data[['artist_pop', 'track_pop']] = data[['artist_pop', 'track_pop']].astype('float64')
    df.drop_duplicates(inplace = True)
    prep_genres(df)
    return df

In [11]:
def getSubjectivity(text):
  return TextBlob(text).sentiment.subjectivity

def getPolarity(text):
  return TextBlob(text).sentiment.polarity

In [12]:
def getAnalysis(score, task="polarity"):
  if task == "subjectivity":
    if score < 1/3:
      return "low"
    elif score > 1/3:
      return "high"
    else:
      return "medium"
  else:
    if score < 0:
      return 'Negative'
    elif score == 0:
      return 'Neutral'
    else:
      return 'Positive'

def sentiment_analysis(df, track_name):
    df['subjectivity'] = df[track_name].apply(getSubjectivity).apply(lambda score: getAnalysis(score, 'subjectivity'))
    df['polarity'] = df[track_name].apply(getPolarity).apply(getAnalysis)
    return df


In [13]:
def ohe(df, col, new_name):
    tf_df = pd.get_dummies(df[col])
    feature_names = tf_df.columns
    tf_df.columns = [new_name + '|' + str(x) for x in feature_names]
    tf_df.reset_index(drop = True, inplace = True)
    return tf_df


In [14]:
def tfidf(df):
    tfidf = TfidfVectorizer()
    tfidf_matrix =  tfidf.fit_transform(data['genres'].apply(lambda x: " ".join(x)))
    genre_df = pd.DataFrame(tfidf_matrix.toarray())
    genre_df.columns = ['genre' + "|" + i for i in tfidf.get_feature_names_out()]
    genre_df.drop(columns='genre|unknown')
    genre_df.reset_index(drop = True, inplace=True)
    return genre_df

In [15]:
def scale_features(df, cols_to_scale):
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(df[cols_to_scale])
    df[cols_to_scale] = scaled
    return df

In [16]:
def create_features(df, cols_to_scale):
    genre_df = tfidf(df)
    
    df = sentiment_analysis(df, 'track_name')
    
    subject_ohe = ohe(df, 'subjectivity','subject') * 1
    polar_ohe = ohe(df, 'polarity','polar') * 1
    key_ohe = ohe(df, 'key','key') * 1
    mode_ohe = ohe(df, 'mode','mode') * 1

    scale_features(df, cols_to_scale)
    final = pd.concat([df, genre_df, subject_ohe, polar_ohe, key_ohe, mode_ohe], axis = 1)
    final.drop(['genres', 'subjectivity', 'polarity', 'key', 'mode'], axis = 1, inplace = True)
    return final


In [17]:
data = prep_pipeline(data)
data.columns

Index(['artist_name', 'track_name', 'danceability', 'energy', 'key',
       'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
       'liveness', 'valence', 'tempo', 'id', 'artist_pop', 'genres',
       'track_pop'],
      dtype='object')

In [18]:
float_cols = data.select_dtypes(include='float').columns
float_cols

Index(['danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo', 'artist_pop',
       'track_pop'],
      dtype='object')

In [19]:
data = create_features(data, float_cols)

In [20]:
data.dropna(inplace = True, ignore_index = True)

In [21]:
data.isnull().sum()

artist_name     0
track_name      0
danceability    0
energy          0
loudness        0
               ..
key|9           0
key|10          0
key|11          0
mode|0          0
mode|1          0
Length: 2181, dtype: int64

In [22]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util

In [62]:
client_id = 'YOUR_CLIENT_ID'
client_secret = 'YOUR_CLIENT_SECRET'
redirect_uri='http://localhost:8080/'
scope = 'playlist-read-private'

In [24]:
auth_manager = SpotifyOAuth(client_id=client_id,
                            client_secret=client_secret,
                            redirect_uri=redirect_uri,
                            scope=scope)

In [25]:
token_info = auth_manager.get_access_token(as_dict=False)

In [26]:
sp = spotipy.Spotify(auth=token_info)
playlists = sp.current_user_playlists()
for idx, playlist in enumerate(playlists['items']):
    print(f"{idx+1}: {playlist['name']} \t {playlist['id']}")
else:
    print("No token provided")

1: Study mode 	 00snmXGddRgDDvOOYmJiFk
2: 👍 	 6EkxxkVdnlfwGCieQ5tJKL
3: Scream Able songs 	 4MD33Jn7xN24MqCF2FzI5M
4: Sleeping at Last 😌 	 32xDH3sr270epYybzyKe3d
5: My playlist #1 	 7IBkqI4phHD4jbyfq3IycB
6: Chai & Classics 	 37i9dQZF1DWY1kDGbdPb81
7: NF 	 1Ax8lzcyfEYgI6aEvoi6bM
8: Mental food 	 52bUff1hDnsN5UJpXyGLSC
9: Night Chill 2024 | Night Vibes 🌃 	 7tI15sEAA2Hu8VBUvsmzC0
10: Badass Villain Vibes! 	 5HhOezXefwrhf3oeXRTZoj
11: 🔥 	 3n04eMo3ohEnF8egs8BpTz
12: Why are you here? 	 3vhW2xueSxKtAiXo9fwL7l
13: T-T 	 345NLK6meHaGBXsWbbH01J
14: T-T 	 7qtu0I2cNBPZ4HUTjyephU
15: Sapphireazura + Rhythm 	 37i9dQZF1EJtrJ5DFkc7SE
16: the most beautiful songs I’ve ever heard 	 52Dba0Em73WbnRLr6yOEUf
17: 🤘 	 6ADXpy777LnQfA7rEKtrSJ
18: One Direction 	 0GxJyE2IBLqIBTqi20nSta
19: OMAM 	 2MaVj60FTFIjGHzd0i9PM3
20: Rap 	 4cXCXiBmJtrV7mtcbKMfeF
21: Imagine dragons 	 3RCULh6wgYaU93NVE4DdXD
22: Missing you , my friend 	 4nI7LqD5WwnE9YhnRoERKN
23: Feels 	 07gUdfWWMaR9NGnzVuduBZ
24: MM 	 6oblbgBPcjcNIOZiaTV

In [27]:
# tracks = sp.playlist_tracks('4TPYQDlQuoHmOaYDBGz35x')
# for idx, item in enumerate(tracks['items']):
#         track = item['track']
#         print(f"{idx+1}: {track['name']} by {track['artists'][0]['name']}")

In [28]:
def name_to_id(playlist_name, playlists):
    for playlist in playlists['items']:
        if playlist['name'] == playlist_name:
            return playlist['id']
    return 'playlist not found'

In [29]:
def get_tracks(playlist_id):
    tracks = sp.playlist_tracks(playlist_id)
    return tracks

In [30]:
# for idx, item in enumerate(tracks['items']):
#         track = item['track']
#         print(f"    {idx+1}: {track['name']} by {track['artists'][0]['name']}")

In [31]:
def make_playlist_df(playlist_name, playlists, data):
    playlist_df = pd.DataFrame()
    tracks = get_tracks(name_to_id(playlist_name, playlists))
    for idx, track in enumerate(tracks['items']):
        playlist_df.loc[idx, 'artist'] = track['track']['artists'][0]['name']
        playlist_df.loc[idx, 'name'] = track['track']['name']
        playlist_df.loc[idx, 'id'] = track['track']['id']
        
    playlist_df = playlist_df[playlist_df['id'].isin(data['id'].values)]
    return playlist_df

In [53]:
play = make_playlist_df("Sapphireazura + Rhythm", playlists, data)

In [54]:
columns = ['id'] + [col for col in data.columns if col != 'id']
data = data[columns]

In [55]:
play

Unnamed: 0,artist,name,id
4,Christina Perri,a thousand years,6lanRgr6wXibZr8KgzXxBl
6,Ed Sheeran,Photograph,6fxVffaTuwjgEk5h9QyRjy
8,Linkin Park,In the End,60a0Rd6pjrkxjPbaKzXjfq
10,KALEO,Way down We Go,0y1QJc3SJVPKJ1OvFmFqe6
11,The Chainsmokers,Closer,7BKLCZ1jbUBVqRi2FVlTVw
17,Carly Rae Jepsen,Call Me Maybe,20I6sIOMTCkB6w7ryavxtO
18,Coldplay,Viva La Vida,1mea3bSkSGXuIRvnydlB5b
38,Maroon 5,Maps,4gbVRS8gloEluzf0GzDOFc
45,Travis Scott,goosebumps,6gBFPUFcJLzWGx4lenP6h2
46,Coldplay,Hymn for the Weekend,3RiPr603aXAoi4GHyXx0uy


In [56]:
def make_playlist_vector(data, playlist_df):
    complete_playlist_df = data[data['id'].isin(playlist_df['id'].values)]
    search_dataset = data[~data['id'].isin(playlist_df['id'].values)]
    return complete_playlist_df.iloc[:, 3:].sum(axis = 0), search_dataset

In [57]:
play, search_dataset = make_playlist_vector(data, play)

In [58]:
search_dataset.select_dtypes(include='object').columns

Index(['id', 'artist_name', 'track_name'], dtype='object')

In [59]:
search_dataset.shape, play.shape

((10214, 2181), (2178,))

In [60]:
# (#songs, #features) X (#features, 1)
# search_dataset X playlist_vector.T
def get_recommendations(search_dataset, playlist_vector):
    search_dataset['similarity'] = cosine_similarity(search_dataset.iloc[:, 3:].values, playlist_vector.values.reshape(1, -1))[:,0]
    top_10 = search_dataset.sort_values('similarity',ascending = False).head(10)
    return top_10

In [61]:
recs = get_recommendations(search_dataset, play)
recs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  search_dataset['similarity'] = cosine_similarity(search_dataset.iloc[:, 3:].values, playlist_vector.values.reshape(1, -1))[:,0]


Unnamed: 0,id,artist_name,track_name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,...,key|5,key|6,key|7,key|8,key|9,key|10,key|11,mode|0,mode|1,similarity
4228,4Y7XAxTANhu3lmnLAzhWJW,Pitbull,Fireball,0.770243,0.933,0.870216,0.058212,0.091165,8.6e-05,0.0607,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.903421
2013,3S0OXQeoh0w6AY8WQVckRW,Jason Mraz,I'm Yours,0.694332,0.457,0.823344,0.048649,0.59739,0.0,0.105,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.900725
4259,02M6vucOvmRfMxTXDUwRXu,Beyoncé,7/11,0.756073,0.705,0.874088,0.130977,0.012851,0.0,0.126,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.899262
4687,386RUes7n1uM1yfzgeUuwp,Bruno Mars,The Lazy Song,0.803644,0.711,0.874295,0.072661,0.301205,0.0,0.0955,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.898161
1880,2rizacJSyD9S1IQUxUxnsK,The Chainsmokers,All We Know,0.67004,0.586,0.815394,0.031913,0.09739,0.002734,0.115,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.897562
1192,2ekn2ttSfGqwhhate0LSR0,Dua Lipa,New Rules,0.771255,0.7,0.860004,0.072141,0.00262,1.6e-05,0.153,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.897355
6716,4QlzkaRHtU8gAdwqjWmO8n,The Cure,Friday I'm In Love,0.532389,0.763,0.754182,0.035447,0.001265,6.8e-05,0.359,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.896138
9422,2VPjeDCsNEyoUXbEwphx1K,Troye Sivan,Fun,0.643725,0.789,0.849505,0.07131,0.138554,2e-06,0.168,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.895731
5431,0ERbK7qVqveCaBWIiYCrl3,Romeo Santos,Bella y Sensual,0.642713,0.713,0.845856,0.097401,0.228916,0.0,0.0506,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.895237
4638,343YBumqHu19cGoGARUTsd,Drake,Fake Love,0.939271,0.481,0.806966,0.298337,0.105422,0.0,0.176,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.895088
