Importing Libraries

In [1]:
#https://open.spotify.com/playlist/1sJlI1n2fVpa9YlhBLLSzz?si=93281880e1b94467

import pandas as pd
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from datetime import datetime
import cv2
from deepface import DeepFace

Setting up the Spotify's API

In [2]:
client_credentials_manager = SpotifyClientCredentials(client_id='ac1f41b0f93c45ac8af29623bdf94e0a', client_secret='f2460ff1a77e4c6db5eca633dde46d3b')
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

playlist_link = input("Enter the link: ")

playlist_URI = playlist_link.split("/")[-1].split("?")[0]

songs = sp.playlist_items(playlist_URI, market='IN')['items']  #will give a list of songs (meta-data)

Extracting the song meta data

In [3]:
song_name = []
song_id = []
song_popu = []
song_added_date = []
song_release_date = []
artists_col = []
for song in songs:
    song_name.append(song['track']['name'])
    song_id.append(song['track']['id'])
    song_popu.append(song['track']['popularity'])
    song_added_date.append(song['added_at'])
    song_release_date.append(song['track']['album']['release_date'])
    all_artists = song['track']['artists']
    artists = []
    for a in all_artists:
        artists.append(a['name'])
    artists_col.append(artists)

Creating the dataframe and extracting the song features

In [4]:
#combining the data

df = pd.DataFrame({
    'name':song_name,
    'popularity':song_popu,
    'date_added':pd.to_datetime(song_added_date),
    'release_year':list(map(lambda x: int(x[:4]), song_release_date)),
    'artists':artists_col
    })

#audio features

features = sp.audio_features(song_id)

feat_names = list(sp.audio_features(song_id)[0].keys())

for row in range(len(features)):
    for col in range(len(feat_names)):
        df.loc[row, feat_names[col]] = features[row][feat_names[col]]
df.head()

Unnamed: 0,name,popularity,date_added,release_year,artists,danceability,energy,key,loudness,mode,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,"Tujhe Kitna Chahne Lage (From ""Kabir Singh"")",79,2023-07-17 19:57:01+00:00,2019,"[Arijit Singh, Mithoon]",0.427,0.396,11.0,-7.556,1.0,...,0.11,0.306,70.368,audio_features,2Fv2injs4qAm8mJBGaxVKU,spotify:track:2Fv2injs4qAm8mJBGaxVKU,https://api.spotify.com/v1/tracks/2Fv2injs4qAm...,https://api.spotify.com/v1/audio-analysis/2Fv2...,284780.0,4.0
1,"Tera Ban Jaunga (From ""Kabir Singh"")",69,2023-07-17 19:57:01+00:00,2019,"[Akhil Sachdeva, Tulsi Kumar]",0.605,0.53,11.0,-6.346,1.0,...,0.131,0.399,93.951,audio_features,4OcvkkpF3xmyCGGY5IBlgi,spotify:track:4OcvkkpF3xmyCGGY5IBlgi,https://api.spotify.com/v1/tracks/4OcvkkpF3xmy...,https://api.spotify.com/v1/audio-analysis/4Ocv...,236437.0,4.0
2,"Duniyaa (From ""Luka Chuppi"")",78,2023-07-17 19:57:01+00:00,2019,"[Akhil, Dhvani Bhanushali, Kunaal Vermaa]",0.435,0.658,7.0,-5.949,0.0,...,0.125,0.524,77.17,audio_features,2tjWCe2W7sgvS3C8NHcdtI,spotify:track:2tjWCe2W7sgvS3C8NHcdtI,https://api.spotify.com/v1/tracks/2tjWCe2W7sgv...,https://api.spotify.com/v1/audio-analysis/2tjW...,222501.0,4.0
3,"Kaise Hua (From ""Kabir Singh"")",77,2023-07-17 19:57:01+00:00,2019,[Vishal Mishra],0.385,0.464,2.0,-6.975,1.0,...,0.109,0.15,137.839,audio_features,1y1rQTkWmrZdJmjwuK07GC,spotify:track:1y1rQTkWmrZdJmjwuK07GC,https://api.spotify.com/v1/tracks/1y1rQTkWmrZd...,https://api.spotify.com/v1/audio-analysis/1y1r...,234722.0,4.0
4,,0,2023-07-17 19:57:01+00:00,0,[],0.56,0.512,1.0,-7.185,0.0,...,0.056,0.39,123.991,audio_features,7lGhrfren1Zpwh0ds0zuUg,spotify:track:7lGhrfren1Zpwh0ds0zuUg,https://api.spotify.com/v1/tracks/7lGhrfren1Zp...,https://api.spotify.com/v1/audio-analysis/7lGh...,254516.0,4.0


Recency Factor

In [5]:
curr_month = datetime.today().month
curr_year = datetime.today().year

recency = list(map(lambda x: curr_month - x.month if (x.year == curr_year) else curr_month + (12 - x.month)
                    + (curr_year - x.year - 1) * 12, df['date_added']))
df['recency'] = recency

Normalizing Popularity before ohe

In [6]:
# popu ranges between 0 to 100, so normalizing it to 0 to 20
df['popularity'] = list(map(lambda x: x // 5, df['popularity']))

In [7]:
# deleting the rows whereever year is null
for i in range(len(df['release_year'])):
    if df.loc[i, 'release_year'] == 0:
        df.drop(i, inplace=True)

# One Hot Encoding

OHE on Popularity and Year

In [8]:
# for cosin similarity, we need the size of the vectors to be same, so we are genralizing the columns

for i in range(0,21):
    df[f"popu|{i}"] = [0] * len(df['name'])

for i in range(1980, datetime.today().year + 1):
    df[f"year|{i}"] = [0] * len(df['name'])

In [9]:
# this will create dataframe with the columns of unique values in the series
df_year = pd.get_dummies(df['release_year'])
df_popu = pd.get_dummies(df['popularity'])

# assigning names to the columns
df_year.columns = map(lambda x: 'year' + '|' + str(x), df_year.columns)
df_popu.columns = map(lambda x: 'popu' + '|' + str(x), df_popu.columns)
# df_popu.head()

In [10]:
#now updating the columns with values wherever needed

for col in df_popu.columns:
    df[col] = df_popu[col]
for col in df_year.columns:
    df[col] = df_year[col]

In [11]:
# df.iloc[:20, 24:]

OHE on Artists

In [12]:
# this file contains artists names which will be used for ohe
artists_excel = pd.read_excel('datasets/artists_names.xlsx')

In [13]:
# creating dummy dataframe for ohe-ing the artists
zeros = [0] * len(df['name'])
extra = pd.DataFrame(zeros)
for name in artists_excel['artists']:
    extra[f"artist|{name}"] = 0

new_df = pd.concat([df, extra], axis=1)
new_df.dropna(axis=0, inplace=True)

# to place 1 whenever the artist in row cell matches with the column artist
for i, row in new_df.iterrows():
    for name in row['artists']:
        if name in list(artists_excel['artists']):
            new_df.loc[i, f"artist|{name}"] = 1

new_df = new_df.drop(0, axis=1)

In [14]:
new_df = new_df.copy()

In [15]:
# new_df.iloc[:20, 91:]

In [16]:
'''# new df for generating the recommedation vector
# we are dropping the non-integer columns as they are of no use in calulating the similarity
'''
recomm_vec_df = new_df.drop(['name', 'popularity', 'date_added', 'release_year', 'type', 'id', 'uri', 'track_href',  'analysis_url', 'artists'], axis=1)
# recomm_vec_df.columns

In [25]:
recomm_vec_df.columns[14:]

Index(['popu|0', 'popu|1', 'popu|2', 'popu|3', 'popu|4', 'popu|5', 'popu|6',
       'popu|7', 'popu|8', 'popu|9',
       ...
       'artist|Lost Frequencies', 'artist|Miss Congeniality', 'artist|Torpe',
       'artist|Baby Tyson', 'artist|Seb Tik', 'artist|Confetti',
       'artist|Victoria Justice', 'artist|Tainy', 'artist|Glee Cast',
       'artist|Jamie Miller'],
      dtype='object', length=1297)

Calculating Bias

In [26]:
'''
now calculating the bias which are going to be multiplied with each of the rows individually.
for that we need to understand this that the bias must reduce the values of the older songs, so we need bias
to be between 0 and 1
1 / recency is not working as it is drastically reducing the values whih can negatively impact the recommendations
0.9 ** recency might work. For recency 3, we get the weight as 0.729, which is totally fine as we tend to listen less
to songs which are older than 3 months. Also, going below this value can trigger false recommendations
through this we are actually reducing the effect of older (added) songs
Applying only on the OHE columns as if applied on features, then avg will get affected, and we will get false predictions
'''

recomm_vec_df['bias'] = list(map(lambda x: round(0.9 ** x, 5), list(recomm_vec_df['recency'])))
for col in recomm_vec_df.columns[14:]:
    recomm_vec_df[col] = recomm_vec_df[col] * recomm_vec_df['bias']
# recomm_vec_df.head(10)

In [None]:
# list(recomm_vec_df.columns)

In [27]:
# deleting the bias and recency columns
recomm_vec_df = recomm_vec_df.dropna().drop(['bias', 'recency', 'key', 'mode', 'duration_ms', 'time_signature'], axis=1)
recomm_vec_df['tempo'] = recomm_vec_df['tempo'].apply(lambda x: (x - min(recomm_vec_df['tempo'])) / (max(recomm_vec_df['tempo'] - min(recomm_vec_df['tempo']))))
recomm_vec_df['loudness'] = recomm_vec_df['loudness'].apply(lambda x: (x - min(recomm_vec_df['loudness'])) / (max(recomm_vec_df['loudness'] - min(recomm_vec_df['loudness']))))

In [28]:
recomm_vec_df.head(9)

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,popu|0,...,artist|Lost Frequencies,artist|Miss Congeniality,artist|Torpe,artist|Baby Tyson,artist|Seb Tik,artist|Confetti,artist|Victoria Justice,artist|Tainy,artist|Glee Cast,artist|Jamie Miller
0,0.427,0.396,0.5617,0.0366,0.736,0.0,0.11,0.306,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.605,0.53,0.676468,0.0246,0.822,0.0,0.131,0.399,0.194164,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.435,0.658,0.714123,0.0368,0.699,0.0,0.125,0.524,0.056002,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.385,0.464,0.616807,0.0404,0.452,0.0,0.109,0.15,0.555504,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.483,0.468,0.553543,0.0341,0.784,0.0,0.152,0.319,0.539334,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.72,0.63,0.752063,0.0413,0.615,0.0,0.108,0.512,0.08761,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.695,0.727,0.683297,0.0323,0.0744,0.000175,0.533,0.864,0.392445,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.799,0.828,0.818742,0.0782,0.424,0.0,0.145,0.627,0.285463,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.756,0.52,0.458693,0.0341,0.495,0.0,0.0901,0.684,0.407973,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Calculating the Recommendation vectors

In [29]:
# this one will create the song features columns vector
recomm_vec1 = np.array(list(map(lambda col: recomm_vec_df[col].mean(), recomm_vec_df.loc[:, :"tempo"].columns)))
# this one will create the ohe columns till current year vector
recomm_vec2 = np.array(list(map(lambda col: sum(recomm_vec_df[col]), recomm_vec_df.loc[:, "popu|0":f"year|{datetime.today().year}"].columns)))
# artists only ohe columns vector
recomm_vec3 = np.array(list(map(lambda col: sum(recomm_vec_df[col]), recomm_vec_df.iloc[:, -len(artists_excel['artists']):].columns)))

# Emotion Capture

In [None]:
#emotion code

face_cascade = cv2.CascadeClassifier('haarcascade.xml')

cap = cv2.VideoCapture(0)
while True:
    ret,frame = cap.read()

    result = DeepFace.analyze(img_path = frame , actions=['emotion'], enforce_detection=False )

    gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(gray,1.1,4)

    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 3)

    emotion = result[0]['dominant_emotion']

    txt = str(emotion)

    cv2.putText(frame, txt, (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 3)
    cv2.imshow('frame', frame)

    if cv2.waitKey(1) & 0xff == ord('q'):
        break



# print(emotion)
cap.release()
cv2.destroyAllWindows()

#output: emotion


# Emotion Based Filtering

In [31]:
# this is the pre-processed dataset containing the 1000s of songs
data = pd.read_csv('datasets/final_data.csv')

In [32]:
# columns which will be used for the filtering
filt_col = ['acousticness', 'danceability', 'energy', 'loudness', 'tempo', 'valence']

# values for filtering (Emotion specific)
happy_low = [0, 0.57, 0.4, -10.4, 75 ,0.25]
sad_low = [0.2, 0.3, 0.25, -11, 70, 0]
chill_low = [0, 0.35, 0.25, -12.7, 80, 0.2]
angry_low = [0, 0.46, 0.56, -11, 90, 0.2]

happy_high = [0.75, 0.86, 1, -3, 170, 1]
sad_high = [0.9, 0.7, 0.8, -4, 160, 0.7]
chill_high = [0.85, 0.8, 0.8, -4, 165, 0.9]
angry_high = [0.6, 0.85, 1, -4, 170, 0.75]

happy_avg = [0.715, 0.7, 0.375, -6.7, 0.625, 123]
sad_avg = [0.5, 0.525, 0.55, -7.5, 0.3, 115]
chill_avg = [0.575, 0.525, 0.425, -8.35, 0.55, 122.5]
angry_avg = [0.655, 0.78, 0.3, -7.5, 0.475, 130]

In [33]:
i = 0
if emotion == 'happy':
    for col in filt_col:
        data = data[(data[col] > happy_low[i]) & (data[col] < happy_high[i])]
        i += 1

    sim = []
    for i in range(len(data)):
        e = data.loc[:, filt_col].iloc[i].values
        sim.append(np.linalg.norm(e - happy_avg)/70)
    data['sim'] = (np.array(sim) - max(sim)) * (-1)
    print(data['sim'])

elif emotion == 'sad':
    for col in filt_col:
        data = data[(data[col] > sad_low[i]) & (data[col] < sad_high[i])]
        i += 1
        
    sim = []
    for i in range(len(data)):
        e = data.loc[:, filt_col].iloc[i].values
        sim.append(np.linalg.norm(e - sad_avg)/70)
    data['sim'] = (np.array(sim) - max(sim)) * (-1)
    print(data['sim'])

elif emotion == 'neutral':
    for col in filt_col:
        data = data[(data[col] > chill_low[i]) & (data[col] < chill_high[i])]
        i += 1
    
    sim = []
    for i in range(len(data)):
        e = data.loc[:, filt_col].iloc[i].values
        sim.append(np.linalg.norm(e - chill_avg)/70)
    data['sim'] = (np.array(sim) - max(sim)) * (-1)
    print(data['sim'])

elif emotion == 'angry':
    for col in filt_col:
        data = data[(data[col] > angry_low[i]) & (data[col] < angry_high[i])]
        i += 1

    sim = []
    for i in range(len(data)):
        e = data.loc[:, filt_col].iloc[i].values
        sim.append(np.linalg.norm(e - angry_avg)/70)
    data['sim'] = (np.array(sim) - max(sim)) * (-1)
    print(data['sim'])

8       0.388613
22      0.798198
23      0.559449
33      0.744639
37      0.805157
          ...   
1372    0.171794
1378    0.249258
1395    0.781372
1402    0.281229
1432    0.447102
Name: sim, Length: 503, dtype: float64


In [34]:
# data.describe()

In [35]:
data_filtered = data.drop(['name', 'popularity', 'date_added', 'release_year', 'type', 'id', 'uri', 'track_href',  'analysis_url', 'artists', 'Unnamed: 0', 'key', 'mode', 'duration_ms', 'time_signature'], axis=1)
# data_filtered.drop(0, axis=1, inplace=True)

In [36]:
data_filtered['tempo'] = data_filtered['tempo'].apply(lambda x: (x - min(data_filtered['tempo'])) / (max(data_filtered['tempo'] - min(data_filtered['tempo']))))
data_filtered['loudness'] = data_filtered['loudness'].apply(lambda x: (x - min(data_filtered['loudness'])) / (max(data_filtered['loudness'] - min(data_filtered['loudness']))))
data_filtered

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,popu|0,...,artist|Miss Congeniality,artist|Torpe,artist|Baby Tyson,artist|Seb Tik,artist|Confetti,artist|Victoria Justice,artist|Tainy,artist|Glee Cast,artist|Jamie Miller,sim
8,0.534,0.481,0.279722,0.0412,0.697,0.000000,0.142,0.2930,0.608582,0,...,0,0,0,0,0,0,0,0,0,0.388613
22,0.405,0.665,0.788545,0.0328,0.614,0.000000,0.525,0.4040,0.128899,0,...,0,0,0,0,0,0,0,0,0,0.798198
23,0.531,0.431,0.291148,0.0370,0.868,0.000025,0.127,0.1900,0.419757,0,...,0,0,0,0,0,0,0,0,0,0.559449
33,0.506,0.508,0.666763,0.0291,0.580,0.000000,0.129,0.3120,0.198085,0,...,0,0,0,0,0,0,0,0,0,0.744639
37,0.652,0.681,0.489586,0.0379,0.503,0.000008,0.181,0.6770,0.124080,0,...,0,0,0,0,0,0,0,0,0,0.805157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1372,0.694,0.743,0.966590,0.0833,0.355,0.000000,0.275,0.5540,0.833222,0,...,0,0,0,0,0,0,0,0,0,0.171794
1378,0.471,0.326,0.418716,0.0378,0.496,0.000000,0.352,0.2470,0.752901,0,...,0,0,0,0,0,0,0,0,0,0.249258
1395,0.650,0.574,0.094012,0.1010,0.396,0.000112,0.354,0.4410,0.151683,0,...,0,0,0,0,0,0,0,0,0,0.781372
1402,0.591,0.534,0.251519,0.0815,0.410,0.000055,0.373,0.1510,0.719217,0,...,0,0,0,0,0,0,0,0,0,0.281229


In [37]:
data_filtered.iloc[:20, -1]

8     0.388613
22    0.798198
23    0.559449
33    0.744639
37    0.805157
44    0.442297
55    0.050059
56    0.337576
58    0.182553
59    0.460331
62    0.461824
63    0.756252
66    0.361760
67    0.691615
70    0.399911
72    0.813261
75    0.184120
77    0.320697
81    0.184050
82    0.410664
Name: sim, dtype: float64

Comparing the Vectors/Rows Using Similarity Measures

In [38]:
'''
Using Euclidian Distance as both magnitude and directions are important
Euclidean distance measures the distance between two points in a multidimensional space by calculating the square
root of the sum of the squared differences between their corresponding elements. It is suitable for continuous data
where the magnitude and direction of each feature are important.
'''

l1 = []
l2 = []
l3 = []
s = 0
recommendations = pd.DataFrame({'name': data['name'], 'artists':data['artists'], 'id': data['id'], 'sim': data['sim']})
for i in range(len(data_filtered)):
    # this contains the columns from start till the ohe
    data_1 = data_filtered.loc[:, :"tempo"].iloc[i].values
    # this contains the ohe columns till current year
    data_2 = data_filtered.loc[:, "popu|0":f"year|{datetime.today().year}"].iloc[i].values
    # this contains the artists only columns
    data_3 = data_filtered.iloc[:, (-len(artists_excel['artists']) - 1):-1].iloc[i].values

    sim1 = np.linalg.norm(recomm_vec1 - data_1)  # euclidian distance
    '''
    we are getting a dissimilarity score, as greater the difference 
    between the values, higher would be the score. The values which differ largerly with respect to the vector
    will tend to have a higher eucladian score
    '''

    # simply using dot product
    sim2 = np.dot(recomm_vec2, data_2)

    sim3 = np.dot(recomm_vec3, data_3)

    l1.append(round(sim1, 6))
    l2.append(round(sim2, 6))
    l3.append(round(sim3, 6))

l1 = (np.array(l1) - max(l1)) * (-1)  # converting it into a similarity score

# normalizing the array values to 0-1 range for proper contribution in the recommendation
l2 = (np.array(l2) - min(l2)) / (max(l2) - min(l2))
l3 = (np.array(l3) - min(l3)) / (max(l3) - min(l3)) * 0.5

score = l1 + l2 + l3
# print(type(recommendations['sim'][0]))
# print(type(recommendations['sim']), type(pd.Series(score)))
recommendations['sim'] = recommendations['sim'] + score  # as sim col is already filled with emotion effiency score

In [39]:
recommendations

Unnamed: 0,name,artists,id,sim
8,Khairiyat,"['Pritam', 'Arijit Singh']",5O932cZmzOZGOGZz9RHx20,2.115671
22,Phir Kabhi,['Arijit Singh'],4jk4CaqBMBbMZhf3PuR1ai,1.851990
23,Kaun Tujhe,['Palak Muchhal'],5T2ZZiBMDGh3TZDUbxg4rV,1.611176
33,Mar Jaayen,['Atif Aslam'],1ZGZsRhYm4XEU7ZWhYBxJA,1.946485
37,Woh Din,"['Pritam', 'Arijit Singh']",5qtEWwRUX3GKgpWwDQf9SA,2.435788
...,...,...,...,...
1372,Die 4 Me,['Halsey'],6VYAzAQGFsfEAwdorQaZuU,0.894211
1378,Sick of Me,['Alexis Donn'],1DA6LxSD9FCzKXjAhzuA5V,0.621874
1395,labour,['Paris Paloma'],2Ggr9IfS70wYQacW8nZKPG,1.359395
1402,TRRST,"['IC3PEAK', 'ZillaKami']",0M56diRArdi0vRYmLBeltn,1.283734


In [40]:
data_filtered.iloc[:, (-len(artists_excel['artists']) - 1):-1]

Unnamed: 0,artist|Pritam,artist|Arijit Singh,artist|Amitabh Bhattacharya,artist|Kaifi Khalil,artist|Tanishk Bagchi,artist|Jubin Nautiyal,artist|Asees Kaur,artist|King,artist|Jasleen Royal,artist|Prerna Arora,...,artist|Lost Frequencies,artist|Miss Congeniality,artist|Torpe,artist|Baby Tyson,artist|Seb Tik,artist|Confetti,artist|Victoria Justice,artist|Tainy,artist|Glee Cast,artist|Jamie Miller
8,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
22,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
33,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
37,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1372,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1378,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1395,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1402,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
recommendations.drop_duplicates(['id'], inplace=True)

# sorting the recommendations
recommendations = recommendations.sort_values(['sim'], axis=0, ascending=False)

In [42]:
recommendations = recommendations.reset_index().drop('index', axis=1)
print("No. of Song Recommendations: ", len(recommendations))

No. of Song Recommendations:  503


# Displaying the Recommendations

In [43]:
# first 50 recommendations
recommendations.head(50)

Unnamed: 0,name,artists,id,sim
0,"Bandeya (feat. Arijit Singh) - From ""Dil Juung...","['Shaarib Toshi', 'Arijit Singh']",6gwnKOvdBKkTU4CQ7KpIAu,2.583026
1,Tera Yaar Hoon Main,"['Rochak Kohli', 'Arijit Singh']",4ZTx87kAgEwlPMzoojFZWg,2.550171
2,"Humdard (From ""Ek Villain"")",['Arijit Singh'],2PvaburAUlhNHxVhw5lvOq,2.541405
3,Tujhe Kitna Chahne Lage,['Arijit Singh'],5QtEFRYavs5S3GHtFEq7A4,2.498399
4,"Tujhe Kitna Chahne Lage (From ""Kabir Singh"")","['Arijit Singh', 'Mithoon']",2Fv2injs4qAm8mJBGaxVKU,2.486243
5,O Saathi,"['Atif Aslam', 'Arko']",6fScgF2Y63ScU73nYju0ny,2.451813
6,Woh Din,"['Pritam', 'Arijit Singh']",5qtEWwRUX3GKgpWwDQf9SA,2.435788
7,"Tum Hi Aana (From ""Marjaavaan"")","['Payal Dev', 'Jubin Nautiyal', 'Kunaal Vermaa']",6E9UwSfT80age2xknoMS7Y,2.413212
8,Humnava Mere,"['Jubin Nautiyal', 'Rocky - Shiv']",0loZn1c5heXie7OAtvK6nH,2.370169
9,Ek Tarfa,['Darshan Raval'],64r6z0P3RnhpTGdkA7p5Os,2.349805


In [44]:
# for playing the songs but requires premium account
# sp.start_playback(uris=['spotify:track:1stiSonuKkZqhI1o9nZ9MT'])

THE END 🙂