# Spotify songs Recommendation system

In [88]:
import numpy as np
import pandas as pd
from numpy.linalg import norm

For this project, we will be using this dataset that contains audio statistics of the *top 2000 tracks on Spotify from 2000-2019*.
The data contains about 18 columns each describing the track and its qualities.
Unfortunately, we don't have any kind of data about the users, so the only thing we can do is some sort of content-based filtering using the *cosine similarity*.


Let's start by visualizing the dataset:

In [148]:
pd.read_csv('songs_normalize.csv')

Unnamed: 0,artist,song,duration_ms,explicit,year,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,genre
0,Britney Spears,Oops!...I Did It Again,211160,False,2000,77,0.751,0.834,1,-5.444,0,0.0437,0.3000,0.000018,0.3550,0.894,95.053,pop
1,blink-182,All The Small Things,167066,False,1999,79,0.434,0.897,0,-4.918,1,0.0488,0.0103,0.000000,0.6120,0.684,148.726,"rock, pop"
2,Faith Hill,Breathe,250546,False,1999,66,0.529,0.496,7,-9.007,1,0.0290,0.1730,0.000000,0.2510,0.278,136.859,"pop, country"
3,Bon Jovi,It's My Life,224493,False,2000,78,0.551,0.913,0,-4.063,0,0.0466,0.0263,0.000013,0.3470,0.544,119.992,"rock, metal"
4,*NSYNC,Bye Bye Bye,200560,False,2000,65,0.614,0.928,8,-4.806,0,0.0516,0.0408,0.001040,0.0845,0.879,172.656,pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,Jonas Brothers,Sucker,181026,False,2019,79,0.842,0.734,1,-5.065,0,0.0588,0.0427,0.000000,0.1060,0.952,137.958,pop
1996,Taylor Swift,Cruel Summer,178426,False,2019,78,0.552,0.702,9,-5.707,1,0.1570,0.1170,0.000021,0.1050,0.564,169.994,pop
1997,Blanco Brown,The Git Up,200593,False,2019,69,0.847,0.678,9,-8.635,1,0.1090,0.0669,0.000000,0.2740,0.811,97.984,"hip hop, country"
1998,Sam Smith,Dancing With A Stranger (with Normani),171029,False,2019,75,0.741,0.520,8,-7.513,1,0.0656,0.4500,0.000002,0.2220,0.347,102.998,pop


In [149]:
df = pd.read_csv('songs_normalize.csv')
# Here we create a copy of the dataframe only with the "numerical" features
train_df = df.drop(labels=['explicit', 'genre'], axis=1)
# We add the column that will help us rank the songs
df.insert(2, "similarity", 0)

Here we can visualize the new dataframe to get a better intuition of what we are doing:

In [150]:
df

Unnamed: 0,artist,song,similarity,duration_ms,explicit,year,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,genre
0,Britney Spears,Oops!...I Did It Again,0,211160,False,2000,77,0.751,0.834,1,-5.444,0,0.0437,0.3000,0.000018,0.3550,0.894,95.053,pop
1,blink-182,All The Small Things,0,167066,False,1999,79,0.434,0.897,0,-4.918,1,0.0488,0.0103,0.000000,0.6120,0.684,148.726,"rock, pop"
2,Faith Hill,Breathe,0,250546,False,1999,66,0.529,0.496,7,-9.007,1,0.0290,0.1730,0.000000,0.2510,0.278,136.859,"pop, country"
3,Bon Jovi,It's My Life,0,224493,False,2000,78,0.551,0.913,0,-4.063,0,0.0466,0.0263,0.000013,0.3470,0.544,119.992,"rock, metal"
4,*NSYNC,Bye Bye Bye,0,200560,False,2000,65,0.614,0.928,8,-4.806,0,0.0516,0.0408,0.001040,0.0845,0.879,172.656,pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,Jonas Brothers,Sucker,0,181026,False,2019,79,0.842,0.734,1,-5.065,0,0.0588,0.0427,0.000000,0.1060,0.952,137.958,pop
1996,Taylor Swift,Cruel Summer,0,178426,False,2019,78,0.552,0.702,9,-5.707,1,0.1570,0.1170,0.000021,0.1050,0.564,169.994,pop
1997,Blanco Brown,The Git Up,0,200593,False,2019,69,0.847,0.678,9,-8.635,1,0.1090,0.0669,0.000000,0.2740,0.811,97.984,"hip hop, country"
1998,Sam Smith,Dancing With A Stranger (with Normani),0,171029,False,2019,75,0.741,0.520,8,-7.513,1,0.0656,0.4500,0.000002,0.2220,0.347,102.998,pop


In [151]:
#converting pandas dataframe to a python list
df_list = df.values.tolist()
train_list = train_df.values.tolist()
x, y = np.shape(df_list)
print(x, y)
print(np.shape(train_df))

2000 19
(2000, 16)


# Cosine Similarity

Cosine similarity measures the similarity between two vectors by calculating the cosine of the angle between them.

For two vectors **A** and **B**:
#### $$\text{cosine similarity}(A, B) = \frac{A \cdot B}{\|A\| \|B\|}$$
Where:
- $ A \cdot B $ = dot product of vectors A and B
- $ \|A\|, \|B\| $ = magnitudes (Euclidean lengths) of the vectors

## Properties
- Range: [-1, 1]
- 1: Vectors point in the same direction
- 0: Vectors are orthogonal
- -1: Vectors point in opposite directions

In [162]:
def cosine_similarity(A, B):
    return np.dot(A, B) / (norm(A) * norm(B))

Then we are going to rank all the songs based on  how similar they are to the song we choose

In [163]:
def recommend(title):
    idx = df[df['song'] == title].index[0]
    for i in range(x):
        if i == idx:
            continue
        df_list[i][2] = cosine_similarity(train_list[i][2:], train_list[idx][2:])

    return sorted(df_list, key=lambda l:l[2], reverse=True), idx

Be free to change the title of the song, choose the one you like the most so you can have a *useful recommendation*...

In [167]:
song = "any song from the dataset"
results, index = recommend(song)
genres = df_list[index][18].replace(',', '').split()
final_rec = []
for j in range(x-1):
    if df_list[j][1] == song:
        continue
    for k in range(len(genres)):
        if genres[k] in df_list[j][18]:
            final_rec.append(df_list[j])
            break

Finally, it's time to see what our recommendation system has done for us, there are two variants. In the first one, you will get the top 5 recommended songs of the same genre, in the second version, the recommendations will be part of other genres, so that you can explore new stuff.

In [168]:
for l in range(5):
    print(final_rec[l][:2])

['Sisqo', 'Thong Song']
['Eminem', 'The Real Slim Shady']
['Aaliyah', 'Try Again']
['Dr. Dre', 'The Next Episode']
['M.O.P.', 'Cold as Ice']


In [169]:
for l in range(5):
    print(results[l][:2])

['Christina Perri', 'A Thousand Years']
['Black Eyed Peas', 'Meet Me Halfway']
['Jennifer Lopez', 'On The Floor']
['Benny Benassi', 'Satisfaction (Isak Original Extended) - Benny Benassi Presents The Biz']
['will.i.am', 'Scream & Shout']
