In [17]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [2]:
musics = pd.read_csv('musics.csv')

In [3]:
musics.dropna(inplace=True)

In [4]:
musics.year = musics.year.apply(lambda x:'year:'+str(x))

In [5]:
column_rename = {
    "the genre of the track": "genre",
    "Beats.Per.Minute -The tempo of the song": "bpm",
    "Energy- The energy of a song - the higher the value, the more energtic": "energy",
    "Danceability - The higher the value, the easier it is to dance to this song": "dance",
    "Loudness/dB - The higher the value, the louder the song": "loud",
    "Liveness - The higher the value, the more likely the song is a live recording": "live",
    "Valence - The higher the value, the more positive mood for the song": "valence",
    "Length - The duration of the song":"duration",
    "Acousticness - The higher the value the more acoustic the song is": "acoustic",
    "Speechiness - The higher the value the more spoken word the song contains": "speech",
    "Popularity- The higher the value the more popular the song is": "popularity"
}
musics = musics.rename(columns=column_rename)

In [6]:
remove_spaces = lambda x : x.replace(" ","").lower()
musics.artist = musics.artist.apply(remove_spaces)
musics.genre = musics.genre.apply(remove_spaces)

In [7]:
def set_values():
    keys = ['artist','genre','bpm','energy','dance','loud','live','valence','duration','acoustic','speech','popularity']
    musics['tags'] = ""
    for key in keys:
        musics[key] = musics[key].apply(lambda x : key+":"+str(x))
        musics.tags = musics.tags+musics[key]+" "
set_values()

In [8]:
data = musics[['title','artist','genre','tags']]

In [9]:
count_v = CountVectorizer(max_features=5000,stop_words='english')
vectors = count_v.fit_transform(data.tags).toarray()

In [10]:
vectors

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [11]:
similarity = cosine_similarity(vectors)

In [12]:
def recommend(music):
    index = data[data['title'] == music].index[0]
    distances = similarity[index]
    musics_list = sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[1:6]
    for i in musics_list:
        print(data.iloc[i[0]].title)

In [16]:
recommend('Kills You Slowly')

Takeaway
Call You Mine
Paris
Final Song
Beneath Your Beautiful


In [14]:
data

Unnamed: 0,title,artist,genre,tags
0,"Hey, Soul Sister",artist:train,genre:neomellow,artist:train genre:neomellow bpm:97 energy:89 ...
1,Love The Way You Lie,artist:eminem,genre:detroithiphop,artist:eminem genre:detroithiphop bpm:87 energ...
2,TiK ToK,artist:kesha,genre:dancepop,artist:kesha genre:dancepop bpm:120 energy:84 ...
3,Bad Romance,artist:ladygaga,genre:dancepop,artist:ladygaga genre:dancepop bpm:119 energy:...
4,Just the Way You Are,artist:brunomars,genre:pop,artist:brunomars genre:pop bpm:109 energy:84 d...
...,...,...,...,...
598,Find U Again (feat. Camila Cabello),artist:markronson,genre:dancepop,artist:markronson genre:dancepop bpm:104 energ...
599,Cross Me (feat. Chance the Rapper & PnB Rock),artist:edsheeran,genre:pop,artist:edsheeran genre:pop bpm:95 energy:79 da...
600,"No Brainer (feat. Justin Bieber, Chance the Ra...",artist:djkhaled,genre:dancepop,artist:djkhaled genre:dancepop bpm:136 energy:...
601,Nothing Breaks Like a Heart (feat. Miley Cyrus),artist:markronson,genre:dancepop,artist:markronson genre:dancepop bpm:114 energ...


In [20]:
pickle.dump(data.to_dict(),open('data.pkl','wb'))
pickle.dump(similarity,open('similarity.pkl','wb'))