In [1]:
import pickle
import pandas as pd
import numpy as np
import pprint
from string import punctuation
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
pp = pprint.PrettyPrinter(indent=4)
from collections import namedtuple
Song = namedtuple("Song", ["artist", "title"])
Mood = namedtuple("Mood", ["description", "probability"])
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer('english')
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import jaccard_similarity_score

In [2]:
songs_dataset = pd.read_json('MasterSongList.json')

In [3]:
trained_xgb = pickle.load(open('trained_xgb.pickle', 'rb'))

In [4]:
trained_lyrics = pickle.load(open('trained_lyrics.pickle', 'rb'))

In [5]:
translator_object = str.maketrans('', '', punctuation)
def clean_text(raw_text):
    stemmed_words = []
    raw_text = ' '.join(raw_text)
    raw_text = raw_text.lower()
    lyric = raw_text.translate(translator_object)
    splitted_lyrics = lyric.split()
    
    for word in splitted_lyrics:
        if word not in ENGLISH_STOP_WORDS:
            stemmed_words.append(stemmer.stem(word))
    return ' '.join(stemmed_words)

In [6]:
songs_dataset.head(10)

Unnamed: 0,_id,album,artist,audio_features,context,decades,genres,lyrics_features,moods,name,new_context,picture,recording_id,sub_context,yt_id,yt_views
0,{'$oid': '52fdfb440b9398049f3d7a8c'},Gangnam Style (강남스타일),PSY,"[11, 0.912744, 0.083704, 132.069, 0.293137, 0....",[work out],[],[pop],"[oppa, gangnam, style, gangnam, style, najeneu...","[energetic, motivational]",Gangnam Style (강남스타일),work out,http://images.musicnet.com/albums/073/463/405/...,50232.0,[working out: cardio],9bZkp7q19f0,2450112089
1,{'$oid': '52fdfb3d0b9398049f3cbc8e'},Native,OneRepublic,"[6, 0.7457039999999999, 0.11995499999999999, 1...",[energetic],[2012],[pop],"[lately, i, ve, been, i, ve, been, losing, sle...",[happy],Counting Stars,energetic,http://images.musicnet.com/albums/081/851/887/...,5839.0,[energy boost],hT_nvWreIhg,1020297206
2,{'$oid': '52fdfb420b9398049f3d3ea5'},Party Rock Anthem,LMFAO,"[5, 0.709932, 0.231455, 130.03, 0.121740999999...","[energetic, energetic, energetic, energetic]",[],[],"[party, rock, yeah, woo, let, s, go, party, ro...","[happy, celebratory, rowdy]",Party Rock Anthem,housework,http://images.musicnet.com/albums/049/414/127/...,52379.0,"[energy boost, pleasing a crowd, housework, dr...",KQ6zr6kCPj8,971128436
3,{'$oid': '52fdfb410b9398049f3d1eac'},Gentleman,PSY,"[3, 0.705822, 0.053292, 126.009, 0.126016, 0.0...","[party, party, party, party, party, party]",[2010s],[dance],"[alagamun, lan, weh, wakun, heya, hanun, gon, ...","[happy, energetic, celebratory]",Gentleman,energetic,http://images.musicnet.com/albums/082/950/461/...,12353.0,"[driving in the left lane, energy boost, girls...",ASO_zypdnsQ,892096527
4,{'$oid': '52fdfb400b9398049f3d0b19'},On The Floor,Jennifer Lopez,"[3, 0.741757, 0.07277399999999999, 129.985, 0....","[party, party]",[2000s],[reggaeton],"[j, lo, the, other, side, out, my, mine, it, s...",[energetic],On The Floor,work out,http://images.musicnet.com/albums/050/131/765/...,29502.0,"[working out: cardio, dance party: sweaty]",t4H_Zoh7G5A,873285189
5,{'$oid': '52fdfb3e0b9398049f3cdd7f'},The Lazy Song,Bruno Mars,"[8, 0.733856, 0.093043, 174.952, 0.05813699999...",[just woke up],[],[],"[today, i, don, t, feel, like, doing, anything...","[happy, sprightly]",The Lazy Song,just woke up,http://images.musicnet.com/albums/051/655/149/...,46206.0,[waking up on the right side of the bed],fLexgOxsZu0,775630683
6,{'$oid': '52fdfb3f0b9398049f3ce0f0'},Rolling In The Deep,Adele,"[8, 0.7773749999999999, 0.054104, 104.946, 0.0...","[relax, relax, relax]",[],[r&b: soul],"[there, s, a, fire, starting, in, my, heart, r...",[warm],Rolling In The Deep,chillout,http://images.musicnet.com/albums/047/744/109/...,9154.0,"[pleasing a crowd, lying low on a sunday after...",rYEDA3JcQqw,745960922
7,{'$oid': '52fdfb440b9398049f3d7a7d'},Call Me Maybe,Carly Rae Jepsen,"[7, 0.585564, 0.10829699999999999, 120.014, 0....",[work out],[],[pop],"[i, threw, a, wish, in, the, well, don, t, ask...","[energetic, motivational]",Call Me Maybe,work out,http://images.musicnet.com/albums/064/604/149/...,35484.0,[working out: cardio],fWNaR-rxAic,743689468
8,{'$oid': '52fdfb400b9398049f3d0d47'},Somebody That I Used To Know (Feat. Kimbra),Gotye,"[0, 0.418212, 0.105322, 129.054, 0.045461, 0.5...",[candlelit dinner],[],[],"[now, and, then, i, think, of, when, we, were,...","[seductive, nocturnal]",Somebody That I Used To Know (Feat. Kimbra),candlelit dinner,http://images.musicnet.com/albums/056/945/335/...,41786.0,[romantic evening],8UVNT4wvIGY,702996459
9,{'$oid': '52fdfb400b9398049f3d0dc7'},What Makes You Beautiful,One Direction,"[4, 0.81403, 0.079196, 124.991, 0.07244, 0.005...","[studying / working, studying / working, study...",[2010s],[pop],"[don, t, know, what, for, you, re, turning, he...","[happy, celebratory]",What Makes You Beautiful,housework,http://images.musicnet.com/albums/064/019/497/...,20475.0,"[energy boost, pleasing a crowd, housework, wo...",QJO3ROT-A4E,697513708


In [7]:
dummy_song = np.array(songs_dataset.values[2][3])
dummy_lyric = songs_dataset.values[2][7]

In [8]:
mlb = MultiLabelBinarizer()
genres = songs_dataset['genres']
def consolidate_genre(genre):
    if (len(genre) > 0):
        return genre.split(':')[0]
    else:
        return float('NaN')

songs_dataset.loc[:, 'genres'] = songs_dataset['genres'].apply(''.join)
songs_dataset.loc[:, 'genres'] = songs_dataset['genres'].apply(consolidate_genre)

In [9]:
genres_moods_df = pd.concat([songs_dataset['genres'], songs_dataset['moods'], songs_dataset['artist'], songs_dataset['name']], axis=1)

In [10]:
genres_moods_df.dropna(how='any', inplace=True)

In [11]:
genres_moods_df = genres_moods_df.join(pd.DataFrame(mlb.fit_transform(genres_moods_df['moods']), columns=mlb.classes_, index=genres_moods_df.index))

In [12]:
genres_moods_df.drop('moods', inplace=True, axis=1)

In [13]:
genres_moods_df.rename(columns={'name': 'song_name'}, inplace=True)

In [14]:
def recommend_similar_songs(audio_features, lyrics_features):
    final_result_dictionary = dict(playlist=[])
    predicted_genre = trained_xgb.predict(audio_features.reshape((1, -1)))
    cleaned_lyrics = clean_text(lyrics_features)
    predicted_moods = trained_lyrics.predict([cleaned_lyrics])
    print(predicted_moods)
    print(mlb.inverse_transform([('celebratory', 'happy', 'rowdy')]))
    global new_genres_moods_df
    new_genres_moods_df = genres_moods_df.loc[(genres_moods_df['genres']) == predicted_genre[0]].copy()
    for index, row in new_genres_moods_df.drop(['genres', 'artist', 'song_name'], axis=1).iterrows():
        new_genres_moods_df['score'] = jaccard_similarity_score(row, predicted_moods[0])
        
    new_genres_moods_df.sort_values(by=['score'])
    global top_ten_songs 
    top_ten_songs = new_genres_moods_df.sort_values(by=['score'], ascending=False).sample(n=10)
    
    for index, song in top_ten_songs.iterrows():
        final_result_dictionary['playlist'].append(Song(artist=song.artist, title=song.song_name))
    
    return final_result_dictionary
predict_genres_moods = recommend_similar_songs(dummy_song, dummy_lyric)
print(predict_genres_moods)

  if diff:


[('celebratory', 'happy', 'rowdy')]
{'playlist': [Song(artist='Lamb', title='Trans Fatty Acid (Kruder & Dorfmeister Session Mix)'), Song(artist='Portishead', title='It Could Be Sweet'), Song(artist='Groove Armada', title='History (Love Mix)'), Song(artist='Depth Charge', title='ASP'), Song(artist='DJ Cam', title='Meera (Extra Lucid Remix)'), Song(artist='Portishead', title='Only You'), Song(artist='J-One', title='All I Need (Phaeleh Remix)'), Song(artist='Groove Armada', title='Hands Of Time'), Song(artist='Christopher Willits', title='Sun Body'), Song(artist='Beck', title='The Golden Age')]}


In [16]:
new_genres_moods_df


Unnamed: 0,genres,artist,song_name,aggressive,angsty,atmospheric,campy,celebratory,classy,cocky,...,sexual,soothing,spacey,sprightly,sweet,trashy,trippy,visceral,warm,score
366,electronica,M.I.A.,Paper Planes,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.875
525,electronica,Human Mesh Dance,In Pools,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0.875
824,electronica,Gorillaz,On Melancholy Hill,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0.875
946,electronica,Coldplay,Life In Technicolor,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0.875
1018,electronica,College,A Real Hero,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0.875
1019,electronica,Dee Flack,Long Way (Instrumental Mix),0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0.875
1048,electronica,Noir Désir,Le Vent Nous Portera,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.875
1160,electronica,Tube & Berger,Imprint Of Pleasure (Original Mix),0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.875
1205,electronica,Flight Facilities,Crave You,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.875
1216,electronica,Massive Attack,Unfinished Sympathy,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0.875


In [15]:
top_ten_songs

Unnamed: 0,genres,artist,song_name,aggressive,angsty,atmospheric,campy,celebratory,classy,cocky,...,sexual,soothing,spacey,sprightly,sweet,trashy,trippy,visceral,warm,score
24755,electronica,Lamb,Trans Fatty Acid (Kruder & Dorfmeister Session...,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.875
6823,electronica,Portishead,It Could Be Sweet,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.875
19285,electronica,Groove Armada,History (Love Mix),0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0.875
30988,electronica,Depth Charge,ASP,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.875
31514,electronica,DJ Cam,Meera (Extra Lucid Remix),0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.875
3886,electronica,Portishead,Only You,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0.875
19654,electronica,J-One,All I Need (Phaeleh Remix),0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0.875
7510,electronica,Groove Armada,Hands Of Time,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0.875
33521,electronica,Christopher Willits,Sun Body,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.875
4922,electronica,Beck,The Golden Age,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0.875
