# Build a song recommender system

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Load the listening records from song_data.csv (path is relative to the working directory).
song_data = pd.read_csv(r"song_data.csv")

In [5]:
# Preview the first rows to check the available columns.
song_data.head()

Unnamed: 0,user_id,song_id,listen_count,title,artist,song
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Jack Johnson,The Cove - Jack Johnson
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Paco De Lucia,Entre Dos Aguas - Paco De Lucia
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Kanye West,Stronger - Kanye West
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,Jack Johnson,Constellations - Jack Johnson
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,Foo Fighters,Learn To Fly - Foo Fighters


# Count the number of rows and the number of unique values in each column of the data

In [14]:
# Total number of rows in song_data.
print("Total number of entries in the dataframe is : ", len(song_data))

Total number of entries in the dataframe is :  1116609


In [9]:
# Distinct values of the combined "song" column.
# NOTE: the printed result (9952) is lower than the number of unique song_ids (10000),
# so some song_ids share the same "song" label.
print("Number of unique songs are : ", song_data['song'].nunique())

Number of unique songs are :  9952


In [10]:
# Distinct users in the data.
print("Number of unique users are : ", song_data['user_id'].nunique())

Number of unique users are :  66346


In [11]:
# Distinct song identifiers; compare with the count of unique "song" labels.
print("Number of unique song_ids are : ", song_data['song_id'].nunique())

Number of unique song_ids are :  10000


In [12]:
# Distinct song titles (ignoring the artist).
print("Number of unique titles are : ", song_data['title'].nunique())

Number of unique titles are :  9566


In [13]:
# Distinct artists in the data.
print("Number of unique artists are : ", song_data['artist'].nunique())

Number of unique artists are :  3375


## Checking whether the "listen_count" column makes a difference to the popularity of a song

In [50]:
# Rows ordered from most to least listens. Yes, listen_count matters a great deal:
# the range is very wide (920 listens in the top row versus 1 in the bottom rows of the output).
song_data.sort_values(by = 'listen_count', ascending=False)

Unnamed: 0,user_id,song_id,listen_count,title,artist,song
590159,50996bbabb6f7857bf0c8019435b5246a0e45cfd,SOUAGPQ12A8AE47B3A,920,Crack Under Pressure,Righteous Pigs,Crack Under Pressure - Righteous Pigs
17178,bb85bb79612e5373ac714fcd4469cabeb5ed94e1,SOZQSVB12A8C13C271,796,Paradise & Dreams,Darren Styles,Paradise & Dreams - Darren Styles
1048498,c012ec364329bb08cbe3e62fe76db31f8c5d8ec3,SOBONKR12A58A7A7E0,683,You\'re The One,Dwight Yoakam,You\'re The One - Dwight Yoakam
921479,70caceccaa745b6f7bc2898a154538eb1ada4d5a,SOPREHY12AB01815F9,676,I\'m On A Boat,The Lonely Island / T-Pain,I\'m On A Boat - The Lonely Island / T-Pain
768319,d2232ac7a1ec17b283b5dff243161902b2cb706c,SOLGIWB12A58A77A05,649,Reelin\' In The Years,Steely Dan,Reelin\' In The Years - Steely Dan
...,...,...,...,...,...,...
477601,fcb9c51cbfa0053956717b450a874e31b0a0f3a0,SOIKPOV12A58A7A6B2,1,River Of Deceit,Mad Season,River Of Deceit - Mad Season
477603,fcb9c51cbfa0053956717b450a874e31b0a0f3a0,SOPGACU12A6701C5FF,1,Control,Puddle Of Mudd,Control - Puddle Of Mudd
477604,fcb9c51cbfa0053956717b450a874e31b0a0f3a0,SOQAPJS12A8C1310A4,1,Sludge Factory,Alice In Chains,Sludge Factory - Alice In Chains
477605,fcb9c51cbfa0053956717b450a874e31b0a0f3a0,SOSQIHH12A8C13370B,1,15 Step,Radiohead,15 Step - Radiohead


# Creating a song_score data frame which contains each unique song and its score, where:

* score(song_unique) = sum(listen_count) * (fraction of all unique users who listened to the song)

# This ranks songs by popularity using the total number of listens of each song to date, weighted by the share of users who listened to it. It is better than the user count alone, because popularity depends both on the total number of listens and on how many people are interested in a particular song. This score does not require any further normalization.

# Recommend top ten popular songs

In [118]:
# Array of the distinct user ids found in song_data.
users = song_data.user_id.unique()

In [130]:
def recommend_popular(user_id, data=None, top_n=10):
    """Return the most popular songs, labelled with the requesting user's id.

    The ranking is not personalised: every user receives the same songs, and
    ``user_id`` is only copied into the result. Each song is scored as

        song_score = sum(listen_count) * (unique listeners / all unique users)

    Parameters
    ----------
    user_id : hashable
        Identifier written into the ``user_id`` column of every returned row.
    data : pandas.DataFrame, optional
        Interaction table with ``user_id``, ``song`` and ``listen_count``
        columns. Defaults to the notebook-level ``song_data`` frame.
    top_n : int, optional
        Number of top-ranked songs to return (default 10).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``song`` with columns ``user_id``, ``song_score`` and
        ``rank`` (1 = most popular), sorted by descending ``song_score``.
    """
    if data is None:
        # Fall back to the full interaction table loaded at the top of the notebook.
        data = song_data

    total_unique_users = data['user_id'].nunique()
    per_song = data.groupby("song")

    total_listens = per_song["listen_count"].sum()
    # Count distinct users, not rows: one "song" label can cover several
    # song_ids, so the same user may appear more than once for a song and a
    # plain row count would overstate its share of listeners.
    listener_fraction = per_song["user_id"].nunique() / total_unique_users

    song_score_df = (total_listens * listener_fraction).to_frame(name='song_score')
    song_score_df.insert(0, 'user_id', user_id)

    song_score_df = song_score_df.sort_values(by='song_score', ascending=False)
    song_score_df['rank'] = range(1, len(song_score_df) + 1)
    return song_score_df.head(top_n)

In [131]:
# Popularity-based recommendations for the first user in `users`.
recommend_popular(users[0])

Unnamed: 0_level_0,user_id,song_score,rank
song,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
You\'re The One - Dwight Yoakam,b80344d063b5ccb3212f76538f3d9e43d87dca9e,2942.376541,1
Undo - Björk,b80344d063b5ccb3212f76538f3d9e43d87dca9e,2870.219441,2
Revelry - Kings Of Leon,b80344d063b5ccb3212f76538f3d9e43d87dca9e,1987.558391,3
Sehr kosmisch - Harmonia,b80344d063b5ccb3212f76538f3d9e43d87dca9e,1947.768064,4
Horn Concerto No. 4 in E flat K495: II. Romance (Andante cantabile) - Barry Tuckwell/Academy of St Martin-in-the-Fields/Sir Neville Marriner,b80344d063b5ccb3212f76538f3d9e43d87dca9e,1306.671043,5
Dog Days Are Over (Radio Edit) - Florence + The Machine,b80344d063b5ccb3212f76538f3d9e43d87dca9e,1086.312724,6
Secrets - OneRepublic,b80344d063b5ccb3212f76538f3d9e43d87dca9e,835.129232,7
Hey_ Soul Sister - Train,b80344d063b5ccb3212f76538f3d9e43d87dca9e,478.3492,8
Tive Sim - Cartola,b80344d063b5ccb3212f76538f3d9e43d87dca9e,469.257755,9
Fireflies - Charttraxx Karaoke,b80344d063b5ccb3212f76538f3d9e43d87dca9e,456.489027,10


In [127]:
# Same call for another user: the songs, scores and ranks are identical to the previous
# output; only the user_id column changes, since the score does not depend on the user.
recommend_popular(users[10])

Unnamed: 0_level_0,user_id,song_score,rank
song,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
You\'re The One - Dwight Yoakam,45544491ccfcdc0b0803c34f201a6287ed4e30f8,2942.376541,1
Undo - Björk,45544491ccfcdc0b0803c34f201a6287ed4e30f8,2870.219441,2
Revelry - Kings Of Leon,45544491ccfcdc0b0803c34f201a6287ed4e30f8,1987.558391,3
Sehr kosmisch - Harmonia,45544491ccfcdc0b0803c34f201a6287ed4e30f8,1947.768064,4
Horn Concerto No. 4 in E flat K495: II. Romance (Andante cantabile) - Barry Tuckwell/Academy of St Martin-in-the-Fields/Sir Neville Marriner,45544491ccfcdc0b0803c34f201a6287ed4e30f8,1306.671043,5
Dog Days Are Over (Radio Edit) - Florence + The Machine,45544491ccfcdc0b0803c34f201a6287ed4e30f8,1086.312724,6
Secrets - OneRepublic,45544491ccfcdc0b0803c34f201a6287ed4e30f8,835.129232,7
Hey_ Soul Sister - Train,45544491ccfcdc0b0803c34f201a6287ed4e30f8,478.3492,8
Tive Sim - Cartola,45544491ccfcdc0b0803c34f201a6287ed4e30f8,469.257755,9
Fireflies - Charttraxx Karaoke,45544491ccfcdc0b0803c34f201a6287ed4e30f8,456.489027,10


# Create collaborative filtering model

## Randomly sample a fraction of the dataset (10,000 rows) to avoid memory errors.

In [153]:
# Draw a random subset of 10,000 rows; random_state is fixed so the draw is reproducible.
song_data_10000 = song_data.sample(n = 10000, random_state = 0)

In [154]:
# Preview the sampled rows (the index keeps the row labels from song_data).
song_data_10000.head()

Unnamed: 0,user_id,song_id,listen_count,title,artist,song
333951,8f8f909dd86e77969d1f8753e10ac56c4c68aa72,SOZXQRQ12A6701C323,2,Up Where We Belong,Joe Cocker / Jennifer Warnes,Up Where We Belong - Joe Cocker / Jennifer Warnes
492194,d5a1b1b8142327cea378f262b96ef5675c8bff99,SOZKNSB12A8C140F11,6,Replay,Iyaz,Replay - Iyaz
525465,58907ab6047361c85eb92be39dcbd2f0eb924684,SOLFBXX12A8AE47A51,1,Surfacing (Live version) (Album Version),Slipknot,Surfacing (Live version) (Album Version) - Sli...
465962,f22c0aa5308322685a42fc10fa05ea3f4a6e96b4,SOWTAVD12B0B8090C2,1,Just Like You Imagined,Nine Inch Nails,Just Like You Imagined - Nine Inch Nails
162333,3134803ece9906f87bb1b64a63058ef26c603336,SOCCEFN12A6701C548,1,Tonight (Featuring Mobb Deep & Joe Hooker)(Alb...,The Notorious B.I.G.,Tonight (Featuring Mobb Deep & Joe Hooker)(Alb...


In [157]:
# Build the user x song matrix of listen counts (rows: user_id, columns: song).
# aggfunc='sum' is explicit because several song_ids can share one "song" label:
# the default aggfunc ('mean') would average a user's listens across those
# duplicates instead of totalling them.
# User/song pairs with no recorded listens are filled with 0.
user_song_listen_count = song_data_10000.pivot_table(
    index='user_id', columns='song', values='listen_count', aggfunc='sum'
).fillna(0)

In [158]:
# Display the (truncated) user x song matrix; the visible cells are 0.0, i.e. no recorded listens for those pairs.
user_song_listen_count

song,#40 - DAVE MATTHEWS BAND,& Down - Boys Noize,(Antichrist Television Blues) - Arcade Fire,(I Got That) Boom Boom - Britney Spears featuring Ying Yang Twins,(I\'ve Had) The Time Of My Life - Bill Medley & Jennifer Warnes,(If You\'re Wondering If I Want You To) I Want You To - Weezer,(Nice Dream) - Radiohead,(Nothing\'s Too Good) For My Baby (1999 Digital Remaster) - Louis Prima And Keely Smith,(Oh) Pretty Woman (Remastered Album Version) - Van Halen,(Sittin\' On) The Dock Of The Bay - Otis Redding,...,aNYway - Armand Van Helden & A-TRAK Present Duck Sauce,and then patterns - Four Tet,in white rooms - Booka Shade,mOBSCENE - Marilyn Manson,ný Batterý - Sigur Ros,paranoid android - Christopher O\'Riley,re:stacks - Bon Iver,sillyworld (Album Version) - Stone Sour,sun drums and soil - Four Tet,you were there with me - Four Tet
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
003bf214024cbf64b880ed1395592e6c994c173a,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
003f1e939952a57d1a5bc990727acad5ceea97b4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
004dc9f93f5ad4a75f9a3ba0da5dd887b31d6bd2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
005ff9f7ccac1d767e76a43237b2cdb2a4ae0503,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
006a645395299d998d4c85ba79f3c03f8f76a14b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ffef9c3e59ab44554a9775af5e3b2ac149111bb6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fff543db7918cb8f4f56f7470903eb2f1d5a6dd8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fffb701ee87a32eff67eb040ed59146121f01571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fffce9c1537fbc350ea68823d956eaa8f5236dbe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
