# Build a song recommender system

In [1]:
import turicreate

# Load some music data

In [50]:
song_data = turicreate.SFrame('song_data.sframe')

# Explore our data

In [7]:
song_data.head()

user_id,song_id,listen_count,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOAKIMP12A8C130995,1,The Cove,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Paco De Lucia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBXHDL12A81C204C0,1,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBYHAJ12A6701BF1D,1,Constellations,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODACBL12A8C13C273,1,Learn To Fly,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODDNQT12A6D4F5F7E,5,Apuesta Por El Rock 'N' Roll ...,Héroes del Silencio
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODXRTY12AB0180F3B,1,Paper Gangsta,Lady GaGa
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFGUAY12AB017B0A8,1,Stacked Actors,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFRQTD12A81C233C0,1,Sehr kosmisch,Harmonia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOHQWYZ12A6D4FA701,1,Heaven's gonna burn your eyes ...,Thievery Corporation feat. Emiliana Torrini ...

song
The Cove - Jack Johnson
Entre Dos Aguas - Paco De Lucia ...
Stronger - Kanye West
Constellations - Jack Johnson ...
Learn To Fly - Foo Fighters ...
Apuesta Por El Rock 'N' Roll - Héroes del ...
Paper Gangsta - Lady GaGa
Stacked Actors - Foo Fighters ...
Sehr kosmisch - Harmonia
Heaven's gonna burn your eyes - Thievery ...


## Show the most popular songs in the dataset

In [11]:
len(song_data)

1116609

In [8]:
song_data['song'].show()

# Count the number of unique users in the data

In [12]:
users = song_data['user_id'].unique()

In [13]:
len(users)

66346

# Create a song recommender

In [14]:
train_data,test_data = song_data.random_split(.8,seed=0)

## Create a very simple popularity recommender

In [15]:
popularity_model = turicreate.popularity_recommender.create(train_data,
                                                           user_id = 'user_id',
                                                           item_id = 'song')

## Use the popularity model to make some predictions

In [16]:
popularity_model.recommend(users=[users[0]])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sehr kosmisch - Harmonia,4754.0,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Undo - Björk,4227.0,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,You're The One - Dwight Yoakam ...,3781.0,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Revelry - Kings Of Leon,3527.0,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Secrets - OneRepublic,3148.0,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Hey_ Soul Sister - Train,2538.0,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Tive Sim - Cartola,2521.0,10


In [17]:
popularity_model.recommend(users=[users[1]])

user_id,song,score,rank
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Sehr kosmisch - Harmonia,4754.0,1
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Undo - Björk,4227.0,2
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,You're The One - Dwight Yoakam ...,3781.0,3
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Revelry - Kings Of Leon,3527.0,5
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Secrets - OneRepublic,3148.0,7
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Hey_ Soul Sister - Train,2538.0,8
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Tive Sim - Cartola,2521.0,10


# Build a recommender with personalization

In [18]:
personalized_model = turicreate.item_similarity_recommender.create(train_data,
                                                                  user_id = 'user_id',
                                                                  item_id = 'song')

## Apply personalized model to make song recommendations

In [19]:
personalized_model.recommend(users=[users[0]])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Riot In Cell Block Number Nine - Dr Feelgood ...,0.0374999940395355,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sei Lá Mangueira - Elizeth Cardoso ...,0.0331632643938064,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,The Stallion - Ween,0.0322580635547637,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Rain - Subhumans,0.0314159244298934,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,West One (Shine On Me) - The Ruts ...,0.0306771993637084,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Back Against The Wall - Cage The Elephant ...,0.0301204770803451,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Life Less Frightening - Rise Against ...,0.0284431129693985,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,A Beggar On A Beach Of Gold - Mike And The ...,0.023002490401268,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Audience Of One - Rise Against ...,0.0193938463926315,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Blame It On The Boogie - The Jacksons ...,0.0189873427152633,10


In [20]:
personalized_model.recommend(users=[users[1]])

user_id,song,score,rank
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Grind With Me (Explicit Version) - Pretty Ricky ...,0.0459424376487731,1
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,There Goes My Baby - Usher ...,0.0331920742988586,2
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Panty Droppa [Intro] (Album Version) - Trey ...,0.031856620311737,3
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Nobody (Featuring Athena Cage) (LP Version) - ...,0.0278467655181884,4
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Youth Against Fascism - Sonic Youth ...,0.0262914180755615,5
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Nice & Slow - Usher,0.0239639401435852,6
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Making Love (Into The Night) - Usher ...,0.0238176941871643,7
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Naked - Marques Houston,0.0228925704956054,8
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,I.nner Indulgence - DESTRUCTION ...,0.0220767498016357,9
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Love Lost (Album Version) - Trey Songz ...,0.0204497694969177,10


# Apply model to find similar songs in the data set

In [21]:
personalized_model.get_similar_items(['With Or Without You - U2'])

song,similar,score,rank
With Or Without You - U2,I Still Haven't Found What I'm Looking For ...,0.0428571701049804,1
With Or Without You - U2,Hold Me_ Thrill Me_ Kiss Me_ Kill Me - U2 ...,0.033734917640686,2
With Or Without You - U2,Window In The Skies - U2,0.032835841178894,3
With Or Without You - U2,Vertigo - U2,0.030075192451477,4
With Or Without You - U2,Sunday Bloody Sunday - U2,0.0271317958831787,5
With Or Without You - U2,Bad - U2,0.0251798629760742,6
With Or Without You - U2,A Day Without Me - U2,0.0237154364585876,7
With Or Without You - U2,Another Time Another Place - U2 ...,0.0203251838684082,8
With Or Without You - U2,Walk On - U2,0.0202020406723022,9
With Or Without You - U2,Get On Your Boots - U2,0.0196850299835205,10


In [22]:
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])

song,similar,score,rank
Chan Chan (Live) - Buena Vista Social Club ...,Murmullo - Buena Vista Social Club ...,0.1881188154220581,1
Chan Chan (Live) - Buena Vista Social Club ...,La Bayamesa - Buena Vista Social Club ...,0.1871921420097351,2
Chan Chan (Live) - Buena Vista Social Club ...,Amor de Loca Juventud - Buena Vista Social Club ...,0.1848341226577758,3
Chan Chan (Live) - Buena Vista Social Club ...,Diferente - Gotan Project,0.0214592218399047,4
Chan Chan (Live) - Buena Vista Social Club ...,Mistica - Orishas,0.0205761194229125,5
Chan Chan (Live) - Buena Vista Social Club ...,Hotel California - Gipsy Kings ...,0.0193049907684326,6
Chan Chan (Live) - Buena Vista Social Club ...,Nací Orishas - Orishas,0.0191571116447448,7
Chan Chan (Live) - Buena Vista Social Club ...,Gitana - Willie Colon,0.0187969803810119,8
Chan Chan (Live) - Buena Vista Social Club ...,Le Moulin - Yann Tiersen,0.0187969803810119,9
Chan Chan (Live) - Buena Vista Social Club ...,Criminal - Gotan Project,0.0187793374061584,10


# Compare the models quantitatively
We now formally compare the popularity and the personalized models using precision-recall curves. 

In [28]:
model_performance = turicreate.recommender.util.compare_models(test_data, 
                                                               [popularity_model, personalized_model], 
                                                               user_sample=0.05)

compare_models: using 2931 users to estimate model performance
PROGRESS: Evaluate model M0





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.020812009553053568 | 0.005867233528441308 |
|   2    | 0.017741385192766974 | 0.00895425606275145  |
|   3    | 0.016945297395655652 | 0.012542485188339843 |
|   4    | 0.015950187649266475 | 0.01578885688404623  |
|   5    | 0.015080177413851942 | 0.018415552755368537 |
|   6    | 0.015239394973274199 |  0.0219739337958376  |
|   7    | 0.015158161524589368 | 0.026684132236844633 |
|   8    | 0.01467076083248038  | 0.02970492489428009  |
|   9    | 0.014329580348004111 | 0.032891569176113744 |
|   10   | 0.013340156943022893 | 0.03401028915358502  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.027635619242579325 | 0.008428076340051766 |
|   2    | 0.021835551006482448 | 0.013237629537527183 |
|   3    | 0.01978846809962472  | 0.016859435155238658 |
|   4    | 0.017911975435005136 | 0.020318088409183598 |
|   5    | 0.01644489935175709  | 0.022621893630081977 |
|   6    | 0.015523712043671088 | 0.025968724023995273 |
|   7    | 0.014963201247745764 | 0.028306758066225818 |
|   8    | 0.014329580348004104 | 0.03102741395986026  |
|   9    | 0.013457674665453586 | 0.032785297268408835 |
|   10   | 0.012896622313203696 | 0.03456154662654151  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]



The table shows that the personalized model provides much better performance.

# 1.Counting unique users

##  Unique users who listened to songs by different artist

In [53]:
song_data[0:3]['user_id','title','artist']

user_id,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,The Cove,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,Entre Dos Aguas,Paco De Lucia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,Stronger,Kanye West


In [55]:
sub1 = song_data[song_data['artist']=='Kanye West']
sub2 = song_data[song_data['artist']=='Foo Fighters']
sub3 = song_data[song_data['artist']=='Taylor Swift']
sub4 = song_data[song_data['artist']=='Lady GaGa']

In [57]:
sub1[0:3]['user_id','title','artist']

user_id,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,Champion,Kanye West
5d5e0142e54c3bb7b69f548c2 ee55066c90700eb ...,Stronger,Kanye West


In [58]:
sub2[0:3]['user_id','title','artist']

user_id,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,Learn To Fly,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,Stacked Actors,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,Breakout,Foo Fighters


In [62]:
print(sub1[0]['artist']+','+str(len(sub1['user_id'].unique())))
print(sub2[0]['artist']+','+str(len(sub2['user_id'].unique())))
print(sub3[0]['artist']+','+str(len(sub3['user_id'].unique())))
print(sub4[0]['artist']+','+str(len(sub4['user_id'].unique())))

Kanye West,2522
Foo Fighters,2055
Taylor Swift,3246
Lady GaGa,2928


#### Taylor Swift has the largest number of unique users

# 2.Using groupby-aggregate to find the most popular and least popular artist

In [64]:
group = song_data.groupby(key_column_names='artist', 
                          operations={'total_count': turicreate.aggregate.SUM('listen_count')})

In [65]:
group

artist,total_count
The Dells,274
16Volt,579
The Stray Cats,411
Billy Preston / Syreeta,189
Emma Shapplin,252
Lil Jon & The East Side Boyz / Ludacris / Usher ...,256
Spoon,1061
Sam & Dave,656
Blue Swede,266
Scooter,1202


In [68]:
group.sort('total_count')[0]

{'artist': 'William Tabbert', 'total_count': 14}

#### The least popular artist, the one with smallest total listen_count, in the data set is 'William Tabbert'

In [71]:
group.sort('total_count')[-1]

{'artist': 'Kings Of Leon', 'total_count': 43218}

#### The most popular artist, the one with highest total listen_count, in the data set is 'Kings Of Leon'

# 3.Using groupby-aggregate to find the most recommended songs

In [72]:
subset_test_users = test_data['user_id'].unique()[0:10000]

In [73]:
subset_test_users.head()

dtype: str
Rows: 10
['c067c22072a17d33310d7223d7b79f819e48cf42', '696787172dd3f5169dc94deef97e427cee86147d', '532e98155cbfd1e1a474a28ed96e59e50f7c5baf', '18325842a941bc58449ee71d659a08d1c1bd2383', '507433946f534f5d25ad1be302edb9a2376f503c', '18fafad477f9d72ff86f7d0bd838a6573de0f64a', 'fe85b96ba1983219b296f6b4869dd29eb2b72ff9', '225ea420b4bede50919d1bfe24a599691522d176', '95dc7e2b188b1148b2d25f4e6b6e94afacc4efc3', '4a3a1ae2748f12f7ab921a47d6d79abf82e3e325']

In [74]:
predict = personalized_model.recommend(subset_test_users,k=1)

In [76]:
predict[0:3]

user_id,song,score,rank
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Grind With Me (Explicit Version) - Pretty Ricky ...,0.0459424376487731,1
696787172dd3f5169dc94deef 97e427cee86147d ...,Senza Una Donna (Without A Woman) - Zucchero / ...,0.0170265776770455,1
532e98155cbfd1e1a474a28ed 96e59e50f7c5baf ...,Jive Talkin' (Album Version) - Bee Gees ...,0.0118288653237479,1


In [79]:
sub_group = predict.groupby(key_column_names='song',
                            operations={'count':turicreate.aggregate.COUNT()})

In [80]:
sub_group[0]

{'song': 'Arco Arena - Cake', 'count': 1}

In [81]:
sub_group[-1]

{'song': 'They Might Follow You - Tiny Vipers', 'count': 10}

### Visualize minimum recommended songs

In [85]:
len(predict[predict['song']=='Arco Arena - Cake'])

1

In [86]:
predict[predict['song']=='Arco Arena - Cake']

user_id,song,score,rank
e1655ba2b504f88854f7d70c1 0cc9b460018ff1d ...,Arco Arena - Cake,0.0593751271565755,1


### Visualize maximum recommended songs

In [83]:
len(predict[predict['song']=='They Might Follow You - Tiny Vipers'])

10

In [82]:
predict[predict['song']=='They Might Follow You - Tiny Vipers']

user_id,song,score,rank
3fa06d8890bb3d31d99afe90d d858b2341966671 ...,They Might Follow You - Tiny Vipers ...,0.040231501062711,1
925d631dfca804d9ca9ed57aa 3894d0fca58a931 ...,They Might Follow You - Tiny Vipers ...,0.0219966428620474,1
5cb2aeb6062b4feddfb6691de 11829b99d895bf0 ...,They Might Follow You - Tiny Vipers ...,0.0176701918244361,1
83bcdc96df62ddf30c78a0495 a3b8325b8cee05e ...,They Might Follow You - Tiny Vipers ...,0.0149086952209472,1
7a4b557df3e3d4dd626177004 27a183fec9b52b6 ...,They Might Follow You - Tiny Vipers ...,0.0251805901527404,1
4c401ad8f51c8b81aa8870686 06420718dde5bf6 ...,They Might Follow You - Tiny Vipers ...,0.0383919146325853,1
bdec7f305d52435d572571a48 4e7bcda509fa884 ...,They Might Follow You - Tiny Vipers ...,0.0409191623330116,1
28252893b0137c14fb5bb6837 18946ca7f14863d ...,They Might Follow You - Tiny Vipers ...,0.0151402269090924,1
b5d45a388490f70f7bba75c23 e49c7f05838d054 ...,They Might Follow You - Tiny Vipers ...,0.0251430698803492,1
a3e436ab834289181c40dfd51 1a1ea317072587b ...,They Might Follow You - Tiny Vipers ...,0.0175446548632213,1
