# Build a song recommender system

In [1]:
import turicreate

# Load some music data

In [2]:
song_data = turicreate.SFrame('./song_data.sframe/')

# Explore our data

In [3]:
song_data

user_id,song_id,listen_count,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOAKIMP12A8C130995,1,The Cove,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Paco De Lucia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBXHDL12A81C204C0,1,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBYHAJ12A6701BF1D,1,Constellations,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODACBL12A8C13C273,1,Learn To Fly,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODDNQT12A6D4F5F7E,5,Apuesta Por El Rock 'N' Roll ...,Héroes del Silencio
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODXRTY12AB0180F3B,1,Paper Gangsta,Lady GaGa
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFGUAY12AB017B0A8,1,Stacked Actors,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFRQTD12A81C233C0,1,Sehr kosmisch,Harmonia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOHQWYZ12A6D4FA701,1,Heaven's gonna burn your eyes ...,Thievery Corporation feat. Emiliana Torrini ...

song
The Cove - Jack Johnson
Entre Dos Aguas - Paco De Lucia ...
Stronger - Kanye West
Constellations - Jack Johnson ...
Learn To Fly - Foo Fighters ...
Apuesta Por El Rock 'N' Roll - Héroes del ...
Paper Gangsta - Lady GaGa
Stacked Actors - Foo Fighters ...
Sehr kosmisch - Harmonia
Heaven's gonna burn your eyes - Thievery ...


## Show the most popular songs in the dataset

In [4]:
vis = song_data['song']
vis.show()

# Count the number of unique users in the data

In [5]:
users = song_data['user_id'].unique()

In [6]:
len(users)

66346

In [7]:
listen_count_sum = song_data['listen_count'].sum()
listen_count_sum

3674502

# Create a song recommender

In [8]:
train_data,test_data = song_data.random_split(.8,seed=0)

## Create a very simple popularity recommender

In [9]:
popularity_model = turicreate.popularity_recommender.create(train_data,
                                                           user_id = 'user_id',
                                                           item_id = 'song')

## Use the popularity model to make some predictions

In [10]:
popularity_model.recommend(users=[users[0]])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sehr kosmisch - Harmonia,4754.0,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Undo - Björk,4227.0,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,You're The One - Dwight Yoakam ...,3781.0,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Revelry - Kings Of Leon,3527.0,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Secrets - OneRepublic,3148.0,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Hey_ Soul Sister - Train,2538.0,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Tive Sim - Cartola,2521.0,10


In [11]:
popularity_model.recommend(users=[users[1]])

user_id,song,score,rank
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Sehr kosmisch - Harmonia,4754.0,1
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Undo - Björk,4227.0,2
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,You're The One - Dwight Yoakam ...,3781.0,3
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Revelry - Kings Of Leon,3527.0,5
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Secrets - OneRepublic,3148.0,7
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Hey_ Soul Sister - Train,2538.0,8
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Tive Sim - Cartola,2521.0,10


# Build a recommender with personalization

In [12]:
personalized_model = turicreate.item_similarity_recommender.create(train_data,
                                                                  user_id = 'user_id',
                                                                  item_id = 'song')

## Apply personalized model to make song recommendations

In [13]:
personalized_model.recommend(users=[users[0]])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Riot In Cell Block Number Nine - Dr Feelgood ...,0.0374999940395355,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sei Lá Mangueira - Elizeth Cardoso ...,0.0331632643938064,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,The Stallion - Ween,0.0322580635547637,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Rain - Subhumans,0.0314159244298934,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,West One (Shine On Me) - The Ruts ...,0.0306771993637084,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Back Against The Wall - Cage The Elephant ...,0.0301204770803451,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Life Less Frightening - Rise Against ...,0.0284431129693985,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,A Beggar On A Beach Of Gold - Mike And The ...,0.023002490401268,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Audience Of One - Rise Against ...,0.0193938463926315,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Blame It On The Boogie - The Jacksons ...,0.0189873427152633,10


In [14]:
personalized_model.recommend(users=[users[1]])

user_id,song,score,rank
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Grind With Me (Explicit Version) - Pretty Ricky ...,0.0459424376487731,1
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,There Goes My Baby - Usher ...,0.0331920742988586,2
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Panty Droppa [Intro] (Album Version) - Trey ...,0.031856620311737,3
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Nobody (Featuring Athena Cage) (LP Version) - ...,0.0278467655181884,4
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Youth Against Fascism - Sonic Youth ...,0.0262914180755615,5
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Nice & Slow - Usher,0.0239639401435852,6
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Making Love (Into The Night) - Usher ...,0.0238176941871643,7
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Naked - Marques Houston,0.0228925704956054,8
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,I.nner Indulgence - DESTRUCTION ...,0.0220767498016357,9
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Love Lost (Album Version) - Trey Songz ...,0.0204497694969177,10


# Apply model to find similar songs in the data set

In [15]:
personalized_model.get_similar_items(['With Or Without You - U2'])

song,similar,score,rank
With Or Without You - U2,I Still Haven't Found What I'm Looking For ...,0.0428571701049804,1
With Or Without You - U2,Hold Me_ Thrill Me_ Kiss Me_ Kill Me - U2 ...,0.033734917640686,2
With Or Without You - U2,Window In The Skies - U2,0.032835841178894,3
With Or Without You - U2,Vertigo - U2,0.030075192451477,4
With Or Without You - U2,Sunday Bloody Sunday - U2,0.0271317958831787,5
With Or Without You - U2,Bad - U2,0.0251798629760742,6
With Or Without You - U2,A Day Without Me - U2,0.0237154364585876,7
With Or Without You - U2,Another Time Another Place - U2 ...,0.0203251838684082,8
With Or Without You - U2,Walk On - U2,0.0202020406723022,9
With Or Without You - U2,Get On Your Boots - U2,0.0196850299835205,10


In [16]:
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])

song,similar,score,rank
Chan Chan (Live) - Buena Vista Social Club ...,Murmullo - Buena Vista Social Club ...,0.1881188154220581,1
Chan Chan (Live) - Buena Vista Social Club ...,La Bayamesa - Buena Vista Social Club ...,0.1871921420097351,2
Chan Chan (Live) - Buena Vista Social Club ...,Amor de Loca Juventud - Buena Vista Social Club ...,0.1848341226577758,3
Chan Chan (Live) - Buena Vista Social Club ...,Diferente - Gotan Project,0.0214592218399047,4
Chan Chan (Live) - Buena Vista Social Club ...,Mistica - Orishas,0.0205761194229125,5
Chan Chan (Live) - Buena Vista Social Club ...,Hotel California - Gipsy Kings ...,0.0193049907684326,6
Chan Chan (Live) - Buena Vista Social Club ...,Nací Orishas - Orishas,0.0191571116447448,7
Chan Chan (Live) - Buena Vista Social Club ...,Gitana - Willie Colon,0.0187969803810119,8
Chan Chan (Live) - Buena Vista Social Club ...,Le Moulin - Yann Tiersen,0.0187969803810119,9
Chan Chan (Live) - Buena Vista Social Club ...,Criminal - Gotan Project,0.0187793374061584,10


# Compare the models quantitatively
We now formally compare the popularity and the personalized models using precision-recall curves. 

In [17]:
model_performance = turicreate.recommender.util.compare_models(
    test_data, [popularity_model, personalized_model], user_sample=.05)

compare_models: using 2931 users to estimate model performance
PROGRESS: Evaluate model M0





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.023200272944387597 | 0.006667768336140906 |
|   2    | 0.02234732173319687  | 0.011554019448082911 |
|   3    | 0.02081200955305358  | 0.016616470928139296 |
|   4    | 0.01978846809962472  | 0.02091196030551204  |
|   5    | 0.019174343227567402 | 0.02472287133239029  |
|   6    | 0.01802570226316387  | 0.02809579868177617  |
|   7    | 0.017448944777501644 | 0.031586547668942776 |
|   8    | 0.016675196178778568 | 0.03432791809199179  |
|   9    | 0.015618484400470088 | 0.03623324863089448  |
|   10   | 0.015114295462299593 |  0.0394789310730969  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1





Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.04128283862163086  | 0.011530761876207116 |
|   2    | 0.034288638689866945 | 0.01819966871143945  |
|   3    | 0.030137609462072106 | 0.023430044310289982 |
|   4    | 0.02737973387922211  | 0.027519213036101466 |
|   5    | 0.025793244626407412 | 0.03231071262033393  |
|   6    | 0.023996360741498936 | 0.03608290237000577  |
|   7    | 0.02281035239070043  | 0.03986484013351975  |
|   8    | 0.021494370522006156 | 0.04238728641236314  |
|   9    | 0.02012964858410103  | 0.04403349455805955  |
|   10   | 0.019003752985329282 | 0.04569134616473411  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]



The table shows that the personalized model provides much better performance.

## count users who liseneted to specific singers

In [18]:
Kayne = song_data[song_data['artist'] == 'Kanye West']
Kayne[Kayne['user_id']].unique()

artist,listen_count,song,song_id,title
Kanye West,1,Say You Will - Kanye West,SOYIUDO12AB01838F5,Say You Will
Kanye West,1,Through The Wire - Kanye West ...,SOIBSWV12A6D4F6AB3,Through The Wire
Kanye West,1,Flashing Lights - Kanye West ...,SOOLPFK12A58A7BDE3,Flashing Lights
Kanye West,8,Homecoming - Kanye West,SOIYWPZ12A81C204EF,Homecoming
Kanye West,3,Jesus Walks - Kanye West,SOUCBEB12A6310E1F9,Jesus Walks
Kanye West,8,Homecoming - Kanye West,SOIYWPZ12A81C204EF,Homecoming
Kanye West,1,Homecoming - Kanye West,SOIYWPZ12A81C204EF,Homecoming
Kanye West,1,Coldest Winter - Kanye West ...,SOCMNRG12AB0189D3F,Coldest Winter
Kanye West,1,Hey Mama - Kanye West,SOUXNNU12A67020A48,Hey Mama
Kanye West,1,RoboCop - Kanye West,SOTGKTG12AB0189D2E,RoboCop

user_id
acfe56f70a87e2df5e78c7500 183ad660ea187a4 ...
94be0c8807e72979251a0bfff bef207963aefbf3 ...
7e2325a9e56525c2c373ed2ef f8e9d80c212e38a ...
41165837b569688f022d13179 4e61999ca4c3192 ...
c04f94e15c1eaacb01ce2687c d63e231400e61f8 ...
071a964898dc8f6d4aef983fa 45bbea65ab29168 ...
0c1bd87e9411cd93729abf821 fd1fd7ae212b80c ...
18252a0c532f23597b533d974 a1e3c45d559a739 ...
36cfaf822e0ae9cef36d8aa4e bf86d8861919822 ...
d251cd245a1374c733a1fde26 08ef86932fec1b4 ...


In [19]:
Foo = song_data[song_data['artist'] == 'Foo Fighters']
Foo[Foo['user_id']].unique()

artist,listen_count,song,song_id,title
Foo Fighters,6,Next Year - Foo Fighters,SOYYIZT12A8C1408CA,Next Year
Foo Fighters,8,Breakout - Foo Fighters,SOMSQJY12A8C138539,Breakout
Foo Fighters,1,Exhausted - Foo Fighters,SONMPJJ12AB0183AF8,Exhausted
Foo Fighters,1,Next Year - Foo Fighters,SOYYIZT12A8C1408CA,Next Year
Foo Fighters,1,The Pretender - Foo Fighters ...,SOQLUTQ12A8AE48037,The Pretender
Foo Fighters,13,Everlong - Foo Fighters,SOXVVSM12A8C142224,Everlong
Foo Fighters,1,Exhausted - Foo Fighters,SONMPJJ12AB0183AF8,Exhausted
Foo Fighters,2,Virginia Moon - Foo Fighters ...,SOKQTHF12B0B80B306,Virginia Moon
Foo Fighters,1,Low - Foo Fighters,SOXGFMC12A8C1386EC,Low
Foo Fighters,4,Everlong - Foo Fighters,SOXVVSM12A8C142224,Everlong

user_id
7e3f6e77217967868c52338eb dc793b33ba28eb9 ...
6952b87aedaaddb57f99c2207 d0fac06ca1bd86f ...
dfb04c4a166ddd0e53cafccb9 f0540a7744e2346 ...
843209628fc05b104ccd0d841 1cd41faa58a4d6e ...
d6fe20c5b749f74e43595caf3 f9b61b5bcded1ed ...
932ad377a3bb70d0d32969f49 41f0268d560afc9 ...
7e3f6e77217967868c52338eb dc793b33ba28eb9 ...
d8dde5d48711ad8ae25253b5b 310b6adec906f42 ...
875988a37dd8d37da406d652f d8063a60a238b93 ...
608f53c1ec24eecf1a12e34d5 4a60f4d8a8b7591 ...


In [20]:
Taylor = song_data[song_data['artist'] == 'Taylor Swift']
Taylor[Taylor['user_id']].unique()


artist,listen_count,song,song_id,title
Taylor Swift,1,Love Story - Taylor Swift,SOTWSXL12A8C143349,Love Story
Taylor Swift,1,Our Song - Taylor Swift,SOUVGJL12AB017FC35,Our Song
Taylor Swift,2,Love Story - Taylor Swift,SOTWSXL12A8C143349,Love Story
Taylor Swift,1,You Belong With Me - Taylor Swift ...,SOSROFB12AAF3B4C5D,You Belong With Me
Taylor Swift,2,Love Story - Taylor Swift,SOTWSXL12A8C143349,Love Story
Taylor Swift,1,Love Story - Taylor Swift,SOTWSXL12A8C143349,Love Story
Taylor Swift,1,Crazier - Taylor Swift,SOYGKHG12AB01804C8,Crazier
Taylor Swift,9,Change - Taylor Swift,SORRBVQ12A58A7AA33,Change
Taylor Swift,3,Tim McGraw - Taylor Swift,SOCLMAD12AB017FC09,Tim McGraw
Taylor Swift,2,Love Story - Taylor Swift,SOTWSXL12A8C143349,Love Story

user_id
5cc8f91d7d1772e1bdac953ae 013dd210c298637 ...
97a7c186574fa04e670ae5602 8737a0173a55d48 ...
b8812e995a3120738728d9165 1c90447939d2e34 ...
f2cfc85dc979bdb90b0e8fcb7 2b130ddfbb599d6 ...
197f41d02b4a2ac2441425049 bc2f5d73fc9cd80 ...
9c3f10a849179127ab2b621af f26875f24173fc0 ...
810773e461ed2008c51ad7200 1b2c2877b9f7b02 ...
58123e3a5b24c3648d41fd55b 733a2300d110e52 ...
ae9fc4d72ea9492f2fdbfbc8d 70a57d09c4b1d18 ...
03887469fc392faeac4a93c87 45e9261408cfc79 ...


In [21]:
GaGa = song_data[song_data['artist'] == 'Lady GaGa']
GaGa[GaGa['user_id']].unique()

artist,listen_count,song,song_id,title
Lady GaGa,1,Disco Heaven - Lady GaGa,SOJVYJH12AB0180F4F,Disco Heaven
Lady GaGa,2,Alejandro - Lady GaGa,SOSCIZP12AB0181D2F,Alejandro
Lady GaGa,1,Alejandro - Lady GaGa,SOSCIZP12AB0181D2F,Alejandro
Lady GaGa,4,Monster - Lady GaGa,SOEYVHS12AB0181D31,Monster
Lady GaGa,1,Alejandro - Lady GaGa,SOSCIZP12AB0181D2F,Alejandro
Lady GaGa,1,Again Again - Lady GaGa,SOMONAP12AB0181D21,Again Again
Lady GaGa,1,Alejandro - Lady GaGa,SOSCIZP12AB0181D2F,Alejandro
Lady GaGa,1,Beautiful_ Dirty_ Rich - Lady GaGa ...,SOASXQD12AB018902F,Beautiful_ Dirty_ Rich
Lady GaGa,2,Beautiful_ Dirty_ Rich - Lady GaGa ...,SOASXQD12AB018902F,Beautiful_ Dirty_ Rich
Lady GaGa,1,Teeth - Lady GaGa,SOCBQKE12AB018548E,Teeth

user_id
0fa503d7140a7f2a238b2ef1c 5894b40878b8986 ...
62cbcee4597ea895c9146077f 99a887f033f52b7 ...
17e1bd8db46948ab3bdc24468 5c51d5b6730fd63 ...
e64cf25bf64967a5735040dc3 bf34866bb6aabf9 ...
aaf246c128ae96aaf1b50e44b 0193d5ca28450fc ...
4c28cd5e547e656bc5821853f 9819d0a94776cf7 ...
05c0dcea6310a9eabe015bf34 eb71f63ece7a732 ...
26ec10b5292c3019754522146 e20de3877e1a0c9 ...
a1a10f18dd03a3e5558a972f1 1a2ec1fda20df28 ...
de5b7f935683035a6cde7f588 6254fd293a73869 ...


## Using groupby-aggregate to find the most popular and least popular artist

In [22]:
song_data.groupby(
    key_column_names='artist', operations={'total_count': turicreate.aggregate.SUM('listen_count')}).sort(
    'total_count',ascending=True)

artist,total_count
William Tabbert,14
Reel Feelings,24
Beyoncé feat. Bun B and Slim Thug ...,26
Diplo,30
Boggle Karaoke,30
harvey summers,31
Nâdiya,36
Kanye West / Talib Kweli / Q-Tip / Common / ...,38
Jody Bernal,38
Aneta Langerova,38


In [23]:
song_data.groupby(
    key_column_names='artist', operations={'total_count': turicreate.aggregate.SUM('listen_count')}).sort(
    'total_count',ascending=False)

artist,total_count
Kings Of Leon,43218
Dwight Yoakam,40619
Björk,38889
Coldplay,35362
Florence + The Machine,33387
Justin Bieber,29715
Alliance Ethnik,26689
OneRepublic,25754
Train,25402
The Black Keys,22184
