# Build a song recommender system

In [1]:
import turicreate

# Load some music data

In [2]:
song_data = turicreate.SFrame('Song.sframe')

# Explore our data

In [3]:
song_data

user_id,song_id,listen_count,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOAKIMP12A8C130995,1,The Cove,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Paco De Lucia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBXHDL12A81C204C0,1,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBYHAJ12A6701BF1D,1,Constellations,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODACBL12A8C13C273,1,Learn To Fly,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODDNQT12A6D4F5F7E,5,Apuesta Por El Rock 'N' Roll ...,Héroes del Silencio
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODXRTY12AB0180F3B,1,Paper Gangsta,Lady GaGa
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFGUAY12AB017B0A8,1,Stacked Actors,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFRQTD12A81C233C0,1,Sehr kosmisch,Harmonia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOHQWYZ12A6D4FA701,1,Heaven's gonna burn your eyes ...,Thievery Corporation feat. Emiliana Torrini ...

song
The Cove - Jack Johnson
Entre Dos Aguas - Paco De Lucia ...
Stronger - Kanye West
Constellations - Jack Johnson ...
Learn To Fly - Foo Fighters ...
Apuesta Por El Rock 'N' Roll - Héroes del ...
Paper Gangsta - Lady GaGa
Stacked Actors - Foo Fighters ...
Sehr kosmisch - Harmonia
Heaven's gonna burn your eyes - Thievery ...


## Show the most popular songs in the dataset

In [4]:
song_data['song'].show()

# Count the number of unique users in the data

In [5]:
users = song_data['user_id'].unique()

In [6]:
len(users)

66346

# Create a song recommender


In [7]:
train_data,test_data = song_data.random_split(.8,seed=0)

## Create a very simple popularity recommender

In [8]:
popularity_model = turicreate.popularity_recommender.create(train_data,
                                                           user_id = 'user_id',
                                                           item_id = 'song')

## Use the popularity model to make some predictions

In [9]:
popularity_model.recommend(users=[users[0]])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sehr kosmisch - Harmonia,4754.0,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Undo - Björk,4227.0,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,You're The One - Dwight Yoakam ...,3781.0,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Revelry - Kings Of Leon,3527.0,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Secrets - OneRepublic,3148.0,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Hey_ Soul Sister - Train,2538.0,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Tive Sim - Cartola,2521.0,10


# Build a recommender with personalization

In [10]:
personalized_model = turicreate.item_similarity_recommender.create(train_data,
                                                                  user_id = 'user_id',
                                                                  item_id = 'song')

## Apply personalized model to make song recommendations

In [11]:
personalized_model.recommend(users=[users[0]])

user_id,song,score,rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Riot In Cell Block Number Nine - Dr Feelgood ...,0.0374999940395355,1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Sei Lá Mangueira - Elizeth Cardoso ...,0.0331632643938064,2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,The Stallion - Ween,0.0322580635547637,3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Rain - Subhumans,0.0314159244298934,4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,West One (Shine On Me) - The Ruts ...,0.0306771993637084,5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Back Against The Wall - Cage The Elephant ...,0.0301204770803451,6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Life Less Frightening - Rise Against ...,0.0284431129693985,7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,A Beggar On A Beach Of Gold - Mike And The ...,0.023002490401268,8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Audience Of One - Rise Against ...,0.0193938463926315,9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...,Blame It On The Boogie - The Jacksons ...,0.0189873427152633,10


In [12]:
personalized_model.recommend(users=[users[1]])

user_id,song,score,rank
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Rabbit Heart (Raise It Up) - Florence + The ...,0.0799399726092815,1
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,You've Got The Love - Florence + The Machine ...,0.0616634935140609,2
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Cosmic Love - Florence + The Machine ...,0.05760308355093,3
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Bird Song - Florence + The Machine ...,0.0454223714768886,4
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Hardest Of Hearts - Florence + The Machine ...,0.039548572152853,5
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Hospital Beds - Florence + The Machine ...,0.0329669937491416,6
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Are You Hurting The One You Love? - Florence + ...,0.0326175801455974,7
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Bird Song Intro - Florence + The Machine ...,0.0291902534663677,8
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Hope - Apocalyptica,0.024524800479412,9
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Girl With One Eye (Bayou Percussion Version) - ...,0.0198598206043243,10


# Apply model to find similar songs in the data set

In [13]:
personalized_model.get_similar_items(['With Or Without You - U2'])

song,similar,score,rank
With Or Without You - U2,I Still Haven't Found What I'm Looking For ...,0.0428571701049804,1
With Or Without You - U2,Hold Me_ Thrill Me_ Kiss Me_ Kill Me - U2 ...,0.033734917640686,2
With Or Without You - U2,Window In The Skies - U2,0.032835841178894,3
With Or Without You - U2,Vertigo - U2,0.030075192451477,4
With Or Without You - U2,Sunday Bloody Sunday - U2,0.0271317958831787,5
With Or Without You - U2,Bad - U2,0.0251798629760742,6
With Or Without You - U2,A Day Without Me - U2,0.0237154364585876,7
With Or Without You - U2,Another Time Another Place - U2 ...,0.0203251838684082,8
With Or Without You - U2,Walk On - U2,0.0202020406723022,9
With Or Without You - U2,Get On Your Boots - U2,0.0196850299835205,10


In [14]:
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])

song,similar,score,rank
Chan Chan (Live) - Buena Vista Social Club ...,Murmullo - Buena Vista Social Club ...,0.1881188154220581,1
Chan Chan (Live) - Buena Vista Social Club ...,La Bayamesa - Buena Vista Social Club ...,0.1871921420097351,2
Chan Chan (Live) - Buena Vista Social Club ...,Amor de Loca Juventud - Buena Vista Social Club ...,0.1848341226577758,3
Chan Chan (Live) - Buena Vista Social Club ...,Diferente - Gotan Project,0.0214592218399047,4
Chan Chan (Live) - Buena Vista Social Club ...,Mistica - Orishas,0.0205761194229125,5
Chan Chan (Live) - Buena Vista Social Club ...,Hotel California - Gipsy Kings ...,0.0193049907684326,6
Chan Chan (Live) - Buena Vista Social Club ...,Nací Orishas - Orishas,0.0191571116447448,7
Chan Chan (Live) - Buena Vista Social Club ...,Gitana - Willie Colon,0.0187969803810119,8
Chan Chan (Live) - Buena Vista Social Club ...,Le Moulin - Yann Tiersen,0.0187969803810119,9
Chan Chan (Live) - Buena Vista Social Club ...,Criminal - Gotan Project,0.0187793374061584,10


# Compare the models quantitatively
We now formally compare the popularity and the personalized models using precision-recall curves. 

In [15]:
model_performance = turicreate.recommender.util.compare_models(test_data, [popularity_model, personalized_model], user_sample=.05)

compare_models: using 2931 users to estimate model performance
PROGRESS: Evaluate model M0



Precision and recall summary statistics by cutoff




+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.017741385192766967 | 0.005011833005691752 |
|   2    | 0.017059024223814404 | 0.009621301970329612 |
|   3    | 0.01671784373933813  | 0.013575770868503728 |
|   4    | 0.01654725349709997  | 0.017925292797350122 |
|   5    | 0.015762538382804493 | 0.02142497618293875  |
|   6    | 0.014955077902877277 | 0.02447575482074852  |
|   7    | 0.01467076083248039  | 0.027812939452713095 |
|   8    | 0.013945752302968267 | 0.03051043150537403  |
|   9    | 0.01330603889457523  | 0.03297734743186006  |
|   10   | 0.012726032070965574 | 0.03532336090888685  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1



Precision and recall summary statistics by cutoff




+--------+-----------------------+-----------------------+
| cutoff |     mean_precision    |      mean_recall      |
+--------+-----------------------+-----------------------+
|   1    |  0.009553053565336063 | 0.0022444801871618655 |
|   2    |  0.008188331627430915 | 0.0037888426066522286 |
|   3    |  0.007505970658478324 |  0.005558931761592964 |
|   4    |  0.006908904810644829 |  0.006744872417850924 |
|   5    | 0.0063459570112589575 |  0.007880392949481993 |
|   6    |  0.006027521892414419 |  0.009126107885063867 |
|   7    |  0.005751328166885997 |  0.010029706920699757 |
|   8    |  0.005458887751620605 |  0.010969048673756946 |
|   9    |  0.005231434095303073 |  0.011708703840228913 |
|   10   |  0.005254179460934833 |  0.01300034029051429  |
+--------+-----------------------+-----------------------+
[10 rows x 3 columns]



The table shows that the personalized model provides much better performance.

# ASSIGNMENT


## Count the unique users who listened to specific artists

In [16]:
artists = ['Kanye West', 'Foo Fighters', 'Taylor Swift', 'Lady GaGa']

In [17]:
for artist in artists:
    song_data_for_artist = song_data[song_data['artist'] == artist]
    users_for_artist = song_data_for_artist['user_id'].unique()
    print(artist + ': ' + str(len(users_for_artist)))

Kanye West: 2522
Foo Fighters: 2055
Taylor Swift: 3246
Lady GaGa: 2928


## Using groupby-aggregate to find the most popular and least popular artist

In [23]:
groups = song_data.groupby('artist', operations={'total_count': turicreate.aggregate.SUM('listen_count')})
grouped

artist,total_count
Michael Nyman,255
Bayside,411
Aaliyah,1566
Parkway Drive,203
Bitty McLean,535
Bring Me The Horizon,1112
Bullet For My Valentine,3620
Boom Bip,140
Delta Spirit,98
Groove Armada,631


In [24]:
groups.sort('total_count',ascending=False)

artist,total_count
Kings Of Leon,43218
Dwight Yoakam,40619
Björk,38889
Coldplay,35362
Florence + The Machine,33387
Justin Bieber,29715
Alliance Ethnik,26689
OneRepublic,25754
Train,25402
The Black Keys,22184


### Least popular artist

In [25]:
grouped.sort('total_count')

artist,total_count
William Tabbert,14
Reel Feelings,24
Beyoncé feat. Bun B and Slim Thug ...,26
Diplo,30
Boggle Karaoke,30
harvey summers,31
Nâdiya,36
Aneta Langerova,38
Jody Bernal,38
Kanye West / Talib Kweli / Q-Tip / Common / ...,38


## Using groupby-aggregate to find the most recommended songs

### We first need to create a personalized reccomendation model
### Split the data

In [26]:
train_data,test_data = song_data.random_split(.8,seed=0)

## Build a recommender model with personalization

In [27]:
item_recommender = turicreate.item_similarity_recommender.create(train_data,
                                                                  user_id = 'user_id',
                                                                  item_id = 'song')

In [28]:
subset_test_users = test_data['user_id'].unique()

In [30]:
item_recommender.recommend(subset_test_users,k=1)

user_id,song,score,rank
18fafad477f9d72ff86f7d0bd 838a6573de0f64a ...,Rabbit Heart (Raise It Up) - Florence + The ...,0.0799399726092815,1
7c5ab90ba508502a888843a71 eef80bf186c7b88 ...,Hypnopaedia - Octopus Project ...,0.0306122501691182,1
974ed1d7e1b2253787987a36a 40ea6e74f893238 ...,Secrets - OneRepublic,0.0384316630661487,1
f9d2599a999d601ad598fe0cb 79b41329ba3d42c ...,Hello - Lionel Richie,0.0246153831481933,1
8e371e084913c4cc2f5bb48d5 070df25d53369a2 ...,Costruire - Niccolò Fabi,0.0309496323267618,1
c8cf3b33060579e75142ad4d3 803f0b4c7ad921f ...,Sunspots - Nine Inch Nails ...,0.026633880640331,1
251f602d5c644925ac89b840e b1a22d9026bde18 ...,Ten Cent Pistol - The Black Keys ...,0.0726216907302538,1
0bac31bb00006b63cab862efe 85c3e92ce604147 ...,Runnin' With The Devil - Van Halen ...,0.0546058813730875,1
e238e498000d3f678d2cd4085 eddc256b9d7ed58 ...,Stranger - Angus & Julia Stone ...,0.0345911930004755,1
1c7e130c6dc23c133d8966f81 ca55dfe8316d78f ...,West One (Shine On Me) - The Ruts ...,0.0146723517349788,1


In [31]:
grouped = song_data.groupby(key_column_names='song', operations={'count': turicreate.aggregate.COUNT()})

In [32]:
grouped.sort('count',ascending=False)

song,count
Sehr kosmisch - Harmonia,5970
Undo - Björk,5281
You're The One - Dwight Yoakam ...,4806
Dog Days Are Over (Radio Edit) - Florence + The ...,4536
Revelry - Kings Of Leon,4339
Horn Concerto No. 4 in E flat K495: II. Romance ...,3949
Secrets - OneRepublic,3916
Tive Sim - Cartola,3185
Fireflies - Charttraxx Karaoke ...,3171
Hey_ Soul Sister - Train,3132
