# Project - Music Recommendation

### Importing libraries

In [54]:
import pandas as pd
import numpy as np
import Recommenders

### Loading 'triplets_file' dataset

In [55]:
# Read the user / song / listen-count triplets from the local dataset folder
# and report the frame's (rows, columns) shape.
triplets_csv = 'dataset/triplets_file.csv'
df1 = pd.read_csv(triplets_csv)
print('Shape of dataset-1 is: {}'.format(df1.shape))

Shape of dataset-1 is: (2000000, 3)


In [56]:
# Preview the first five rows of the triplets frame.
df1.head(n=5)

Unnamed: 0,user_id,song_id,listen_count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1


In [57]:
# Count missing values per column (isna is the same check as isnull).
df1.isna().sum()

user_id         0
song_id         0
listen_count    0
dtype: int64

### Loading 'song' dataset

In [58]:
# Read the per-song metadata table from the local dataset folder
# and report the frame's (rows, columns) shape.
songs_csv = 'dataset/song_data.csv'
df2 = pd.read_csv(songs_csv)
print('Shape of dataset-2 is: {}'.format(df2.shape))

Shape of dataset-2 is: (1000000, 5)


In [59]:
# Preview the first five rows of the song metadata frame.
df2.head(n=5)

Unnamed: 0,song_id,title,release,artist_name,year
0,SOQMMHC12AB0180CB8,Silent Night,Monster Ballads X-Mas,Faster Pussy cat,2003
1,SOVFVAK12A8C1350D9,Tanssi vaan,Karkuteillä,Karkkiautomaatti,1995
2,SOGTUKN12AB017F4F1,No One Could Ever,Butter,Hudson Mohawke,2006
3,SOBNYVR12A8C13558C,Si Vos Querés,De Culo,Yerba Brava,2003
4,SOHSBXH12A8C13B0DF,Tangle Of Aspens,Rene Ablaze Presents Winter Sessions,Der Mystic,0


### Combining both datasets to create one dataset

In [60]:
# Keep a single metadata row per song_id before joining, then left-join so that
# every row of df1 is retained and gains the song metadata columns.
df = df1.merge(
    df2.drop_duplicates(subset=['song_id']),
    how='left',
    on='song_id',
)
print("Shape of merged dataset is: {}".format(df.shape))

Shape of merged dataset is: (2000000, 7)


In [61]:
# Preview the first five rows of the merged frame.
df.head(n=5)

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999


In [62]:
# Report the row count of each input frame and of the merged result.
for template, frame in (
    ("Length of dataset-1 is: {}", df1),
    ("Length of dataset-2 is: {}", df2),
    ("Length of merged dataset is: {}", df),
):
    print(template.format(len(frame)))

Length of dataset-1 is: 2000000
Length of dataset-2 is: 1000000
Length of merged dataset is: 2000000


### Selecting only the first 50,000 records for creating the model

In [63]:
# Restrict the working set to the first 50,000 rows of the merged frame.
# head() returns a slice of the original frame; .copy() detaches it so that
# columns added to `df` afterwards are written to an independent frame and do
# not raise pandas' SettingWithCopyWarning.
df = df.head(50000).copy()
print("Length of new dataset is: {}".format(len(df)))

Length of new dataset is: 50000


### Creating new feature combining title and artist name

In [64]:
# Build a "<title> - <artist_name>" label for every row.
# assign() returns a new frame rather than writing the column in place, so
# nothing is set on a frame that may be a slice of the merged data (which is
# what makes pandas emit SettingWithCopyWarning).
df = df.assign(song=df['title'] + ' - ' + df['artist_name'])
df.head()

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year,song
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0,The Cove - Jack Johnson
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976,Entre Dos Aguas - Paco De Lucia
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007,Stronger - Kanye West
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005,Constellations - Jack Johnson
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999,Learn To Fly - Foo Fighters


### Number of user-song records per song (row count per song, not the sum of `listen_count`)

In [65]:
# Count the rows recorded for each song label. The resulting column keeps the
# name 'listen_count' although it holds a per-song row count.
df_grouped = df.groupby('song')['listen_count'].count().reset_index()
df_grouped.head(n=5)

Unnamed: 0,song,listen_count
0,#!*@ You Tonight [Featuring R. Kelly] (Explici...,1
1,#40 - DAVE MATTHEWS BAND,7
2,& Down - Boys Noize,13
3,' Cello Song - Nick Drake,3
4,'97 Bonnie & Clyde - Eminem,5


### Total counts

In [66]:
# Add up the per-song counts; this total is the denominator for the
# percentage column computed in the next cell.
grouped_sum = df_grouped.loc[:, 'listen_count'].sum()
grouped_sum

50000

### Most popular song by percentage

In [67]:
# Express each song's count as a percentage of the total, then show songs from
# highest to lowest count, breaking ties alphabetically by song label.
df_grouped['percentage'] = df_grouped['listen_count'].div(grouped_sum).mul(100)
df_grouped.sort_values(by=['listen_count', 'song'], ascending=[False, True])

Unnamed: 0,song,listen_count,percentage
6682,Sehr kosmisch - Harmonia,204,0.408
8509,Undo - Björk,182,0.364
1936,Dog Days Are Over (Radio Edit) - Florence + Th...,173,0.346
9256,You're The One - Dwight Yoakam,169,0.338
6348,Revelry - Kings Of Leon,166,0.332
...,...,...,...
9290,Your Time Has Come - Audioslave,1,0.002
9300,Zebra (full-length/album version) - John Butle...,1,0.002
9311,clouding - Four Tet,1,0.002
9312,high fives - Four Tet,1,0.002


---

### Loading popularity recommendation engine

In [69]:
# Instantiate the popularity recommender from the imported Recommenders module.
engine = Recommenders.popularity_recommender_py()
# Pass the working frame plus two of its column names.
# NOTE(review): presumably 'user_id' is the user column and 'song' the item
# column — confirm against the signature of create() in Recommenders.
engine.create(df, 'user_id', 'song')

### Displaying top 10 popular songs for the user at row 3

In [70]:
# Take the user_id stored at index label 3 and ask the popularity engine for
# that user's recommendations.
engine.recommend(df.loc[3, 'user_id'])

Unnamed: 0,user_id,song,score,Rank
6682,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Sehr kosmisch - Harmonia,204,1.0
8509,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Undo - Björk,182,2.0
1936,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Dog Days Are Over (Radio Edit) - Florence + Th...,173,3.0
9256,b80344d063b5ccb3212f76538f3d9e43d87dca9e,You're The One - Dwight Yoakam,169,4.0
6348,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Revelry - Kings Of Leon,166,5.0
6671,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Secrets - OneRepublic,158,6.0
3372,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Horn Concerto No. 4 in E flat K495: II. Romanc...,134,7.0
2532,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Fireflies - Charttraxx Karaoke,128,8.0
8287,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Tive Sim - Cartola,117,9.0
3254,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Hey_ Soul Sister - Train,109,10.0


### Displaying top 10 popular songs for the user at row 250

In [72]:
# Take the user_id stored at index label 250 and ask the popularity engine for
# that user's recommendations.
engine.recommend(df.loc[250, 'user_id'])

Unnamed: 0,user_id,song,score,Rank
6682,17aa9f6dbdf753831da8f38c71b66b64373de613,Sehr kosmisch - Harmonia,204,1.0
8509,17aa9f6dbdf753831da8f38c71b66b64373de613,Undo - Björk,182,2.0
1936,17aa9f6dbdf753831da8f38c71b66b64373de613,Dog Days Are Over (Radio Edit) - Florence + Th...,173,3.0
9256,17aa9f6dbdf753831da8f38c71b66b64373de613,You're The One - Dwight Yoakam,169,4.0
6348,17aa9f6dbdf753831da8f38c71b66b64373de613,Revelry - Kings Of Leon,166,5.0
6671,17aa9f6dbdf753831da8f38c71b66b64373de613,Secrets - OneRepublic,158,6.0
3372,17aa9f6dbdf753831da8f38c71b66b64373de613,Horn Concerto No. 4 in E flat K495: II. Romanc...,134,7.0
2532,17aa9f6dbdf753831da8f38c71b66b64373de613,Fireflies - Charttraxx Karaoke,128,8.0
8287,17aa9f6dbdf753831da8f38c71b66b64373de613,Tive Sim - Cartola,117,9.0
3254,17aa9f6dbdf753831da8f38c71b66b64373de613,Hey_ Soul Sister - Train,109,10.0


### Item similarity recommendation

In [73]:
# Instantiate the item-similarity recommender from the imported Recommenders module.
engine_i = Recommenders.item_similarity_recommender_py()
# Pass the working frame plus two of its column names.
# NOTE(review): presumably 'user_id' is the user column and 'song' the item
# column — confirm against the signature of create() in Recommenders.
engine_i.create(df, 'user_id', 'song')

### Displaying the songs already listened to by the user at row 3

In [75]:
# Fetch the items the engine holds for the user_id at index label 3
# and print them one per line.
user_items = engine_i.get_user_items(df.loc[3, 'user_id'])
for song_name in user_items:
    print(song_name)

The Cove - Jack Johnson
Entre Dos Aguas - Paco De Lucia
Stronger - Kanye West
Constellations - Jack Johnson
Learn To Fly - Foo Fighters
Apuesta Por El Rock 'N' Roll - Héroes del Silencio
Paper Gangsta - Lady GaGa
Stacked Actors - Foo Fighters
Sehr kosmisch - Harmonia
Heaven's gonna burn your eyes - Thievery Corporation feat. Emiliana Torrini
Let It Be Sung - Jack Johnson / Matt Costa / Zach Gill / Dan Lebowitz / Steve Adams
I'll Be Missing You (Featuring Faith Evans & 112)(Album Version) - Puff Daddy
Love Shack - The B-52's
Clarity - John Mayer
I?'m A Steady Rollin? Man - Robert Johnson
The Old Saloon - The Lonely Island
Behind The Sea [Live In Chicago] - Panic At The Disco
Champion - Kanye West
Breakout - Foo Fighters
Ragged Wood - Fleet Foxes
Mykonos - Fleet Foxes
Country Road - Jack Johnson / Paula Fuga
Oh No - Andrew Bird
Love Song For No One - John Mayer
Jewels And Gold - Angus & Julia Stone
83 - John Mayer
Neon - John Mayer
The Middle - Jimmy Eat World
High and dry - Jorge Drexle

### Displaying the songs already listened to by the user at row 250

In [77]:
# Fetch the items the engine holds for the user_id at index label 250
# and print them one per line.
user_items = engine_i.get_user_items(df.loc[250, 'user_id'])
for song_name in user_items:
    print(song_name)

Aunt Eggma Blowtorch - Neutral Milk Hotel
Full Circle - Miley Cyrus
Poor Jackie - Man Man
Hot N Cold (Manhattan Clique Remix Radio Edit) - Katy Perry
Daisy And Prudence - Erin McKeown
Everythings Just Wonderful - Lily Allen
The Ballad of Michael Valentine - The Killers
I Stand Corrected (Album) - Vampire Weekend
They Might Follow You - Tiny Vipers
Monkey Man - Amy Winehouse
Bleeding Hearts - Soltero
Knee Deep At The National Pop League - Camera Obscura
Dreamlover - Mariah Carey
Wet Blanket - Metric
Guyamas Sonora - Beirut
Suspended From Class - Camera Obscura
Empty Skies - Kosheen
Fast As I Can - Erin McKeown
Just The Way You Are - Barry White
Sleeping In (Album) - Postal Service
Gimme Sympathy - Metric
You Mustn't Kick It Around - Erin McKeown
Help I'm Alive - Metric
Teenager - Camera Obscura
Last Night A DJ Saved My Life - Mariah Carey Featuring Busta Rhymes_ Fabulous And  DJ Clue
Stadium Love - Metric
Spare-Ohs - Andrew Bird
Interstate 8 - Modest Mouse
All The Things That Go To Make

### Song recommendation for user 3

In [79]:
# Ask the item-similarity engine for recommendations for the user_id stored at
# index label 3.
engine_i.recommend(df.loc[3, 'user_id'])

No. of unique songs for the user: 45
no. of unique songs in the training set: 9325
Non zero values in cooccurence_matrix :27764


Unnamed: 0,user_id,song,score,rank
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Questions - Jack Johnson,0.037157,1
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Great Indoors - John Mayer,0.036953,2
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Wrong Turn - Jack Johnson,0.03674,3
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Ghost Dream - Hymie's Basement,0.036463,4
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Baby I Want You - Amos Lee,0.036244,5
5,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Better That We Break - Maroon 5,0.035016,6
6,b80344d063b5ccb3212f76538f3d9e43d87dca9e,The Sharing Song - Jack Johnson,0.034193,7
7,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Better Days - Amos Lee,0.034193,8
8,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Dreamin' - Amos Lee,0.034193,9
9,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Mr.Curiosity (Album Version) - Jason Mraz,0.034193,10


### Song recommendation for user 250

In [80]:
# Ask the item-similarity engine for recommendations for the user_id stored at
# index label 250.
engine_i.recommend(df.loc[250, 'user_id'])

No. of unique songs for the user: 103
no. of unique songs in the training set: 9325
Non zero values in cooccurence_matrix :76631


Unnamed: 0,user_id,song,score,rank
0,17aa9f6dbdf753831da8f38c71b66b64373de613,La Llorona - Beirut,0.07824,1
1,17aa9f6dbdf753831da8f38c71b66b64373de613,Man Who Make You Sick - Man Man,0.077764,2
2,17aa9f6dbdf753831da8f38c71b66b64373de613,Rabbit Habits - Man Man,0.077337,3
3,17aa9f6dbdf753831da8f38c71b66b64373de613,Again & Again - the bird and the bee,0.077287,4
4,17aa9f6dbdf753831da8f38c71b66b64373de613,Banana Ghost - Man Man,0.074059,5
5,17aa9f6dbdf753831da8f38c71b66b64373de613,Girl Sailor (Album) - The Shins,0.072373,6
6,17aa9f6dbdf753831da8f38c71b66b64373de613,Last Day Of Magic - The Kills,0.072266,7
7,17aa9f6dbdf753831da8f38c71b66b64373de613,Mild Child (Album) - The Shins,0.072245,8
8,17aa9f6dbdf753831da8f38c71b66b64373de613,Werewolf (on the hood of yer heartbreak) - Man...,0.071609,9
9,17aa9f6dbdf753831da8f38c71b66b64373de613,Jumpers (Album) - Sleater-kinney,0.070409,10


---

### Recommending based on song name - test1

In [81]:
# Query the item-similarity engine with two song labels at once.
query_songs = ['Banana Ghost - Man Man', 'Rabbit Habits - Man Man']
engine_i.get_similar_items(query_songs)

no. of unique songs in the training set: 9325
Non zero values in cooccurence_matrix :1545


Unnamed: 0,user_id,song,score,rank
0,,Acadian Coast - Soltero,0.403846,1
1,,Skinning the Drum - themselves,0.361111,2
2,,Man Who Make You Sick - Man Man,0.3375,3
3,,Last Day Of Magic - The Kills,0.333333,4
4,,Gold Teeth - Man Man,0.330357,5
5,,Living In Colour - Frightened Rabbit,0.315385,6
6,,Hubcap - Sleater-kinney,0.291667,7
7,,The Spirit Of Giving - The New Pornographers,0.291667,8
8,,Please_ Before I Go - Derek Webb,0.291667,9
9,,Grace Cathedral Hill - The Decemberists,0.291667,10


### Recommending based on song name - test2

In [82]:
# Query the item-similarity engine with a single song label.
query_songs = ['La Llorona - Beirut']
engine_i.get_similar_items(query_songs)

no. of unique songs in the training set: 9325
Non zero values in cooccurence_matrix :797


Unnamed: 0,user_id,song,score,rank
0,,On a Bayonet - Beirut,0.625,1
1,,The Concubine - Beirut,0.6,2
2,,Postcards From Italy - Beirut,0.571429,3
3,,My Wife - Beirut,0.555556,4
4,,A Sunday Smile - Beirut,0.538462,5
5,,Mount Wroclai (Idle Days) - Beirut,0.5,6
6,,The Shrew - Beirut,0.5,7
7,,Brandenburg - Beirut,0.454545,8
8,,Prenzlauerberg - Beirut,0.444444,9
9,,The Gulag Orkestar - Beirut,0.4,10


---