In [168]:
import pandas
import numpy as np

# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this blanket filter also hides pandas warnings such as
# SettingWithCopyWarning raised by later cells — confirm that is intended.
import warnings
warnings.filterwarnings("ignore")

In [169]:
# Load the (user_id, song_id, listen_count) triplets.
# The file has no header row, so the column names are supplied at read time.
triplets_file = 'https://static.turi.com/datasets/millionsong/10000.txt'
song_df_1 = pandas.read_table(
    triplets_file,
    header=None,
    names=['user_id', 'song_id', 'listen_count'],
)
print("Dimension of song_df_1:", song_df_1.shape)
song_df_1.head()

Dimension of song_df_1: (2000000, 3)


Unnamed: 0,user_id,song_id,listen_count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1


In [170]:
# Load the song metadata table; the relative path is resolved against the
# notebook's working directory.
songs_metadata_file = 'song_data.csv'
song_df_2 = pandas.read_csv(filepath_or_buffer=songs_metadata_file)
print("Dimension of song_df_2:", song_df_2.shape)
song_df_2.head()

Dimension of song_df_2: (1000000, 5)


Unnamed: 0,song_id,title,release,artist_name,year
0,SOQMMHC12AB0180CB8,Silent Night,Monster Ballads X-Mas,Faster Pussy cat,2003
1,SOVFVAK12A8C1350D9,Tanssi vaan,Karkuteillä,Karkkiautomaatti,1995
2,SOGTUKN12AB017F4F1,No One Could Ever,Butter,Hudson Mohawke,2006
3,SOBNYVR12A8C13558C,Si Vos Querés,De Culo,Yerba Brava,2003
4,SOHSBXH12A8C13B0DF,Tangle Of Aspens,Rene Ablaze Presents Winter Sessions,Der Mystic,0


###### Merging `song_df_1` and `song_df_2`

In [172]:
# Attach song metadata to every interaction row. The metadata is first reduced
# to one row per song_id so the left join cannot multiply interaction rows.
song_df = song_df_1.merge(
    song_df_2.drop_duplicates(subset=['song_id']),
    how="left",
    on="song_id",
)
print("Dimension of song_df after merging:", song_df.shape)
song_df.head()

Dimension of song_df after merging: (2000000, 7)


Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999


In [173]:
# Work with only the first 10,000 interaction rows.
# .copy() makes the subset an independent frame, so adding the 'song' column
# below does not write into a slice of the merged frame (which would raise
# pandas' SettingWithCopyWarning).
song_df = song_df.head(10000).copy()

# Human-readable song label of the form "<title> - <artist_name>".
# NOTE(review): a row with a missing artist_name yields a NaN label here —
# confirm whether such rows exist after the left merge.
song_df['song'] = song_df['title'].map(str) + " - " + song_df['artist_name']

# Keep only the columns used by the rest of the notebook.
song_df = song_df[['user_id', 'song_id', 'listen_count','song']]
song_df.head()

Unnamed: 0,user_id,song_id,listen_count,song
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove - Jack Johnson
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas - Paco De Lucia
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger - Kanye West
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations - Jack Johnson
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly - Foo Fighters


In [174]:
# Popularity score of a song = how many interaction rows reference it.
# Note that count() tallies rows; it does not add up the listen_count values.
song_df_grouped = (
    song_df.groupby(['song'])['listen_count']
    .count()
    .reset_index()
    .rename(columns={'listen_count': 'score'})
)

# Highest score first; songs with equal scores are ordered by song label ascending.
song_df_sort = song_df_grouped.sort_values(by=['score', 'song'], ascending=[False, True])

# Rank 1 goes to the highest score. method='first' breaks ties by row order,
# i.e. by the label order established by the sort above.
song_df_sort['Rank'] = song_df_sort['score'].rank(ascending=False, method='first')

# Report the table size and show the ten highest-scoring songs.
print("Dimension of song_df_sort:", song_df_sort.shape)
song_df_sort.head(10)

Dimension of song_df_sort: (5151, 3)


Unnamed: 0,song,score,Rank
3660,Sehr kosmisch - Harmonia,45,1.0
4678,Undo - Björk,32,2.0
5105,You're The One - Dwight Yoakam,32,3.0
1071,Dog Days Are Over (Radio Edit) - Florence + Th...,28,4.0
3655,Secrets - OneRepublic,28,5.0
4378,The Scientist - Coldplay,27,6.0
4712,Use Somebody - Kings Of Leon,27,7.0
3476,Revelry - Kings Of Leon,26,8.0
1387,Fireflies - Charttraxx Karaoke,24,9.0
1862,Horn Concerto No. 4 in E flat K495: II. Romanc...,23,10.0


In [166]:
# Distinct user ids and distinct song labels in the working subset.
users = song_df['user_id'].unique()
items = song_df['song'].unique()

# Report the sizes of the two arrays computed above.
print("Number of unique users:", len(users))
print("Number of unique song:", len(items))

Number of unique users: 365
Number of unique song: 5151


#### Showing the top 10 most popular songs for a particular user_id

In [167]:
# Show the popularity ranking next to one user's id: users[5], which was
# '4bd88bfb25263a75bbdd467e74018f4ae570e5df' in the recorded run.
# The ranking itself is the same for every user; only the user_id column changes.
song_df_sort = song_df_sort.assign(user_id=users[5])[['user_id', 'song', 'score', 'Rank']]
song_df_sort.head(10)

Unnamed: 0,user_id,song,score,Rank
3660,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Sehr kosmisch - Harmonia,45,1.0
4678,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Undo - Björk,32,2.0
5105,4bd88bfb25263a75bbdd467e74018f4ae570e5df,You're The One - Dwight Yoakam,32,3.0
1071,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Dog Days Are Over (Radio Edit) - Florence + Th...,28,4.0
3655,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Secrets - OneRepublic,28,5.0
4378,4bd88bfb25263a75bbdd467e74018f4ae570e5df,The Scientist - Coldplay,27,6.0
4712,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Use Somebody - Kings Of Leon,27,7.0
3476,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Revelry - Kings Of Leon,26,8.0
1387,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Fireflies - Charttraxx Karaoke,24,9.0
1862,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Horn Concerto No. 4 in E flat K495: II. Romanc...,23,10.0


In [175]:
# Show the popularity ranking next to another user's id: users[100], which was
# 'b1269307f2ae8c17062c6aea2502b099aad517b6' in the recorded run.
# The ranking itself is the same for every user; only the user_id column changes.
song_df_sort = song_df_sort.assign(user_id=users[100])[['user_id', 'song', 'score', 'Rank']]
song_df_sort.head(10)

Unnamed: 0,user_id,song,score,Rank
3660,b1269307f2ae8c17062c6aea2502b099aad517b6,Sehr kosmisch - Harmonia,45,1.0
4678,b1269307f2ae8c17062c6aea2502b099aad517b6,Undo - Björk,32,2.0
5105,b1269307f2ae8c17062c6aea2502b099aad517b6,You're The One - Dwight Yoakam,32,3.0
1071,b1269307f2ae8c17062c6aea2502b099aad517b6,Dog Days Are Over (Radio Edit) - Florence + Th...,28,4.0
3655,b1269307f2ae8c17062c6aea2502b099aad517b6,Secrets - OneRepublic,28,5.0
4378,b1269307f2ae8c17062c6aea2502b099aad517b6,The Scientist - Coldplay,27,6.0
4712,b1269307f2ae8c17062c6aea2502b099aad517b6,Use Somebody - Kings Of Leon,27,7.0
3476,b1269307f2ae8c17062c6aea2502b099aad517b6,Revelry - Kings Of Leon,26,8.0
1387,b1269307f2ae8c17062c6aea2502b099aad517b6,Fireflies - Charttraxx Karaoke,24,9.0
1862,b1269307f2ae8c17062c6aea2502b099aad517b6,Horn Concerto No. 4 in E flat K495: II. Romanc...,23,10.0
