# Music Recommendation Engine

In [3]:
%matplotlib inline
import pandas as pd
import numpy as np
from numpy import int64

import requests
import IPython.display as Disp
import sklearn
from sklearn.decomposition import TruncatedSVD

### Read dataset that shows metadata of each song into Pandas dataframe

In [4]:
songs_metadata_file = 'C:/Users/aryam/Desktop/ML/song_data.csv'
songs_df =  pd.read_csv(songs_metadata_file)
songs_df.head()

Unnamed: 0,song_id,title,release,artist_name,year
0,SOQMMHC12AB0180CB8,Silent Night,Monster Ballads X-Mas,Faster Pussy cat,2003
1,SOVFVAK12A8C1350D9,Tanssi vaan,Karkuteillä,Karkkiautomaatti,1995
2,SOGTUKN12AB017F4F1,No One Could Ever,Butter,Hudson Mohawke,2006
3,SOBNYVR12A8C13558C,Si Vos Querés,De Culo,Yerba Brava,2003
4,SOHSBXH12A8C13B0DF,Tangle Of Aspens,Rene Ablaze Presents Winter Sessions,Der Mystic,0


In [5]:
songs_df.describe()

Unnamed: 0,year
count,1000000.0
mean,1030.325652
std,998.745002
min,0.0
25%,0.0
50%,1969.0
75%,2002.0
max,2011.0


In [6]:
songs_df.groupby("artist_name")["song_id"].count().sort_values(ascending=False)

artist_name
Michael Jackson                            194
Johnny Cash                                193
Beastie Boys                               187
Joan Baez                                  181
Neil Diamond                               176
                                          ... 
Optical & Fierce                             1
Don Omar / Fat Joe / N.O.R.E. / LDA          1
Don Omar / Gilberto Santa Rosa               1
Don Omar / Mackie Ranks                      1

Little Louie" Vega Feat. Arnold Jarvis      1
Name: song_id, Length: 72665, dtype: int64

In [7]:
Filter_Artist=songs_df['artist_name']=='Queen'
songs_df[Filter_Artist]

Unnamed: 0,song_id,title,release,artist_name,year
25576,SOHRDNI12D02199058,Need Your Loving Tonight (1994 Digital Remaster),The Game,Queen,1980
40232,SOXKDNE12A8C13F0E4,All Dead All Dead (1993 Digital Remaster),News Of The World,Queen,1977
48260,SONGOJV12AF729AEBD,Somebody To Love,Queen On Fire - Live At The Bowl,Queen,1976
50123,SOIVFZP12A8C13C023,God Save The Queen,Queen On Fire - Live At The Bowl,Queen,1975
51874,SOUUNTL12D021937DC,A Kind Of Magic,A Kind Of Magic,Queen,1986
...,...,...,...,...,...
961029,SOIUMCM12AB017DB27,Somebody To Love (2008 Digital Remaster),The Singles Collection,Queen,0
965526,SOPQJAZ12A6310F168,Who Wants To Live Forever (With Commentary),Absolute Greatest (Includes track by track com...,Queen,2009
974190,SOUSWOP12D021B0F86,Innuendo,Innuendo,Queen,1991
977769,SOLDMIM12A8C13E1A1,Life Is Real (Song For Lennon) (1994 Digital R...,Hot Space,Queen,1982


### Read dataset that shows how many times a user plays each song into pandas dataframe

In [8]:
triplets_file = 'C:/Users/aryam/Desktop/ML/10000.txt'
songs_to_user_df = pd.read_table(triplets_file,header=None)
songs_to_user_df.columns = ['user_id', 'song_id', 'listen_count']
songs_to_user_df.head()

Unnamed: 0,user_id,song_id,listen_count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1


In [9]:
songs_to_user_df.describe()

Unnamed: 0,listen_count
count,2000000.0
mean,3.045485
std,6.57972
min,1.0
25%,1.0
50%,1.0
75%,3.0
max,2213.0


In [10]:
songs_to_user_df.groupby('user_id')['listen_count'].count().sort_values(ascending=False)

user_id
6d625c6557df84b60d90426c0116138b617b9449    711
fbee1c8ce1a346fa07d2ef648cec81117438b91f    643
4e11f45d732f4861772b2906f81a7d384552ad12    556
24b98f8ab023f6e7a1c37c7729c623f7b821eb95    540
1aa4fd215aadb160965110ed8a829745cde319eb    533
                                           ... 
10d3b027f494805b9223551e3db03f903953e2cf      1
87c22fcd7f5f833a8e33ba8bc5c7f4863dab5aa8      1
421be8356c6464ae9da340754c1b0b9510ae50b5      1
87a2826a059570052283d542fc03651c3a570afb      1
bec79e2e90bf0fe7238385b2ae6af711dd6c6d1d      1
Name: listen_count, Length: 76353, dtype: int64

## Merge songs and songs to user dataset

In [11]:
combined_songs_df = pd.merge(songs_to_user_df, songs_df, on='song_id')

In [12]:
combined_songs_df.head()

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,7c86176941718984fed11b7c0674ff04c029b480,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
2,76235885b32c4e8c82760c340dc54f9b608d7d7e,SOAKIMP12A8C130995,3,The Cove,Thicker Than Water,Jack Johnson,0
3,250c0fa2a77bc6695046e7c47882ecd85c42d748,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
4,3f73f44560e822344b0fb7c6b463869743eb9860,SOAKIMP12A8C130995,6,The Cove,Thicker Than Water,Jack Johnson,0


### Get most listened songs

In [13]:
combined_songs_df.groupby('song_id')['listen_count'].count().sort_values(ascending=False)

song_id
SOFRQTD12A81C233C0    8277
SOWCKVR12A8C142411    7952
SOAUWYT12A81C206F1    7032
SOAXGDH12A8C13F8A1    6949
SOBONKR12A58A7A7E0    6412
                      ... 
SOWNLZF12A58A79811      51
SOLIGVL12AB017DBAE      51
SOBPGWB12A6D4F7EF3      50
SOYYBJJ12AB017E9FD      48
SOGSPGJ12A8C134FAA      48
Name: listen_count, Length: 10000, dtype: int64

In [14]:
combined_songs_df.groupby('title')['listen_count'].count().sort_values(ascending=False)

title
Sehr kosmisch                     8277
Use Somebody                      7952
Undo                              7032
Dog Days Are Over (Radio Edit)    6949
You're The One                    6729
                                  ... 
Historia Del Portero                51
Scared                              51
Don´t Leave Me Now                  50
Ghosts (Toxic Avenger Mix)          48
No Creo En El Jamas                 48
Name: listen_count, Length: 9593, dtype: int64

In [15]:
#songs_df_2 = pd.DataFrame(combined_songs_df.groupby('title')['listen_count'].count())
songs_df_2 = pd.DataFrame({'count' : combined_songs_df.groupby( [ "title"] ).size()}).reset_index()
songs_df_2.columns=['title','count']
#songs_df_2.head()
songs_df_2[(songs_df_2['count'] > 3000)  & (songs_df_2['count']<3113) ].head()
song_title = str(songs_df_2[songs_df_2['count'] ==3113 ]['title'].values[0])
print("this is title")
print(song_title)


this is title
Billionaire [feat. Bruno Mars]  (Explicit Album Version)


In [16]:
combined_songs_df.groupby('artist_name')['listen_count'].count().sort_values(ascending=False)

artist_name
Coldplay            32572
Kings Of Leon       26169
The Black Keys      19862
Jack Johnson        19590
Muse                19282
                    ...  
Shotta                 54
The Four Seasons       52
Ricardo Montaner       52
Umphrey's McGee        52
Amparanoia             50
Name: listen_count, Length: 3379, dtype: int64

In [17]:
Filter = combined_songs_df['song_id']=="SOWCKVR12A8C142411"
combined_songs_df[Filter]['artist_name'].unique()

array(['Kings Of Leon'], dtype=object)

### Create Pivot Table of User Vs Songs

In [19]:
ct_df = combined_songs_df.pivot_table(values='listen_count', index='user_id', columns='title', fill_value=0)

MemoryError: Unable to allocate 5.46 GiB for an array with shape (76353, 9593) and data type float64

In [None]:
ct_df.head()

In [None]:
X = ct_df.values.T
X.shape

### Compress dataset by applying Singular Value Decomposition (SVD)

In [21]:
SVD  = TruncatedSVD(n_components=20, random_state=17)
result_matrix = SVD.fit_transform(X)
result_matrix.shape

(9593, 20)

### Create Pearson coorelation matrix

In [22]:
corr_mat = np.corrcoef(result_matrix)
corr_mat.shape


(9593, 9593)

In [23]:
corr_mat[0][2]

0.5110047853799956

### Print books related to specified book


In [2]:
song_names = ct_df.columns
song_list = list(song_names)
print(song_list)


NameError: name 'ct_df' is not defined

In [1]:
query_index = song_list.index("Yesterday")

print(query_index)

NameError: name 'song_list' is not defined

In [26]:
corr_similar_songs = corr_mat[query_index]
corr_similar_songs.shape
print(corr_similar_songs)
print(type(song_list))
print((corr_similar_songs<1.0) & (corr_similar_songs>0.9))

[0.76008426 0.7608103  0.67635799 ... 0.87151773 0.75495399 0.80549467]
<class 'list'>
[False False False ... False False False]


In [27]:
list(song_names[(corr_similar_songs<1.0) & (corr_similar_songs>0.98)])

['Billionaire [feat. Bruno Mars]  (Explicit Album Version)',
 'Black',
 'Bulletproof',
 'Clocks',
 'Fag Hag',
 'Fireflies',
 'Girls_ Girls_ Girls',
 'Half Of My Heart',
 'How You Remind Me',
 'If I Had You',
 "Livin' On A Prayer",
 'OMG',
 'Resistance',
 'Supermassive Black Hole (Album Version)',
 'Supermassive Black Hole (Twilight Soundtrack Version)',
 'Un Violinista En Tu Tejado',
 'Uprising']