# Content Based Recommendation

In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot
%matplotlib inline
import seaborn as sns
import Recommenders

In [2]:
df = pd.read_csv('Dataset.csv', index_col='Unnamed: 0')
df.head()

  mask |= (ar1 == a)


Unnamed: 0,user_id,song_id,listen_count,song
0,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOBONKR12A58A7A7E0,1,You're The One - Dwight Yoakam
1,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOEGIYH12A6D4FC0E3,1,Horn Concerto No. 4 in E flat K495: II. Romanc...
2,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOFLJQZ12A6D4FADA6,1,Tive Sim - Cartola
3,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOHTKMO12AB01843B0,1,Catch You Baby (Steve Pitron & Max Sanna Radio...
4,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SODQZCY12A6D4F9D11,1,El Cuatrero - Miguel Calo


In [3]:
df.shape

(1450932, 4)

In [4]:
df['song'].nunique()

162043

# Using Recommender code

In [5]:
df1 = df.head(50000)

In [6]:
ir = Recommenders.item_similarity_recommender_py()
ir.create(df1, 'user_id', 'song')

In [7]:
ir.get_similar_items(['Youre The One - Dwight Yoakam'])

no. of unique songs in the training set: 25834
Non zero values in cooccurence_matrix :0


Unnamed: 0,user_id,song,score,rank
0,,Are You Lonesome Tonight? - Chris Botti featur...,0.0,1
1,,Here's That Rainy Day - Chris Botti featuring ...,0.0,2
2,,The Way Home - Chris Botti,0.0,3
3,,What Are You Doing The Rest Of Your Life? - Ch...,0.0,4
4,,Non Posso Perderti - Bobby Solo,0.0,5
5,,Drive Time - Chris Botti,0.0,6
6,,Smile - Chris Botti featuring Steven Tyler,0.0,7
7,,When Love Breaks U Down - Glenn Jones,0.0,8
8,,Welcome To The Occupation (1999 Digital Remast...,0.0,9
9,,How - The Cranberries,0.0,10


# Using KNN

In [8]:
# each songs total listen count
music_listenCount = (df.groupby(['song'])['listen_count'].
    count().
    reset_index().
    rename(columns={'listen_count':'total_listen_count'})
    [['song', 'total_listen_count']])
music_listenCount.head()

Unnamed: 0,song,total_listen_count
0,Ef Ég Hefði Aldrei... - Johann Johannsson,1
1,Light Mass Prayers - Porcupine Tree,4
2,"The Arsonist Story"": Evil Craves Attention/O...",1
3,Ég Átti Gráa Æsku - Johann Johannsson,4
4,(Jack The Stripper) - Nekromantix,2


In [9]:
music_listenCount.shape

(162043, 2)

In [10]:
music_listenCount.describe()

Unnamed: 0,total_listen_count
count,162043.0
mean,8.953994
std,47.051838
min,1.0
25%,1.0
50%,2.0
75%,6.0
max,5043.0


In [11]:
listen_with_totalListenCount = df.merge(music_listenCount, left_on='song', right_on='song', how='left')
listen_with_totalListenCount.head()

Unnamed: 0,user_id,song_id,listen_count,song,total_listen_count
0,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOBONKR12A58A7A7E0,1,You're The One - Dwight Yoakam,4136
1,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOEGIYH12A6D4FC0E3,1,Horn Concerto No. 4 in E flat K495: II. Romanc...,3272
2,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOFLJQZ12A6D4FADA6,1,Tive Sim - Cartola,2668
3,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOHTKMO12AB01843B0,1,Catch You Baby (Steve Pitron & Max Sanna Radio...,2097
4,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SODQZCY12A6D4F9D11,1,El Cuatrero - Miguel Calo,177


In [12]:
listen_with_totalListenCount.shape

(1450932, 5)

In [13]:
popularity_threshold = 200
listen_popular_songs = listen_with_totalListenCount.query('total_listen_count >= @popularity_threshold')
listen_popular_songs.head()

Unnamed: 0,user_id,song_id,listen_count,song,total_listen_count
0,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOBONKR12A58A7A7E0,1,You're The One - Dwight Yoakam,4136
1,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOEGIYH12A6D4FC0E3,1,Horn Concerto No. 4 in E flat K495: II. Romanc...,3272
2,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOFLJQZ12A6D4FADA6,1,Tive Sim - Cartola,2668
3,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOHTKMO12AB01843B0,1,Catch You Baby (Steve Pitron & Max Sanna Radio...,2097
5,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOXLOQG12AF72A2D55,1,Unite (2009 Digital Remaster) - Beastie Boys,830


In [14]:
listen_popular_songs.shape

(299831, 5)

In [15]:
# Create a pivot table
music_feature_df = listen_popular_songs.pivot_table(index='song', columns='user_id', values='listen_count').fillna(0)
music_feature_df.head()

user_id,00014a76ed063e1a749171a253bca9d9a0ff1782,00015189668691680bb1a2e58afde1541ec92ced,0001ff7aa2667c8d8b945317b88adaed1c0b9dc2,0003798d61a8010dbfd8010180b7107cbbb8a649,0003d5245414b9c6244046543a06f601bfc97949,00040e91f2f614ca2f94ebc558dd0fc53ea2b875,0004dd6499e2e3f630f070416c437627de60d5c1,0005b81b840251543089e816fb790690e3914df2,00061a993bb3add5cc37b2ea28a9552337f2fe4d,0006a8a3733a7009f8bc109ccb3cdb59a19721ad,...,fff7d30a45a17d26768d020b75818008a741ef2c,fff7d93d0384f02498bcbaefccd76bee486da8cc,fff7e94f0a8d3c6c705b5f7c677d61da82aa545b,fffa8a20b865c4d2443785bab08bb498f0795a43,fffcfe9b89b791e39ff3e643aa57ae9303079c56,fffd0a1b26c37a55574d844277d176991d054f99,fffdef71f13352e9cff769f1d96f5ccf90f8955e,fffe29116f96c97b47a5dabdd406784ad0ba6f30,fffe5b73c50c72ca9c54947efac1fcacf59c4a17,fffed0bee753cd78494011b8b55dafd4f5c7e5ff
song,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Till I Collapse - Eminem / Nate Dogg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(Nice Dream) - Radiohead,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(iii) - The Gerbils,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15 Step - Radiohead,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16 Candles - The Crests,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
music_feature_df_matrix = csr_matrix(music_feature_df.values)

model_knn = NearestNeighbors(metric = 'cosine', algorithm='brute')
model_knn.fit(music_feature_df_matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

In [17]:
music_feature_df.shape

(607, 81091)

In [18]:
query_index = np.random.choice(music_feature_df.shape[0])
print(query_index)
distance, indices = model_knn.kneighbors(music_feature_df.iloc[query_index, :].values.reshape(1, -1), n_neighbors=6)

447


In [19]:
music_feature_df.head(3)

user_id,00014a76ed063e1a749171a253bca9d9a0ff1782,00015189668691680bb1a2e58afde1541ec92ced,0001ff7aa2667c8d8b945317b88adaed1c0b9dc2,0003798d61a8010dbfd8010180b7107cbbb8a649,0003d5245414b9c6244046543a06f601bfc97949,00040e91f2f614ca2f94ebc558dd0fc53ea2b875,0004dd6499e2e3f630f070416c437627de60d5c1,0005b81b840251543089e816fb790690e3914df2,00061a993bb3add5cc37b2ea28a9552337f2fe4d,0006a8a3733a7009f8bc109ccb3cdb59a19721ad,...,fff7d30a45a17d26768d020b75818008a741ef2c,fff7d93d0384f02498bcbaefccd76bee486da8cc,fff7e94f0a8d3c6c705b5f7c677d61da82aa545b,fffa8a20b865c4d2443785bab08bb498f0795a43,fffcfe9b89b791e39ff3e643aa57ae9303079c56,fffd0a1b26c37a55574d844277d176991d054f99,fffdef71f13352e9cff769f1d96f5ccf90f8955e,fffe29116f96c97b47a5dabdd406784ad0ba6f30,fffe5b73c50c72ca9c54947efac1fcacf59c4a17,fffed0bee753cd78494011b8b55dafd4f5c7e5ff
song,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Till I Collapse - Eminem / Nate Dogg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(Nice Dream) - Radiohead,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(iii) - The Gerbils,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
for i in range(0, len(distance.flatten())):
    if i == 0:
        print('Recommendation for {0}:\n'.format(music_feature_df.index[query_index]))
    else:
        print('{0}: {1} --------> with distance of {2}'.format(i, music_feature_df.index[indices.flatten()[i]], distance.flatten()[i]))

Recommendation for Sincerité Et Jalousie - Alliance Ethnik:

1: I Gotta Feeling - Black Eyed Peas --------> with distance of 0.8858247421066081
2: You And Me (Wedding Version) - Lifehouse --------> with distance of 0.9351388099014356
3: It's My Own Fault - Johnny Winter --------> with distance of 0.9362203105130867
4: 16 Candles - The Crests --------> with distance of 0.9458959360327553
5: Whataya Want From Me - Adam Lambert --------> with distance of 0.948448774386278


# Using Pearson Correlation

In [21]:
songmat = listen_popular_songs.pivot_table(index='user_id',columns='song',values='listen_count').fillna(0)
songmat.head(5)

song,'Till I Collapse - Eminem / Nate Dogg,(Nice Dream) - Radiohead,(iii) - The Gerbils,15 Step - Radiohead,16 Candles - The Crests,21 Guns [feat. Green Day & The Cast Of American Idiot] (Album Version) - Green Day,3AM (LP Version) - matchbox twenty,A Beggar On A Beach Of Gold - Mike And The Mechanics,A-Punk (Album) - Vampire Weekend,Above The Clouds (Edited) - Gang Starr/Inspectah Deck,...,You Belong With Me - Taylor Swift,You Get What You Give - New Radicals,You Give Love A Bad Name - Bon Jovi,You Know I'm No Good - Amy Winehouse,You'll Be In My Heart - Phil Collins,You'll Never Know (My Love) (Bovellian 07 Mix) - Edwyn Collins,You're The One - Dwight Yoakam,You've Got The Love - Florence + The Machine,Young - Hollywood Undead,Your Touch - The Black Keys
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00014a76ed063e1a749171a253bca9d9a0ff1782,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00015189668691680bb1a2e58afde1541ec92ced,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0001ff7aa2667c8d8b945317b88adaed1c0b9dc2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0003798d61a8010dbfd8010180b7107cbbb8a649,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0003d5245414b9c6244046543a06f601bfc97949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
song_user_listen = songmat['Alejandro - Lady GaGa']
similar=songmat.corrwith(song_user_listen )
df1=pd.DataFrame(similar,columns=['Correlation'])
df1.sort_values('Correlation',ascending=False).head(10)

Unnamed: 0_level_0,Correlation
song,Unnamed: 1_level_1
Alejandro - Lady GaGa,1.0
Bleeding Love - Leona Lewis,0.321869
The News - Jack Johnson,0.170485
Monster - Lady GaGa,0.156228
Cooler Than Me - Mike Posner,0.148896
Whataya Want From Me - Adam Lambert,0.137373
LoveStoned/I Think She Knows - Justin Timberlake,0.117436
Halo - Beyoncé,0.094648
Toxic - Britney Spears,0.093588
What Goes Around...Comes Around - Justin Timberlake,0.09003
