In [1]:
import numpy as np
import pandas as pd
import lda

from TopicModeling import *
from Recommender import *

# 1. Data

In [2]:
listen_df = pd.read_csv('Lastfm_listening_data.csv', encoding = "ISO-8859-1")

## Tag

In [3]:
track_df = itemDataframe(listen_df)
lastfm_network = lastfmAuth()
tracktag_dic = lastfmCrawlTag(track_df, lastfm_network)
track, tag, tracktag_m = tracktagMatrix(tracktag_dic)

In [4]:
listen_df = listen_df[listen_df['track_id'].isin(track)]

In [5]:
listen_df.head()

Unnamed: 0,u_id,time,artist_id,artist_name,track_id,track_name
0,user_000031,2009-05-01 10:37:23,9c1ff574-2ae4-4fea-881f-83293d0d5881,...And You Will Know Us By The Trail Of Dead,fe8be4ac-bbe8-48f3-bcac-25f45cb7d75e,Source Tags And Codes
1,user_000291,2009-05-01 09:02:09,6aa40207-fec8-43a7-991d-b872a42def05,Amy Macdonald,21bd2851-63a2-455b-800b-ea1aa4900a99,This Is The Life
2,user_000291,2009-05-01 09:05:37,83d91898-7763-47d7-b03b-b92132375c47,Pink Floyd,feecff58-8ee2-4a7f-ac23-dc8ce7925286,Wish You Were Here
3,user_000291,2009-05-01 09:12:12,83d91898-7763-47d7-b03b-b92132375c47,Pink Floyd,2d982a74-a21a-4714-9d81-405ede915053,Comfortably Numb
4,user_000291,2009-05-01 09:22:06,a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432,U2,44135d7f-f5d6-4ef9-8518-ed11f5b705e7,One


In [6]:
listen_df.u_id.nunique()

10

In [7]:
listen_df.track_id.nunique()

182

# 2. Topic Modeilng - LDA

In [8]:
LDAModel = lda.LDA(n_topics=30)
LDAModel.fit(tracktag_m.astype(int))

<lda.lda.LDA at 0x2af8a7ebc18>

In [9]:
tracktopic_m = LDAModel.doc_topic_
topictag_m = LDAModel.topic_word_

In [10]:
n_top_words = 10
for i, topic_dist in enumerate(topictag_m):
    topic_words = np.array(tag)[np.argsort(topic_dist)][:-(n_top_words + 1):-1]
    print('Topic {}, {}'.format(i+1, ','.join(topic_words)))

Topic 1, metal,alternative metal,nu metal,soundtrack,rock,deftones,alternative,hard rock,system of a down,queen of the damned
Topic 2, beautiful,love,mellow,sad,melancholic,indie,melancholy,romantic,shoegaze,ballad
Topic 3, alternative,rock,emo,evanescence,sad,melancholy,one tree hill,alternative rock,epic,emotional
Topic 4, folk,indie,singer-songwriter,indie folk,acoustic,happy,comedy,gypsy,alternative,anti-folk
Topic 5, electronic,electronica,dance,new rave,electro,indie,british,electropop,indietronica,electroclash
Topic 6, gothic,industrial,gothic rock,electronic,female vocalists,synth-rock,darkwave,synthpop,gothic metal,synth rock
Topic 7, hip-hop,electronic,dance,electronica,electro,crunk,alternative,pop,electro rap,richman
Topic 8, rock,alternative,alternative rock,emo,one tree hill,jimmy eat world,indie,love,awesome,muse
Topic 9, placebo,alternative,british,indie,britpop,rock,heard on pandora,indie rock,modern rock,all time favourites
Topic 10, heavy metal,hard rock,metal,rock,s

In [11]:
tracktopic_m.shape

(182, 30)

In [12]:
topictag_m.shape

(30, 4092)

# 3. Recommendation

## Item-based Collaborative Filtering

In [13]:
user, item, rating_m = ratingMatrix(listen_df)
item_sim_m = simCosMatrix(rating_m, axis=1)

In [15]:
score_cf_m, result_cf_df = recommenderCF(rating_m, item_sim_m, user, item, cf_type='item', n=10, k=182)

Select neighborhood items...
Compute predictions...
Recommend Top-n item...


## Topic-based Content-based Filtering

In [16]:
topic_sim_m = simCosMatrix(tracktopic_m, axis=0)
score_cbf_m, result_cbf_df = recommenderCF(rating_m, topic_sim_m, user, item, cf_type='item', n=10, k=182)

Select neighborhood items...
Compute predictions...
Recommend Top-n item...


## Hybird Recommendation

In [17]:
score_hybrid_m = hybridScore(score_cf_m, score_cbf_m, c=0.1)
result_hybrid_df = topnList(score_hybrid_m, user, item, 10)

# 4. Top-n Recommendation List

## Item-based Collaborative Filtering

In [18]:
result_cf_df = result_cf_df.merge(track_df, on='track_id').sort_values(['u_id', 'rank'])

In [19]:
result_cf_df[result_cf_df['u_id'] == "user_000524"]

Unnamed: 0,u_id,track_id,score,rank,artist_id,artist_name,track_name
14,user_000524,7ea0d658-b2a9-4a8d-a346-48fb2b8acf6a,0.099504,1,b10bbbfc-cf9e-42e0-be17-e2c3e1d2600d,The Beatles,Here Comes The Sun
15,user_000524,09a0040e-fd69-4dc9-aa1f-22311108a964,0.099504,2,3ac2a4a2-52b3-498b-bbc8-31443c68dfe0,Missy Higgins,The River
16,user_000524,210005fc-3b04-4112-9b39-5741244f71c7,0.099504,3,b10bbbfc-cf9e-42e0-be17-e2c3e1d2600d,The Beatles,Maxwell'S Silver Hammer
17,user_000524,21bd2851-63a2-455b-800b-ea1aa4900a99,0.099504,4,6aa40207-fec8-43a7-991d-b872a42def05,Amy Macdonald,This Is The Life
18,user_000524,97a521ad-9036-408b-9572-e1d63b872e06,0.099504,5,7952b266-9fd4-4a09-a324-7dc84f11b5fc,The John Butler Trio,What You Want
19,user_000524,94a2565f-9dd3-46bd-a70f-80c41224f561,0.099504,6,a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432,U2,"Stay (Faraway, So Close!)"
20,user_000524,171ad8ee-51ae-48c3-ae30-35436b2411af,0.099504,7,a4e34a43-d8de-48eb-9ec6-349b62756590,T?l?phone,Un Autre Monde
21,user_000524,8d69d3fc-5e0e-4c84-97eb-1891ca0d66e8,0.099504,8,a41ac10f-0a56-4672-9161-b83f9b223559,Van Morrison,Brown Eyed Girl
22,user_000524,8b6d9029-1990-4660-bd57-24b5bc0fb626,0.099504,9,d4d17620-fd97-4574-92a8-a2cb7e72ce42,The Verve,Bitter Sweet Symphony
23,user_000524,ba7ed38e-f5b0-438c-9df3-d5e061c353dd,0.099504,10,7952b266-9fd4-4a09-a324-7dc84f11b5fc,The John Butler Trio,Ocean


## Topic-based Content-based Filtering

In [20]:
result_cbf_df = result_cbf_df.merge(track_df, on='track_id').sort_values(['u_id', 'rank'])

In [21]:
result_cbf_df[result_cbf_df['u_id'] == "user_000524"]

Unnamed: 0,u_id,track_id,score,rank,artist_id,artist_name,track_name
74,user_000524,5b1e09e5-5f6f-447f-815b-57607acf6339,0.225121,1,8c9200b8-8e05-41d5-836e-44a37905560e,Hadouken!,Get Smashed Gate Crash
76,user_000524,09a0040e-fd69-4dc9-aa1f-22311108a964,0.220694,2,3ac2a4a2-52b3-498b-bbc8-31443c68dfe0,Missy Higgins,The River
78,user_000524,08ddb3fb-8f4c-4ea2-adaf-06d7df53e155,0.208642,3,e795e03d-b5d5-4a5f-834d-162cfb308a2c,Pj Harvey,This Is Love
15,user_000524,5447420a-88fd-4f5c-a293-bda90cbc0f44,0.20496,4,89618a45-ff4a-4e5f-942e-3ef93c8c555c,Witchfinder General,Satan'S Children (Live)
79,user_000524,06d6f9ad-401b-4215-8304-e7af3f3692b4,0.191193,5,b7ffd2af-418f-4be2-bdd1-22f8b48613da,Nine Inch Nails,Wish
80,user_000524,910a40bb-3b77-48cb-b978-77eae3d0398f,0.191093,6,efef848b-63e4-4323-8ef7-69a48fbdd51d,4 Non Blondes,What'S Up
81,user_000524,9839b527-a36d-4efb-8682-526601be5131,0.165318,7,8ca01f46-53ac-4af2-8516-55a909c0905e,My Bloody Valentine,Sometimes
82,user_000524,9391e18a-a0f4-443c-9343-69af2849abd5,0.153321,8,95e1ead9-4d31-4808-a7ac-32c3614c116b,The Killers,Daddy'S Eyes
62,user_000524,924de9ff-b23a-40ef-ae20-153d42472e6d,0.152915,9,847e8284-8582-4b0e-9c26-b042a4f49e57,Placebo,Hare Krishna
83,user_000524,910e91d3-e711-4b49-9de4-0caf5c536174,0.15222,10,89618a45-ff4a-4e5f-942e-3ef93c8c555c,Witchfinder General,Soviet Invasion


## Hybird Recommendation

In [22]:
result_hybrid_df = result_hybrid_df.merge(track_df, on='track_id').sort_values(['u_id', 'rank'])

In [23]:
result_hybrid_df[result_hybrid_df['u_id'] == "user_000524"]

Unnamed: 0,u_id,track_id,score,rank,artist_id,artist_name,track_name
74,user_000524,5b1e09e5-5f6f-447f-815b-57607acf6339,1.0,1,8c9200b8-8e05-41d5-836e-44a37905560e,Hadouken!,Get Smashed Gate Crash
76,user_000524,09a0040e-fd69-4dc9-aa1f-22311108a964,0.998034,2,3ac2a4a2-52b3-498b-bbc8-31443c68dfe0,Missy Higgins,The River
78,user_000524,08ddb3fb-8f4c-4ea2-adaf-06d7df53e155,0.99268,3,e795e03d-b5d5-4a5f-834d-162cfb308a2c,Pj Harvey,This Is Love
79,user_000524,06d6f9ad-401b-4215-8304-e7af3f3692b4,0.984929,4,b7ffd2af-418f-4be2-bdd1-22f8b48613da,Nine Inch Nails,Wish
80,user_000524,910a40bb-3b77-48cb-b978-77eae3d0398f,0.984885,5,efef848b-63e4-4323-8ef7-69a48fbdd51d,4 Non Blondes,What'S Up
81,user_000524,9839b527-a36d-4efb-8682-526601be5131,0.973435,6,8ca01f46-53ac-4af2-8516-55a909c0905e,My Bloody Valentine,Sometimes
82,user_000524,9391e18a-a0f4-443c-9343-69af2849abd5,0.968106,7,95e1ead9-4d31-4808-a7ac-32c3614c116b,The Killers,Daddy'S Eyes
83,user_000524,924de9ff-b23a-40ef-ae20-153d42472e6d,0.967926,8,847e8284-8582-4b0e-9c26-b042a4f49e57,Placebo,Hare Krishna
84,user_000524,290dcfd6-827e-4864-b71a-553c45ba526b,0.966527,9,99acd557-c4e2-4086-9be9-85f57184dadc,O.S.T.R.,Pocz?tek...
85,user_000524,1e21e59e-ae7c-4658-bfe4-1e0c749d19c4,0.965959,10,c3f28da8-662d-4f09-bdc7-3084bf685930,Iron & Wine,Free Until They Cut Me Down
