In [1]:
import numpy as np
import pandas as pd
from lightfm import LightFM
from scipy.sparse import coo_matrix

In [60]:
# Preprocessing

users = {}   # k (given id). v (len of itself)
artists = {} # k (given id). v (tuple: name, len of itself)

data, i, j = [], [], []  # coo_matrix parameters
jnames = []

with open('/media/fd/HDD/lastfm-dataset-360K/usersha1-artmbid-artname-plays.tsv') as f:
    
    for idx, line in enumerate(f):
        user_id, artist_id, artist_name, total_plays = line.split('\t')  # ASCII Horizontal Tab (TAB)
        total_plays = int(total_plays)
        if user_id not in users:
            users[user_id] = len(users)
        if artist_id not in artists:
            artists[artist_id] = artist_name, len(artists)
        if total_plays > 500:
            data.append(total_plays)
            i.append(users[user_id])
            j.append(artists[artist_id][1])
            jnames.append(artists[artist_id][0])
            
coo_m = coo_matrix((data, (i, j)), [len(users), len(artists)], dtype='int32')

In [61]:
item_labels = np.asarray(jnames)

In [75]:
# Models

model = LightFM(loss='warp')
model.fit(coo_m, epochs=50, num_threads=8)

model2 = LightFM(loss='logistic')
model2.fit(coo_m, epochs=50, num_threads=8)

model3 = LightFM(loss='warp-kos')
model3.fit(coo_m, epochs=50, num_threads=8)

model4 = LightFM(loss='bpr')
model4.fit(coo_m, epochs=50, num_threads=8)

<lightfm.lightfm.LightFM at 0x7ff44b3e1b70>

In [82]:
def sample_recomendation(model, item_labels, matrix, user_ids):
    #generate recommendations for each user
    for user_id in user_ids:
        
        #artists already listened
        known_positives = item_labels[matrix.tocsr()[user_id].indices]
        
        #predicted new likes
        scores = model.predict(user_id, np.arange(matrix.shape[1]))
        
        #rank scores (descending)
        top_items = item_labels[np.argsort(-scores)]
        print("User %s" % user_id)
        print("Known positives:")
        
        for x in known_positives[:3]:
            print("          %s" % x)
            
        print("     Recommended:")
        
        for x in top_items[:3]:
            if x not in known_positives:
            print("          %s" % x)

In [103]:
ids = [2]

In [104]:
sample_recomendation(model, item_labels, coo_m, ids)

User 2
Known positives:
          goo goo dolls
          lostprophets
     Recommended:
          nine inch nails
          amy winehouse
          lapko


In [105]:
sample_recomendation(model2, item_labels, coo_m, ids)

User 2
Known positives:
          goo goo dolls
          lostprophets
     Recommended:
          lapko
          happy mondays
          amy winehouse


In [106]:
sample_recomendation(model3, item_labels, coo_m, ids)

User 2
Known positives:
          goo goo dolls
          lostprophets
     Recommended:
          amy winehouse
          rebellion


In [107]:
sample_recomendation(model4, item_labels, coo_m, ids)

User 2
Known positives:
          goo goo dolls
          lostprophets
     Recommended:
          devin townsend
          acid house kings
