In [1]:
import pandas as pd
import numpy as np
import implicit
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.sparse as sp

In [2]:
#read in data
le = pd.read_csv('nprs_final.csv',usecols=['user_id','track_id','created_at'])

In [3]:
les = le.sort_values(by='created_at')

In [4]:
#assign each event a count of 1
les['count'] = 1

In [5]:
#group by user/item id and sum counts
rating = les.groupby(['user_id','track_id'],as_index=False)['count'].sum()

In [6]:
#unique list of users
users = list(rating.user_id.unique())
#unique list of songs
songs = list(rating.track_id.unique())
#count of interactions per user/item
conf = list(rating['count'])

In [7]:
#assign sequential user and item indicies for sparse matrix
#assign category codes to user
cols = rating.user_id.astype('category').cat.codes
#assign category code to item
rows = rating.track_id.astype('category').cat.codes

In [11]:
#create sparse matrix of users/items with interaction count as entry
smat = sp.csr_matrix((conf,(rows,cols)),shape=(len(songs),len(users)))

In [12]:
smat

<81343x21220 sparse matrix of type '<class 'numpy.intc'>'
	with 1053887 stored elements in Compressed Sparse Row format>

In [13]:
matrix_size = smat.shape[0]*smat.shape[1] # Number of possible interactions in the matrix
num_listens = len(smat.nonzero()[0]) # Number of items interacted with
sparsity = 100*(1 - (num_listens/matrix_size))
print(f'Sparsity of the user/item matrix is {sparsity}')

Sparsity of the user/item matrix is 99.93894398121415


In [14]:
#import implicit library als, eval metric, and split function
from implicit.evaluation import precision_at_k, train_test_split
from implicit.als import AlternatingLeastSquares

In [15]:
#create train and test set
train, test = train_test_split(smat)

In [16]:
#instantiate and fit model
model = AlternatingLeastSquares(factors=130, regularization=20, iterations=20)
model.fit(train)



HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




### Evaluation: Precision @ k

Due to not having access to the song/artist names, we are limited to a qunatitative accessment of how well the model is performing.  For this metric we choose. precision at k. P @ k is chosen because it is a good indicator to how well the model is performing, and it is an intuitive metric.  Precision at k is the proportion of recommended items in the top-k set that are relevant.  If we set k = 10, then 3 relevant items in the top k recommendations would yield precision at k of 30%. 

In [17]:
ALS_precision_at_k = precision_at_k(model, train.T.tocsr(), test.T.tocsr(), K=10, num_threads=4)

HBox(children=(FloatProgress(value=0.0, max=21220.0), HTML(value='')))




In [18]:
ALS_precision_at_k

0.06839781561453441

For the ALS model, we get a precision at k of .68%.  That means that less than 1 item in out top ten recommendations we relevant to the user.  Obviously, we would like to so better than that.  Generally speaking, when I qualitatively evaluate my recommended songs on Spotify, I would say that precision at k would fall somewhere between 30 - 40%.  However, the Spotify model is a sophisticated hybrid model, so one would expect good p@k scores. 

The low p@k score for our model is most likely due to having a very sparse matix which is a characteristic of the cold start problem.

Another possible way to evaulate the model would be to get recommendations for a specific user, and then get recommendations of a similar user.  We would then compare the lists and see if any recommended items appear for each user.  We can do this by making lists of the recommended items and looking at the intersection of those lists.  We can extend that by taking any items that appear in both lists, and finding similar items.  We could then take that list and cross-reference to see if any similar items appears in the recommendations for the user/similar user.

In [60]:
#recommend items for user 937
recs937 = model.recommend(937, smat)

#grab items and store in list
reclist_937 = [item for item, _ in recs937]

In [20]:
#Find similar users to user 937
model.similar_users(937)

[(937, 0.045337625),
 (13543, 0.02613195),
 (20334, 0.02596296),
 (17891, 0.025756504),
 (6987, 0.025645282),
 (3085, 0.024724733),
 (737, 0.02455601),
 (3368, 0.024459321),
 (16248, 0.024253428),
 (12122, 0.023699267)]

In [64]:
#recommend song to user closest to 937
similar_user_items = model.recommend(13543,smat)

#grab items recommended to similar user
reclist_similar_user =  [item for item, _ in similar_user_items]

In [70]:
# check if any items overlap similar user
set(reclist_similar_user).intersection(set(reclist_937))

{50405, 55849}

In [66]:
#Check if similar items are in user 937 or similar users rec list
sim_songs = model.similar_items(55489)
sim_song_list = [x for x, _ in sim_songs]

In [68]:
set(sim_song_list).intersection(set(reclist_937))

set()

In [69]:
set(sim_song_list).intersection(set(reclist_similar_user))

set()

The results show that songs 50405 and 55849 both appears in user 937's recommendation list and that users most similar user (user 13543) recommendation list.  However, when we find items most similar to song 55489, none of the returned items appear in the recommendation list for either user.  

In [25]:
# import lightfm library and auc_score function
from lightfm import LightFM # model
from lightfm.evaluation import precision_at_k



In [26]:
#instantiate light fm model with 130 components (same as als model)
# se 'loss' equal to 'warp' to optimize precision at k
modelfm = LightFM(
    no_components=130,
    learning_rate=0.05,
    loss='warp',
    random_state=2019)

In [27]:
#import lightfm dataset class
from lightfm.data import Dataset

In [28]:
#fit dataset class with user/items while supplying no content features
dataset = Dataset()
dataset.fit(
    users, 
    songs,
    item_features=None, 
    user_features=None)

In [29]:
#fit the model on the training data
modelfm.fit(
    train,
    item_features=None,
    user_features=None, sample_weight=None,
    epochs=5, num_threads=4, verbose=True)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4


<lightfm.lightfm.LightFM at 0x2422a201888>

In [30]:
#score the model on the test data
test_score = precision_at_k( 
        modelfm, test, 
        item_features=None, 
        user_features=None, 
        num_threads=4)

In [32]:
test_score.mean()

0.039319575

The lightfm documentation states that any user/item pair with no interaction is set to zero, which reflects in the test_score mean.  We need to remove the zero values and recalculate the get the average precision at k

In [71]:
#import counter to create dictionary of values
from collections import Counter

In [34]:
test_score_dict = Counter(test_score)

In [35]:
test_score_dict

Counter({0.0: 36492,
         0.2: 2769,
         0.1: 13104,
         0.3: 560,
         0.4: 112,
         0.5: 17,
         0.6: 1})

In [37]:
# we see that 36492 user/item pairs had no interaction
#docs state that indices with zero interaction get a score of zero, remove the zeros and calculate average
test_no_zero = [x for x in test_score if x != 0.0]

#calculate mean test p@k
mean_test_pak = sum(test_no_zero) / len(test_no_zero)

print(mean_test_pak)

0.12594940742292332


In [38]:
#calculate train precision @ k for comparison
train_score = precision_at_k( 
        modelfm, train, 
        item_features=None, 
        user_features=None, 
        num_threads=4)

In [40]:
#create count dictionary
train_score_dict = Counter(train_score)

In [41]:
train_score_dict

Counter({0.1: 27993,
         0.0: 18947,
         0.8: 335,
         0.2: 15161,
         0.3: 7529,
         0.4: 3949,
         0.6: 1311,
         0.7: 727,
         0.5: 2333,
         1.0: 25,
         0.9: 107})

In [72]:
#remove zeros from list
train_no_zero = [x for x in train_score if x != 0.0]
#calculate train average precision @ k
mean_train_pak = sum(train_no_zero) / len(train_no_zero)

print(mean_train_pak)

0.21054481628080146


In [73]:
 mean_test_pak-ALS_precision_at_k

0.05755159180838891

We do see an improvement of 0.057 in the precision @ k when compared to the ALS model.  The lightfm model yielded a score of 0.12.  It is obvious both models suffer from a spare user/item matrix and the cold start problem.  This can be addressed by creating a hybrid system the incorporates meta-data from the users and items.  Lightfm was chosen because of its ability to incorporate these features.  However, after spending considerable time attempting to build the hybrid model without success, the task is left for a future iteration of the project.