In [32]:
#The basic code was borrowed by this kaggle notebook: https://www.kaggle.com/code/pegahpooya/spotify-playlists-recommender-system
#please give a like to the creator!!!!!

In [2]:
#Importing libraries

import numpy as np 
import pandas as pd 
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from scipy import sparse
import random
import lightfm
from lightfm import LightFM, cross_validation
from lightfm.evaluation import precision_at_k, auc_score
from sklearn.metrics.pairwise import cosine_similarity
import pickle

import matplotlib.pyplot as plt



In [3]:
p = 0.5 # 50% of data due to high ammount

df_playlist = pd.read_csv('data\spotify_dataset.csv', error_bad_lines=False, warn_bad_lines=False, skiprows=lambda i: i>0 and random.random() > p)
df_playlist.head()

Unnamed: 0,user_id,"""artistname""","""trackname""","""playlistname"""
0,9cc0cfd4d7d7885102480dd99e7a90d6,Tiffany Page,7 Years Too Late,HARD ROCK 2010
1,9cc0cfd4d7d7885102480dd99e7a90d6,Elvis Costello,Alison,HARD ROCK 2010
2,9cc0cfd4d7d7885102480dd99e7a90d6,Paul McCartney,Band On The Run,HARD ROCK 2010
3,9cc0cfd4d7d7885102480dd99e7a90d6,Lissie,Bright Side,HARD ROCK 2010
4,9cc0cfd4d7d7885102480dd99e7a90d6,Paul McCartney,Dance Tonight,HARD ROCK 2010


In [4]:
df_playlist.shape

(6445673, 4)

In [5]:
df_playlist.columns = df_playlist.columns.str.replace('"', '')
df_playlist.columns = df_playlist.columns.str.replace('name', '')
df_playlist.columns = df_playlist.columns.str.replace(' ', '')
df_playlist.columns

Index(['user_id', 'artist', 'track', 'playlist'], dtype='object')

In [6]:
#For recommender system, I'm only keeping the artists with frequency higher than 50
df_playlist = df_playlist.groupby('artist').filter(lambda x : len(x)>=50)
#And keeping the users with at least 10 unique artists in their playlists to lessen the impact of cold start problem
df_playlist = df_playlist[df_playlist.groupby('user_id').artist.transform('nunique') >= 10]

In [7]:
#group by to get the frequnecy count for each user and artist (# of times that an artist has appeared in playlists created by a user)
size = lambda x: len(x)
df_freq = df_playlist.groupby(['user_id', 'artist']).agg('size').reset_index().rename(columns={0:'freq'})[['user_id', 'artist', 'freq']].sort_values(['freq'], ascending=False)
df_freq.head()


Unnamed: 0,user_id,artist,freq
246766,26b51e580277e131f87e4c7ee4c0887a,Vitamin String Quartet,1654
1392284,defced0ece4ce946160b0d2698142eac,Vitamin String Quartet,1643
412399,414050deadb38aafd8d4ad22ca634055,Vitamin String Quartet,1267
10952,014e695cc6df96011b90a5beb3206012,Ilaiyaraaja,1131
1357910,d993ff8f2de226e2c6803e47a22e9d7e,Lata Mangeshkar,1126


In [8]:
#create a DF for artists and add artist id
df_artist = pd.DataFrame(df_freq["artist"].unique())
df_artist = df_artist.reset_index()
df_artist = df_artist.rename(columns={'index':'artist_id', 0:'artist'})
df_artist.head()

Unnamed: 0,artist_id,artist
0,0,Vitamin String Quartet
1,1,Ilaiyaraaja
2,2,Lata Mangeshkar
3,3,Billie Holiday
4,4,Peggy Lee


In [9]:
#merge
df_freq  = pd.merge(df_freq , df_artist, how='inner', on='artist')

In [10]:
df_freq.head()

Unnamed: 0,user_id,artist,freq,artist_id
0,26b51e580277e131f87e4c7ee4c0887a,Vitamin String Quartet,1654,0
1,defced0ece4ce946160b0d2698142eac,Vitamin String Quartet,1643,0
2,414050deadb38aafd8d4ad22ca634055,Vitamin String Quartet,1267,0
3,e78e1e7b93c32bc27bf458f6cb8a5554,Vitamin String Quartet,145,0
4,5ed466280594117226d840fed87a8bc6,Vitamin String Quartet,144,0


In [11]:
#Helpers functions are from the repo below:
#https://github.com/aayushmnit/cookbook/blob/master/recsys.py

In [12]:
def create_interaction_matrix(df,user_col, item_col, rating_col, norm= False, threshold = None):
    '''
    Function to create an interaction matrix dataframe from transactional type interactions
    Required Input -
        - df = Pandas DataFrame containing user-item interactions
        - user_col = column name containing user's identifier
        - item_col = column name containing item's identifier
        - rating col = column name containing user feedback on interaction with a given item
        - norm (optional) = True if a normalization of ratings is needed
        - threshold (required if norm = True) = value above which the rating is favorable
    Expected output - 
        - Pandas dataframe with user-item interactions ready to be fed in a recommendation algorithm
    '''
    interactions = df.groupby([user_col, item_col])[rating_col] \
            .sum().unstack().reset_index(). \
            fillna(0).set_index(user_col)
    if norm:
        interactions = interactions.applymap(lambda x: 1 if x > threshold else 0)
    return interactions

# https://github.com/aayushmnit/cookbook/blob/master/recsys.py
def create_user_dict(interactions):
    '''
    Function to create a user dictionary based on their index and number in interaction dataset
    Required Input - 
        interactions - dataset create by create_interaction_matrix
    Expected Output -
        user_dict - Dictionary type output containing interaction_index as key and user_id as value
    '''
    user_id = list(interactions.index)
    user_dict = {}
    counter = 0 
    for i in user_id:
        user_dict[i] = counter
        counter += 1
    return user_dict


# https://github.com/aayushmnit/cookbook/blob/master/recsys.py
def create_item_dict(df,id_col,name_col):
    '''
    Function to create an item dictionary based on their item_id and item name
    Required Input - 
        - df = Pandas dataframe with Item information
        - id_col = Column name containing unique identifier for an item
        - name_col = Column name containing name of the item
    Expected Output -
        item_dict = Dictionary type output containing item_id as key and item_name as value
    '''
    item_dict ={}
    for i in range(df.shape[0]):
        item_dict[(df.loc[i,id_col])] = df.loc[i,name_col]
    return item_dict

# https://github.com/aayushmnit/cookbook/blob/master/recsys.py
def runMF(interactions, n_components=30, loss='warp', k=15, epoch=30,n_jobs = 4):
    '''
    Function to run matrix-factorization algorithm
    Required Input -
        - interactions = dataset create by create_interaction_matrix
        - n_components = number of embeddings you want to create to define Item and user
        - loss = loss function other options are logistic, brp
        - epoch = number of epochs to run 
        - n_jobs = number of cores used for execution 
    Expected Output  -
        Model - Trained model
    '''
    
    #uncommented for train test split
#     x = sparse.csr_matrix(interactions.values)
    model = LightFM(no_components= n_components, loss=loss,k=k)
    model.fit(x,epochs=epoch,num_threads = n_jobs)
    return model

# https://github.com/aayushmnit/cookbook/blob/master/recsys.py
def sample_recommendation_user(model, interactions, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 10, show = True):
    '''
    Function to produce user recommendations
    Required Input - 
        - model = Trained matrix factorization model
        - interactions = dataset used for training the model
        - user_id = user ID for which we need to generate recommendation
        - user_dict = Dictionary type input containing interaction_index as key and user_id as value
        - item_dict = Dictionary type input containing item_id as key and item_name as value
        - threshold = value above which the rating is favorable in new interaction matrix
        - nrec_items = Number of output recommendation needed
    Expected Output - 
        - Prints list of items the given user has already bought
        - Prints list of N recommended items  which user hopefully will be interested in
    '''
    n_users, n_items = interactions.shape
    user_x = user_dict[user_id]
    scores = pd.Series(model.predict(user_x,np.arange(n_items)))
    scores.index = interactions.columns
    scores = list(pd.Series(scores.sort_values(ascending=False).index))
    
    known_items = list(pd.Series(interactions.loc[user_id,:] \
                                 [interactions.loc[user_id,:] > threshold].index) \
								 .sort_values(ascending=False))
    
    scores = [x for x in scores if x not in known_items]
    return_score_list = scores[0:nrec_items]
    known_items = list(pd.Series(known_items).apply(lambda x: item_dict[x]))
    scores = list(pd.Series(return_score_list).apply(lambda x: item_dict[x]))
    if show == True:
        print("Known Likes:")
        counter = 1
        for i in known_items:
            print(str(counter) + '- ' + i)
            counter+=1

        print("\n Recommended Items:")
        counter = 1
        for i in scores:
            print(str(counter) + '- ' + i)
            counter+=1
    return return_score_list
    
#https://github.com/Lab41/hermes/blob/master/src/algorithms/performance_metrics.py
def calculate_population_category_diversity(y_predicted, content_array):
    """
    The higher the category diversity the better.
    Function determines the total sum of the categories for all people (rating_array).
    So for a random group of users resulting in 330 predictions in MovieLens this could look like:
        [71, 34, 11, 22, 126, 128, 0, 165, 21, 0, 35, 0, 62, 100, 5, 131, 3, 0]
    The average of each component (by total number of predictions) is then taken
        [0.21, 0.1, 0.03....0]
    The component averages are summed
        2.79
    Finally a scaling factor is utilized to take into consideration the number of categories and the average categories for an item
        0.31
    This final step is to help normalize across datasets where some may have many more/less categories and/or more/less dense item categorization
    Args:
        y_predicted: predicted ratings in the format of a RDD of [ (userId, itemId, predictedRating) ]. Should be the n predicted ratings
        content_array: content feature array of the items which should be in the format of (item [content_feature vector])
    Returns:
        cat_diversity:
    """
    ave_coverage = content_array.map(lambda id, array: sum(array)).mean()
    rating_array_raw = y_predicted.keyBy(lambda row: row[1]).join(content_array).map(lambda id, rating, array: array).collect()
    rating_array = map(sum,zip(*np.array(rating_array_raw)))
    cat_diversity = sum([r/float(len(rating_array_raw)) for r in rating_array])*ave_coverage/float(len(rating_array))

    return cat_diversity

In [13]:
#prep data

interactions = create_interaction_matrix(df = df_freq, user_col = "user_id", item_col = 'artist_id', rating_col = 'freq', norm= False, threshold = None)
interactions.head()
#interactions.shape

artist_id,0,1,2,3,4,5,6,7,8,9,...,14270,14271,14272,14273,14274,14275,14276,14277,14278,14279
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00055176fea33f6e027cd3302289378b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0007f3dd09c91198371454c608d47f22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000b0f32b5739f052b9d40fcc5c41079,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000c11a16c89aa4b14b328080f5954ee,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00123e0f544dee3ab006aa7f1e5725a7,0.0,0.0,0.0,0.0,0.0,22.0,0.0,85.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#create user dict

user_dict = create_user_dict(interactions=interactions)

In [15]:
artists_dict = create_item_dict(df = df_artist, id_col = 'artist_id', name_col = 'artist')

In [16]:
#train-test split

x = sparse.csr_matrix(interactions.values)
train, test = lightfm.cross_validation.random_train_test_split(x, test_percentage=0.2, random_state=None)

In [17]:
#train model (Matrix Factorization)

%time
model = runMF(interactions = train,
                 n_components = 30,
                 loss = 'warp',
                 k = 10,
                 epoch = 30,
                 n_jobs = 4)

CPU times: total: 0 ns
Wall time: 1.07 ms


In [24]:
%time
model2 = runMF(interactions = train,
                 n_components = 30,
                 loss = 'warp',
                 k = 10,
                 epoch = 15,
                 n_jobs = 4)

CPU times: total: 0 ns
Wall time: 0 ns


In [17]:
# save the model to disk
#filename = 'rec_artists_model.sav'
#pickle.dump(model, open(filename, 'wb'))

In [18]:
# load the model from disk
#model = pickle.load(open(filename, 'rb'))

##  Evaluation of our model

Actual recommendation example

In [36]:
#example
rec_list = sample_recommendation_user(model = model, 
                                      interactions = interactions, 
                                      user_id = '0059ac6b3ba3c3f415f09059a3bd703a', 
                                      user_dict = user_dict,
                                      item_dict = artists_dict, 
                                      threshold = 0,
                                      nrec_items = 10,
                                      show = True)

Known Likes:
1- The Protomen
2- Dead Man's Bones
3- Salem
4- Birdy
5- Astronautalis
6- Burial
7- Ben Howard
8- James Blake
9- Caribou
10- Mew
11- Beyoncé

 Recommended Items:
1- Lana Del Rey
2- M83
3- The xx
4- Bon Iver
5- Grimes
6- Arcade Fire
7- Radiohead
8- Lykke Li
9- Daft Punk
10- Purity Ring


In [19]:
# defining evaluation functions:

# this function calculates the AP for specific k and then takes the mean ---> mAP
def precision_recall_curve(y_true, rec_model, thresholds):
    precisions = []
    recalls = []
    
    for threshold in thresholds:

        precision = precision_at_k(model, y_true, k=threshold).mean()
        recall = lightfm.evaluation.recall_at_k(model, y_true, k=threshold).mean()
        
        precisions.append(precision)
        recalls.append(recall)

    return precisions, recalls
    
def evaluate_my_lightfm_model(lfm_model,train,test,k,k_list):
    #evaluate light fm model 
    # 
    # Input: lfm_model: light fm model, train: train set, test: test set, k: chosen specific k (n-rank) for calculating AP and AR
    # k_list: list of k values to calculate mAP and mAR
    # Output: metrics AUC, AP and AR for specific k,F-score,mAR,mAP, for train and test,

    #AUC

    train_auc = auc_score(lfm_model, train, num_threads=4).mean()
    print('Train AUC: %s' % train_auc)


    test_auc = auc_score(lfm_model, test, train_interactions=train, num_threads=4).mean()
    print('Test AUC: %s' % test_auc)

    #AP

    train_precision = precision_at_k(lfm_model, train, k=10).mean()
    test_precision = precision_at_k(lfm_model, test, k=10).mean()
    print('for k=',k)
    print('Average Precision: train %.2f, test %.2f.' % (train_precision, test_precision))

    #AR

    train_recall= lightfm.evaluation.recall_at_k(lfm_model, train, k=10).mean()
    test_recall = lightfm.evaluation.recall_at_k(lfm_model, test, k=10).mean()
    print('Average Recall: train %.2f, test %.2f.' % (train_recall, test_recall))
    
    #F-score
    train_F1 = 2*(train_precision*train_recall)/(train_precision+train_recall)
    test_F1 = 2*(test_precision*test_recall)/(test_precision+test_recall)
    print('F1: train %.2f, test %.2f.' % (train_F1, test_F1))

    #thresholds = [1,3,5,7,9,10,12,14,16]

    #map and mar
    precisions, recalls = precision_recall_curve(y_true=test, 
                                             rec_model=lfm_model,
                                             thresholds=k_list)
    print('mAP: test %.2f,mAR: test %.2f.' % (np.mean(precisions), np.mean(recalls)))


In [21]:
k_list = [1,3,5,7,9,10,12,14,16]
k = 10

for model with 30 epochs and warp loss function:



In [22]:
evaluate_my_lightfm_model(model,train,test,k,k_list)

Train AUC: 0.96672505
Test AUC: 0.967067
for k= 10
Average Precision: train 0.40, test 0.10.
Average Recall: train 0.07, test 0.07.
F1: train 0.12, test 0.08.
mAP: test 0.10,mAR: test 0.06.


for the model with 15 epochs and warp loss function:

In [25]:
evaluate_my_lightfm_model(model2,train,test,k,k_list)

Train AUC: 0.9595474
Test AUC: 0.9597996
for k= 10
Average Precision: train 0.38, test 0.10.
Average Recall: train 0.06, test 0.06.
F1: train 0.11, test 0.08.
mAP: test 0.10,mAR: test 0.06.


for the model with 30 epochs and warp brp function:

In [27]:
%time
model3 = runMF(interactions = train,
                 n_components = 30,
                 loss = 'bpr',
                 k = 10,
                 epoch = 30,
                 n_jobs = 4)

evaluate_my_lightfm_model(model3,train,test,k,k_list)

CPU times: total: 0 ns
Wall time: 0 ns
Train AUC: 0.92326266
Test AUC: 0.9230062
for k= 10
Average Precision: train 0.36, test 0.09.
Average Recall: train 0.06, test 0.06.
F1: train 0.11, test 0.07.
mAP: test 0.10,mAR: test 0.06.


Evaluation metrics (experimenting):

AUC

In [37]:
print('model 1:')
train_auc = auc_score(model, train, num_threads=4).mean()
print('Train AUC: %s' % train_auc)


test_auc = auc_score(model, test, train_interactions=train, num_threads=4).mean()
print('Test AUC: %s' % test_auc)

Train AUC: 0.9668799
Test AUC: 0.96759695


Mean Precision for top 10 choices

Measure of exactness, determines the fraction of relevant items retrieved out of all timems retrieved

In [38]:
train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10).mean()
print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))

Precision: train 0.40, test 0.10.


Mean Recall for top 10 choices

Measure of completeness, determines the fraction of relevant items retrieved out of all relevant items

In [42]:
train_recall= lightfm.evaluation.recall_at_k(model, train, k=10).mean()
test_recall = lightfm.evaluation.recall_at_k(model, test, k=10).mean()
print('Recall: train %.2f, test %.2f.' % (train_recall, test_recall))

Recall: train 0.07, test 0.07.


In [32]:
#looks like our algorithm is very selective

F1 measure

In [40]:
train_F1 = 2*(train_precision*train_recall)/(train_precision+train_recall)
test_F1 = 2*(test_precision*test_recall)/(test_precision+test_recall)
print('F1: train %.2f, test %.2f.' % (train_F1, test_F1))

F1: train 0.12, test 0.08.


mAP calculation

In [1]:
# this functions calculates the AP for specific k and then takes the mean ---> mAP
def precision_recall_curve(y_true, rec_model, thresholds):
    precisions = []
    recalls = []
    
    for threshold in thresholds:

        precision = precision_at_k(model, y_true, k=threshold).mean()
        recall = lightfm.evaluation.recall_at_k(model, y_true, k=threshold).mean()
        
        precisions.append(precision)
        recalls.append(recall)

    return precisions, recalls

In [47]:
# thresholds are the k s
thresholds = [1,3,5,7,9,10,12,14,16]

precisions, recalls = precision_recall_curve(y_true=test, 
                                             rec_model=model,
                                             thresholds=thresholds)

In [53]:
print('Average Precesions:')
print(precisions)
print('Average Recalls:')
print(recalls)

[0.11951447, 0.1112408, 0.10806101, 0.10471967, 0.10117751, 0.099610955, 0.09736487, 0.0951714, 0.09325397]
[0.009373848084690845, 0.02449769715409561, 0.03818192370015055, 0.05193485524191177, 0.06366570259700906, 0.06909975028537942, 0.07938703274282129, 0.08940442891123486, 0.09957145868477144]


In [52]:
print('mAP: test %.2f,mAR: test %.2f.' % (np.mean(precisions), np.mean(recalls)))

mAP: test 0.10,mAR: test 0.06.


In [None]:
# Next make two models
# Compare Preccision and Recall curves
# mAP is the mean for all the averages of ks