In [2]:
import numpy as np
import pandas as pd 
import random
from tqdm import tqdm
import scipy.sparse as sp
import sklearn.preprocessing as sk
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.preprocessing import MultiLabelBinarizer, normalize
import sys
sys.path.append("./")
import cosine_similarity

ModuleNotFoundError: No module named 'cosine_similarity'

In [2]:
class Utils(object):
    """Shared data access and matrix builders for the recommenders.

    Wraps the three input dataframes and derives from them the
    playlist/track interaction matrix (URM), its TF-IDF weighted variant
    (UCM), the item content matrix (ICM) and the similarity matrices.
    """

    def __init__(self, train, tracks, target_playlists):
        """Keep references to the input dataframes.

        train must expose 'playlist_id' and 'track_id' columns; tracks must
        expose 'track_id', 'artist_id' and 'album_id' columns.
        target_playlists is stored untouched and handed back by
        get_target_playlists().
        """
        self.train = train
        self.tracks = tracks
        self.target_playlists = target_playlists

    def get_target_playlists(self):
        """Return the target playlists exactly as given to the constructor."""
        return self.target_playlists

    def get_top10_tracks(self, URM, my_id, row):
        """Return the column indices of the 10 best-scored unseen tracks.

        URM: CSR interaction matrix; row ``my_id`` lists the tracks that are
            already in the playlist.
        my_id: row index of the playlist inside URM.
        row: 1 x n_tracks sparse matrix holding the predicted scores.

        Returns an integer array ordered from the best score to the worst.
        ``row`` is no longer modified in place.
        """
        seen_tracks = URM.indices[URM.indptr[my_id]:URM.indptr[my_id + 1]]
        scores = row.toarray().ravel().astype(np.float64)
        # Seen tracks are masked with -inf rather than 0: a zero would tie with
        # every unscored track and beat any negative score, letting tracks the
        # playlist already contains slip back into the recommendation.
        scores[seen_tracks] = -np.inf
        return scores.argsort()[-10:][::-1]

    def get_URM(self):
        """Build the sparse playlist x track interaction matrix.

        Rows follow the sorted 'playlist_id' groups of ``train``; columns
        follow the order of ``tracks['track_id'].unique()``.
        NOTE(review): the recommenders use row/column positions directly as
        playlist/track ids, which presumably relies on both id spaces being
        dense and already ordered - confirm against the dataset.
        """
        grouped = self.train.groupby('playlist_id')['track_id'].apply(list)
        URM = MultiLabelBinarizer(classes=self.tracks['track_id'].unique(), sparse_output=True).fit_transform(grouped)
        return URM

    def get_UCM(self, URM):
        """Return the TF-IDF weighted URM, L2-normalised along axis 0, as CSR."""
        UCM = TfidfTransformer().fit_transform(URM.T).T
        return normalize(UCM, 'l2', 0).tocsr()

    def _tfidf_feature_block(self, column, weight=1.0):
        """Build the TF-IDF weighted track x ``column`` indicator matrix."""
        grouped = self.tracks.groupby('track_id')[column].apply(list)
        block = MultiLabelBinarizer(classes=self.tracks[column].unique(), sparse_output=True).fit_transform(grouped)
        if weight != 1.0:
            block = block * weight
        return TfidfTransformer().fit_transform(block.T).T

    def get_ICM(self):  # returns Item Content Matrix
        """Return the track x (artists + albums) content matrix as CSR.

        NOTE(review): the 0.8 artist weight is applied before the TF-IDF step
        and before the final per-column L2 normalisation below; scaling a
        whole column by a constant is undone by that normalisation, so the
        weight very likely has no effect on the result - confirm and, if a
        real feature weight is wanted, apply it after normalising.
        """
        ICM_artists = self._tfidf_feature_block('artist_id', weight=0.8)  # best weight for the artist feature
        ICM_albums = self._tfidf_feature_block('album_id')

        ICM = sp.hstack((ICM_artists, ICM_albums))
        return normalize(ICM, 'l2', 0).tocsr()

    def get_itemsim_CB(self, knn):
        """Content-based item similarity computed from the transposed ICM, as CSR."""
        ICM = self.get_ICM()

        similarity = cosine_similarity.Cosine_Similarity(dataMatrix=ICM.T,normalize=True,shrink=100,mode='cosine', topK=knn)
        S = similarity.compute_similarity()

        return S.tocsr()

    def get_itemsim_CF(self, URM, knn):
        """Collaborative item similarity computed from the UCM of ``URM``, as CSR."""
        UCM = self.get_UCM(URM)

        similarity = cosine_similarity.Cosine_Similarity(dataMatrix=UCM,normalize=True,shrink=100,mode='cosine', topK=knn)
        S = similarity.compute_similarity()

        return S.tocsr()

    def get_usersim_CF(self, URM, knn):
        """Collaborative playlist similarity computed from the transposed UCM, as CSR."""
        UCM = self.get_UCM(URM)

        similarity = cosine_similarity.Cosine_Similarity(dataMatrix=UCM.T,normalize=True,shrink=100,mode='cosine', topK=knn)
        S = similarity.compute_similarity()

        return S.tocsr()

In [3]:
class Eval(object):
    """Hold-out evaluation harness.

    Samples a set of target playlists, hides 10 tracks from each of them to
    obtain a training URM, and scores recommendations against the hidden
    tracks with Mean Average Precision.
    """

    def __init__(self, u):
        """Fetch the full URM through ``u.get_URM()`` and build the split."""
        self.URM = u.get_URM()
        self.target_playlists = None
        self.target_tracks = None
        self.URM_train = None
        self.build_URM_test()

    def build_URM_test(self):
        """Pick the target playlists and hide 10 random tracks from each.

        Targets are drawn with ``random.sample`` among playlists holding more
        than 10 tracks; their number is 20% of all playlists, capped at the
        number of eligible playlists so that small datasets do not raise.
        Fills ``target_playlists`` (dataframe with a 'playlist_id' column),
        ``target_tracks`` (one list of 10 hidden tracks per target, as an
        array) and ``URM_train`` (CSR copy of the URM without hidden tracks).
        """
        total_users = self.URM.shape[0]
        self.URM_train = self.URM.copy().tolil()

        # playlists with more than 10 songs
        playlist_sizes = np.diff(self.URM.indptr)
        possible_playlists = np.flatnonzero(playlist_sizes > 10).tolist()

        # target playlists, 20% of total playlists (at most the eligible ones)
        n_targets = min(int(0.20 * total_users), len(possible_playlists))
        self.target_playlists = pd.DataFrame(data=random.sample(possible_playlists, n_targets),
                                             columns=['playlist_id'])

        hidden_tracks = []
        for idx in list(self.target_playlists['playlist_id']):
            playlist_tracks = list(self.URM.indices[self.URM.indptr[idx]:self.URM.indptr[idx + 1]])
            target_songs = random.sample(playlist_tracks, 10)
            self.URM_train[idx, target_songs] = 0
            hidden_tracks.append(target_songs)

        self.target_tracks = np.array(hidden_tracks)
        self.URM_train = self.URM_train.tocsr()

    def get_URM_train(self):
        """Return the training URM (hidden tracks removed)."""
        return self.URM_train

    def get_URM(self):
        """Return the full URM."""
        return self.URM

    def get_target_playlists(self):
        """Return the dataframe of sampled target playlists."""
        return self.target_playlists

    def get_target_tracks(self):
        """Return the hidden tracks, one row per target playlist."""
        return self.target_tracks

    def AP(self, recommended_items, relevant_items):
        """Average precision of one recommendation list.

        Both arguments are expected to hold unique ids. Returns 0.0 when
        either list is empty instead of dividing by zero.
        """
        relevant_items = np.asarray(relevant_items)
        relevant = np.isin(recommended_items, relevant_items, assume_unique=True)
        denominator = min(relevant_items.shape[0], relevant.shape[0])
        if denominator == 0:
            return 0.0
        p_at_k = relevant * np.cumsum(relevant, dtype=np.float32) / (1 + np.arange(relevant.shape[0]))
        return np.sum(p_at_k) / denominator

    # input has to be the dataframe returned by the recommender and the hidden tracks
    # NB: the songs in the dataframe must be a list (or ndarray), not a string!
    def MAP(self, df, relevant_items):
        """Print and return the Mean Average Precision of ``df``.

        df: dataframe with a 'track_ids' column, one recommendation list per row.
        relevant_items: sequence aligned with the rows of df; entry i holds
            the relevant tracks of row i. Rows without relevant tracks are
            skipped. Returns 0.0 when no row can be evaluated.
        """
        print("Evaluating", flush = True)
        total = 0.0
        num_eval = 0

        for i in range(df.shape[0]):
            relevant = relevant_items[i]
            # the per-row list must be checked, not the whole collection
            if len(relevant) > 0:
                recommended_items = df['track_ids'].iloc[i]
                num_eval += 1
                total += self.AP(recommended_items, relevant)

        score = total / num_eval if num_eval else 0.0
        print("Recommender performance is {:.8f}".format(score))
        return score

    def result_diff(self, result_dfs):
        """Print, for every ordered pair of results, how many positions differ.

        result_dfs: iterable of dataframes with a 'track_ids' column. Each
        entry is turned into a list, ' ' elements are dropped, and the two
        lists are compared position by position.
        """
        for i, result in enumerate(result_dfs):
            for j, result_2 in enumerate(result_dfs):
                tot_diff = 0
                for row, row_2 in zip(result['track_ids'], result_2['track_ids']):
                    row = [el for el in list(row) if el != ' ']
                    row_2 = [el for el in list(row_2) if el != ' ']
                    tot_diff += sum(1 for x, y in zip(row, row_2) if x != y)
                print('Total differences between res %d and res %d are: %d' % (i, j, tot_diff))

In [4]:
class Eval2(object): #this is the version no bias, to be fixed
    """Evaluation harness based on a random 80/20 split of the interactions.

    Every stored interaction of the URM goes to the training matrix with
    probability 0.80 and to the test matrix otherwise; target playlists are
    drawn among the playlists that own at least one test interaction.
    """

    def __init__(self, u):
        """Fetch the full URM through ``u.get_URM()`` and build the split."""
        self.URM = u.get_URM()
        self.URM_train = None
        self.URM_test = None
        self.target_playlists = None
        self.build_URM()

    def build_URM(self):
        """Split the interactions and pick up to 10000 target playlists.

        Uses the global ``np.random`` state. The number of targets is capped
        at the number of playlists present in the test split, so datasets
        with fewer than 10000 such playlists no longer raise.
        """
        itemList = np.array(self.URM.indices)
        ratingList = np.array(self.URM.data)
        # row index of every stored interaction, in CSR storage order
        userList = np.repeat(np.arange(self.URM.shape[0]), np.diff(self.URM.indptr))

        train_test_split = 0.80
        numInteractions = self.URM.nnz

        train_mask = np.random.choice([True,False], numInteractions, p = [train_test_split, 1-train_test_split])
        test_mask = np.logical_not(train_mask)

        self.URM_train = sp.coo_matrix((ratingList[train_mask], (userList[train_mask], itemList[train_mask])), shape = self.URM.shape)
        self.URM_train = self.URM_train.tocsr()

        self.URM_test = sp.coo_matrix((ratingList[test_mask], (userList[test_mask], itemList[test_mask])), shape = self.URM.shape)
        self.URM_test = self.URM_test.tocsr()

        candidates = np.unique(userList[test_mask])
        n_targets = min(10000, candidates.size)
        if n_targets == 0:
            self.target_playlists = candidates
        else:
            self.target_playlists = np.random.choice(candidates, n_targets, replace = False)

    def get_URM_test(self):
        """Return the test URM (CSR)."""
        return self.URM_test

    def get_URM_train(self):
        """Return the training URM (CSR)."""
        return self.URM_train

    def get_target_playlists(self):
        """Return the array of sampled target playlist row indices."""
        return self.target_playlists

    def AP(self, recommended_items, relevant_items):
        """Average precision of one recommendation list.

        Both arguments are expected to hold unique ids. Returns 0.0 when
        either list is empty instead of dividing by zero.
        """
        relevant_items = np.asarray(relevant_items)
        relevant = np.isin(recommended_items, relevant_items, assume_unique=True)
        denominator = min(relevant_items.shape[0], relevant.shape[0])
        if denominator == 0:
            return 0.0
        p_at_k = relevant * np.cumsum(relevant, dtype=np.float32) / (1 + np.arange(relevant.shape[0]))
        return np.sum(p_at_k) / denominator

    # input has to be the dataframe returned by the recommender
    # NB: the songs in the dataframe must be a list (or ndarray), not a string!
    def MAP(self, df):
        """Print and return the Mean Average Precision of ``df``.

        df: dataframe with 'playlist_id' (row index into the test URM) and
        'track_ids' columns. Playlists without test interactions are skipped.
        Returns 0.0 when no row can be evaluated.
        """
        print("Evaluating", flush = True)
        total = 0.0
        num_eval = 0

        for i in range(df.shape[0]):
            current = df['playlist_id'].iloc[i]
            relevant = self.URM_test[current].indices
            if len(relevant) > 0:
                recommended_items = df['track_ids'].iloc[i]
                num_eval += 1
                total += self.AP(recommended_items, relevant)

        score = total / num_eval if num_eval else 0.0
        print("Recommender performance is {:.8f}".format(score))
        return score

    def result_diff(self, result_dfs):
        """Print, for every ordered pair of results, how many positions differ.

        result_dfs: iterable of dataframes with a 'track_ids' column. Each
        entry is turned into a list, ' ' elements are dropped, and the two
        lists are compared position by position.
        """
        for i, result in enumerate(result_dfs):
            for j, result_2 in enumerate(result_dfs):
                tot_diff = 0
                for row, row_2 in zip(result['track_ids'], result_2['track_ids']):
                    row = [el for el in list(row) if el != ' ']
                    row_2 = [el for el in list(row_2) if el != ' ']
                    tot_diff += sum(1 for x, y in zip(row, row_2) if x != y)
                print('Total differences between res %d and res %d are: %d' % (i, j, tot_diff))

In [5]:
class SlimBPR(object):
    """Item-item similarity learned by BPR-style sampling over the URM."""

    def __init__(self,
                 URM,
                 learning_rate=0.01,
                 epochs=1,
                 positive_item_regularization=1.0,
                 negative_item_regularization=1.0,
                 nnz=1):
        """Store the hyper-parameters and allocate the similarity matrix.

        URM: sparse playlist x track matrix (row slices must expose
            ``.indices``, i.e. CSR).
        nnz: multiplier applied to ``URM.nnz`` to get the number of samples
            drawn per epoch.
        """
        self.URM = URM
        self.learning_rate=learning_rate
        self.epochs = epochs
        self.positive_item_regularization = positive_item_regularization
        self.negative_item_regularization = negative_item_regularization
        self.nnz = nnz
        self.n_playlist = self.URM.shape[0]
        self.n_track = self.URM.shape[1]

        # Both axes are indexed by track ids below, so the matrix is
        # track x track (it used to be allocated as track x playlist).
        self.similarity_matrix = sp.lil_matrix((self.n_track, self.n_track))

    def sample(self):
        """Draw (playlist, track in the playlist, track not in the playlist).

        Playlists that are empty or that already contain every track cannot
        provide such a triple and are re-drawn.
        NOTE(review): loops forever if no playlist qualifies.
        """
        while True:
            playlist_id = np.random.choice(self.n_playlist)
            # tracks of the playlist; one of them becomes the positive item
            tracks = self.URM[playlist_id, :].indices
            if 0 < len(tracks) < self.n_track:
                break

        pos_track_id = np.random.choice(tracks)

        while True:
            neg_track_id = np.random.choice(self.n_track)
            if neg_track_id not in tracks:
                break

        return playlist_id, pos_track_id, neg_track_id

    def epochIteration(self):
        """Run one epoch of ``int(URM.nnz * nnz)`` sampled updates."""
        numPosInteractions = int(self.URM.nnz * self.nnz)

        # tqdm performs range op with progress visualization
        for num_sample in tqdm(range(numPosInteractions)):

            playlist_id, pos_track_id, neg_track_id = self.sample()

            tracks = self.URM[playlist_id, :].indices

            # Prediction
            x_i = self.similarity_matrix[pos_track_id, tracks].sum()
            x_j = self.similarity_matrix[neg_track_id, tracks].sum()

            # Gradient
            x_ij = x_i - x_j
            gradient = 1 / (1 + np.exp(x_ij))

            # Neither step depends on the track being updated, so both are
            # computed once per sample.
            pos_step = self.learning_rate * (gradient - self.positive_item_regularization * x_i)
            neg_step = self.learning_rate * (gradient - self.negative_item_regularization * x_j)

            for i in tracks:
                self.similarity_matrix[pos_track_id, i] = self.similarity_matrix[pos_track_id, i] + pos_step
                self.similarity_matrix[neg_track_id, i] = self.similarity_matrix[neg_track_id, i] - neg_step

            # a track must not be similar to itself (the second line used to
            # repeat the positive item instead of resetting the negative one)
            self.similarity_matrix[pos_track_id, pos_track_id] = 0
            self.similarity_matrix[neg_track_id, neg_track_id] = 0

    @staticmethod
    def _prune_row_to_knn(matrix, row, knn):
        """Zero, in place, all but the ``knn`` largest stored values of a CSR row."""
        start, end = matrix.indptr[row], matrix.indptr[row + 1]
        if knn <= 0 or end - start <= knn:
            return
        row_data = matrix.data[start:end]  # view: writes reach the matrix
        row_data[row_data.argsort()[:-knn]] = 0

    def get_S_SLIM_BPR(self, knn):
        """Train for ``epochs`` epochs and return the knn-pruned CSR similarity."""
        print('get S Slim BPR...')

        for numEpoch in range(self.epochs):
            print('Epoch: ', numEpoch)
            self.epochIteration()

        #replace with our own knn methods
        print('Keeping only knn =', knn, '...')
        similarity_matrix_csr = self.similarity_matrix.tocsr()

        # The pruning has to write into the matrix' own data buffer:
        # ``matrix[row, :]`` builds a copy, so zeroing its data changed nothing.
        for row in tqdm(range(0, similarity_matrix_csr.shape[0])):
            self._prune_row_to_knn(similarity_matrix_csr, row, knn)
        similarity_matrix_csr.eliminate_zeros()

        return similarity_matrix_csr

In [6]:
class Item_CBR(object):
    """Content-based item recommender.

    ``u`` must provide ``get_itemsim_CB(knn)`` and
    ``get_top10_tracks(URM, playlist_id, score_row)``.
    """

    def __init__(self, u):
        self.u = u
        self.URM = None
        self.target_playlists = None
        self.S = None

    def fit(self, URM, target_playlists, knn):
        """Store the inputs and compute the content-based item similarity."""
        self.URM = URM
        self.target_playlists = target_playlists
        self.S = self.u.get_itemsim_CB(knn)

    def recommend(self, is_test):
        """Recommend tracks for every target playlist.

        Returns a dataframe with 'playlist_id' and 'track_ids' columns;
        'track_ids' holds the array of tracks when is_test is true and a
        space-separated string otherwise.
        """
        print("Recommending", flush = True)
        R = self.URM @ self.S

        # Rows are collected in lists: assigning through df[col][i] is a
        # chained assignment that pandas does not guarantee to write back.
        playlist_ids, recommendations = [], []
        for target_playlist in tqdm(np.array(self.target_playlists)):
            playlist_id = int(target_playlist[0])
            result_tracks = self.u.get_top10_tracks(self.URM, playlist_id, R[playlist_id])
            playlist_ids.append(playlist_id)
            if is_test:
                recommendations.append(result_tracks)
            else:
                recommendations.append(' '.join(map(str, np.ravel(result_tracks))))

        return pd.DataFrame({'playlist_id': playlist_ids, 'track_ids': recommendations},
                            columns=['playlist_id', 'track_ids'])

In [7]:
class Item_CFR(object):
    """Item-based collaborative filtering recommender.

    ``u`` must provide ``get_itemsim_CF(URM, knn)`` and
    ``get_top10_tracks(URM, playlist_id, score_row)``.
    """

    def __init__(self, u):
        self.u = u
        self.URM = None
        self.target_playlists = None
        self.S = None

    def fit(self, URM, target_playlists, knn):
        """Store the inputs and compute the collaborative item similarity."""
        self.URM = URM
        self.target_playlists = target_playlists
        self.S = self.u.get_itemsim_CF(self.URM, knn)

    def recommend(self, is_test):
        """Recommend tracks for every target playlist.

        Returns a dataframe with 'playlist_id' and 'track_ids' columns;
        'track_ids' holds the array of tracks when is_test is true and a
        space-separated string otherwise.
        """
        print("Recommending", flush = True)
        R = self.URM @ self.S

        # Rows are collected in lists: assigning through df[col][i] is a
        # chained assignment that pandas does not guarantee to write back.
        playlist_ids, recommendations = [], []
        for target_playlist in tqdm(np.array(self.target_playlists)):
            playlist_id = int(target_playlist[0])
            result_tracks = self.u.get_top10_tracks(self.URM, playlist_id, R[playlist_id])
            playlist_ids.append(playlist_id)
            if is_test:
                recommendations.append(result_tracks)
            else:
                recommendations.append(' '.join(map(str, np.ravel(result_tracks))))

        return pd.DataFrame({'playlist_id': playlist_ids, 'track_ids': recommendations},
                            columns=['playlist_id', 'track_ids'])

In [8]:
class User_CFR(object):
    """User-based (playlist-based) collaborative filtering recommender.

    ``u`` must provide ``get_usersim_CF(URM, knn)`` and
    ``get_top10_tracks(URM, playlist_id, score_row)``.
    """

    def __init__(self, u):
        self.u = u
        self.URM = None
        self.target_playlists = None
        self.S = None

    def fit(self, URM, target_playlists, knn):
        """Store the inputs and compute the playlist similarity."""
        self.URM = URM
        self.target_playlists = target_playlists
        self.S = self.u.get_usersim_CF(self.URM, knn)

    def recommend(self, is_test):
        """Recommend tracks for every target playlist.

        Returns a dataframe with 'playlist_id' and 'track_ids' columns;
        'track_ids' holds the array of tracks when is_test is true and a
        space-separated string otherwise.
        """
        print("Recommending", flush = True)
        R = self.S @ self.URM

        # Rows are collected in lists: assigning through df[col][i] is a
        # chained assignment that pandas does not guarantee to write back.
        playlist_ids, recommendations = [], []
        for target_playlist in tqdm(np.array(self.target_playlists)):
            playlist_id = int(target_playlist[0])
            result_tracks = self.u.get_top10_tracks(self.URM, playlist_id, R[playlist_id])
            playlist_ids.append(playlist_id)
            if is_test:
                recommendations.append(result_tracks)
            else:
                recommendations.append(' '.join(map(str, np.ravel(result_tracks))))

        return pd.DataFrame({'playlist_id': playlist_ids, 'track_ids': recommendations},
                            columns=['playlist_id', 'track_ids'])

In [9]:
class Ensemble_item(object):
    """Blend of the collaborative and content-based item recommenders.

    ``u`` must provide ``get_itemsim_CF(URM, knn)``, ``get_itemsim_CB(knn)``
    and ``get_top10_tracks(URM, playlist_id, score_row)``.
    """

    def __init__(self, u):
        self.u = u
        self.S_CB = None
        self.S_CF = None
        self.target_playlists = None
        self.URM = None

    def fit(self, URM, target_playlists, knn1, knn2):
        """Compute the CF similarity (knn1) and the CB similarity (knn2)."""
        self.URM = URM
        self.target_playlists = target_playlists
        self.S_CF = self.u.get_itemsim_CF(self.URM, knn1)
        self.S_CB = self.u.get_itemsim_CB(knn2)

    def recommend(self, is_test, alfa):
        """Recommend tracks from ``alfa * R_CF + (1 - alfa) * R_CB``.

        Returns a dataframe with 'playlist_id' and 'track_ids' columns;
        'track_ids' holds the array of tracks when is_test is true and a
        space-separated string otherwise.
        """
        print("Recommending", flush = True)
        R_CB = self.URM @ self.S_CB
        R_CF = self.URM @ self.S_CF
        R = (alfa*R_CF) + ((1-alfa)*R_CB)

        # Rows are collected in lists: assigning through df[col][i] is a
        # chained assignment that pandas does not guarantee to write back.
        playlist_ids, recommendations = [], []
        for target_playlist in tqdm(np.array(self.target_playlists)):
            playlist_id = int(target_playlist[0])
            result_tracks = self.u.get_top10_tracks(self.URM, playlist_id, R[playlist_id])
            playlist_ids.append(playlist_id)
            if is_test:
                recommendations.append(result_tracks)
            else:
                recommendations.append(' '.join(map(str, np.ravel(result_tracks))))

        return pd.DataFrame({'playlist_id': playlist_ids, 'track_ids': recommendations},
                            columns=['playlist_id', 'track_ids'])

In [10]:
class Hybrid(object):
    """Hybrid of item similarities (CF + CB) and playlist similarity.

    ``u`` must provide ``get_itemsim_CF(URM, knn)``,
    ``get_usersim_CF(URM, knn)``, ``get_itemsim_CB(knn)`` and
    ``get_top10_tracks(URM, playlist_id, score_row)``.
    """

    def __init__(self, u):
        self.u = u
        self.S_CB = None
        self.S_CF_item = None
        self.S_CF_user = None
        self.S_user = None  # historical alias of S_CF_user
        self.target_playlists = None
        self.URM = None

    def fit(self, URM, target_playlists, knn1, knn2, knn3):
        """Compute item CF (knn1), playlist CF (knn2) and item CB (knn3) similarities."""
        self.URM = URM
        self.target_playlists = target_playlists
        self.S_CF_item = self.u.get_itemsim_CF(self.URM, knn1)
        # fit() used to fill only ``S_user`` and left the ``S_CF_user``
        # attribute declared in __init__ at None; both names are kept in sync.
        self.S_CF_user = self.u.get_usersim_CF(self.URM, knn2)
        self.S_user = self.S_CF_user
        self.S_CB = self.u.get_itemsim_CB(knn3)

    def recommend(self, is_test, weights):
        """Recommend tracks for every target playlist.

        weights[0] (alfa) blends the two item similarities, weights[1] (beta)
        blends the item-based scores with the playlist-based scores.
        Returns a dataframe with 'playlist_id' and 'track_ids' columns;
        'track_ids' holds the array of tracks when is_test is true and a
        space-separated string otherwise.
        """
        print("Recommending", flush = True)
        alfa = weights[0]
        beta = weights[1]
        S_item = (alfa*self.S_CF_item) + ((1-alfa)*self.S_CB)
        R_user = self.S_CF_user @ self.URM
        R_item = self.URM @ S_item
        R = (beta*R_item) + ((1-beta)*R_user)

        # Rows are collected in lists: assigning through df[col][i] is a
        # chained assignment that pandas does not guarantee to write back.
        playlist_ids, recommendations = [], []
        for target_playlist in tqdm(np.array(self.target_playlists)):
            playlist_id = int(target_playlist[0])
            result_tracks = self.u.get_top10_tracks(self.URM, playlist_id, R[playlist_id])
            playlist_ids.append(playlist_id)
            if is_test:
                recommendations.append(result_tracks)
            else:
                recommendations.append(' '.join(map(str, np.ravel(result_tracks))))

        return pd.DataFrame({'playlist_id': playlist_ids, 'track_ids': recommendations},
                            columns=['playlist_id', 'track_ids'])

In [11]:
class Ensemble_cfcb(object):
    """Weighted sum of item CF, playlist CF and item CB score matrices.

    ``u`` must provide ``get_itemsim_CF(URM, knn)``,
    ``get_usersim_CF(URM, knn)``, ``get_itemsim_CB(knn)`` and
    ``get_top10_tracks(URM, playlist_id, score_row)``.
    """

    def __init__(self, u):
        self.u = u
        self.S_CB = None
        self.S_CF_I = None
        self.S_CF_U = None
        self.target_playlists = None
        self.URM = None

    def fit(self, URM, target_playlists, knn1, knn2, knn3):
        """Compute item CF (knn1), playlist CF (knn2) and item CB (knn3) similarities."""
        self.URM = URM
        self.target_playlists = target_playlists
        self.S_CF_I = self.u.get_itemsim_CF(self.URM, knn1)
        self.S_CF_U = self.u.get_usersim_CF(self.URM, knn2)
        self.S_CB = self.u.get_itemsim_CB(knn3)

    def recommend(self, is_test, weights):
        """Recommend tracks for every target playlist.

        weights holds, in order, the coefficients of the item CF, playlist CF
        and item CB scores.
        Returns a dataframe with 'playlist_id' and 'track_ids' columns;
        'track_ids' holds the array of tracks when is_test is true and a
        space-separated string otherwise.
        """
        print("Recommending", flush = True)
        R_CB = self.URM @ self.S_CB
        R_CF_I = self.URM @ self.S_CF_I
        R_CF_U = self.S_CF_U @ self.URM
        R = (weights[0]*R_CF_I) + (weights[1]*R_CF_U) + (weights[2]*R_CB)

        # Rows are collected in lists: assigning through df[col][i] is a
        # chained assignment that pandas does not guarantee to write back.
        playlist_ids, recommendations = [], []
        for target_playlist in tqdm(np.array(self.target_playlists)):
            playlist_id = int(target_playlist[0])
            result_tracks = self.u.get_top10_tracks(self.URM, playlist_id, R[playlist_id])
            playlist_ids.append(playlist_id)
            if is_test:
                recommendations.append(result_tracks)
            else:
                recommendations.append(' '.join(map(str, np.ravel(result_tracks))))

        return pd.DataFrame({'playlist_id': playlist_ids, 'track_ids': recommendations},
                            columns=['playlist_id', 'track_ids'])

In [12]:
class SlimBPRRec(object):
    """Recommender that scores tracks with a precomputed SLIM-BPR similarity."""

    def fit(self, URM, Slim, target_playlists, num_playlist_to_recommend,
           learning_rate, epochs, positive_item_regularization,
           negative_item_regularization, nzz, u):
        """Store the URM, the similarity ``Slim``, the targets and the helper ``u``.

        learning_rate, epochs, the two regularization terms and nzz are
        accepted for interface compatibility but are not used here: the
        similarity matrix arrives already trained.
        ``u`` must provide ``get_top10_tracks(URM, playlist_id, score_row)``.
        """
        self.URM = URM
        self.target_playlists = target_playlists
        self.num_playlist_to_recommend = num_playlist_to_recommend
        self.Slim = Slim
        self.u = u

    def recommend(self, is_test):
        """Recommend tracks for every target playlist.

        Returns a dataframe with 'playlist_id' and 'track_ids' columns;
        'track_ids' holds the array of tracks when is_test is true and a
        space-separated string otherwise.
        """
        self.is_test = is_test

        # Rows are collected in lists: assigning through df[col][j] is a
        # chained assignment that pandas does not guarantee to write back.
        playlist_ids, recommendations = [], []

        print('Predicting...', flush=True)
        for target_playlist in tqdm(np.array(self.target_playlists)):
            playlist_id = int(target_playlist[0])

            URM_row = self.URM[playlist_id, :] @ self.Slim

            #Make prediction
            result_tracks = self.u.get_top10_tracks(self.URM, playlist_id, URM_row)
            playlist_ids.append(playlist_id)
            if is_test:
                recommendations.append(result_tracks)
            else:
                recommendations.append(' '.join(map(str, np.ravel(result_tracks))))

        return pd.DataFrame({'playlist_id': playlist_ids, 'track_ids': recommendations},
                            columns=['playlist_id', 'track_ids'])

In [13]:
class Run(object):
    """Entry points that fit a recommender and either evaluate or export it.

    Every ``recommend_*`` method takes ``is_test``: when true the recommender
    is fitted on the hold-out training URM and scored with ``Eval.MAP``;
    otherwise it is fitted on the full URM for the playlists of
    data/target_playlists.csv and the result is written under predictions/.
    """

    def __init__(self):
        """Load the CSV inputs and build the helper objects and both URMs."""
        self.train = pd.read_csv("data/train.csv")
        self.tracks = pd.read_csv("data/tracks.csv")
        self.target_playlists = pd.read_csv("data/target_playlists.csv")
        self.u = Utils(self.train, self.tracks, self.target_playlists)
        self.e = Eval(self.u)
        # Eval already built the full URM from the same Utils instance and
        # only ever copies it, so it is reused instead of being rebuilt.
        self.URM_full = self.e.get_URM()
        self.URM_train = self.e.get_URM_train()

    def _fit_and_run(self, rec, is_test, fit_args, recommend_args, csv_path):
        """Fit ``rec``, then evaluate it (is_test) or export its predictions."""
        if is_test:
            target_playlists = self.e.get_target_playlists()
            rec.fit(self.URM_train, target_playlists, *fit_args)
            result = rec.recommend(True, *recommend_args)
            self.e.MAP(result, self.e.get_target_tracks())
        else:
            target = self.u.get_target_playlists()
            rec.fit(self.URM_full, target, *fit_args)
            result = rec.recommend(False, *recommend_args)
            result.to_csv(csv_path, index=False)

    def recommend_itemCBR(self, is_test, knn = 300):
        """Content-based item recommender; exports predictions/item_cbr.csv."""
        self._fit_and_run(Item_CBR(self.u), is_test, (knn,), (), "predictions/item_cbr.csv")

    def recommend_itemCFR(self, is_test, knn = 400):
        """Item-based CF recommender; exports predictions/item_cfr.csv."""
        self._fit_and_run(Item_CFR(self.u), is_test, (knn,), (), "predictions/item_cfr.csv")

    def recommend_userCFR(self, is_test, knn = 400):
        """Playlist-based CF recommender; exports predictions/user_cfr.csv."""
        self._fit_and_run(User_CFR(self.u), is_test, (knn,), (), "predictions/user_cfr.csv")

    def recommend_ensemble_item(self, is_test, alfa = 0.7, knn1 = 400, knn2 = 400):
        """Item CF / item CB blend; exports predictions/item_avg.csv."""
        self._fit_and_run(Ensemble_item(self.u), is_test, (knn1, knn2), (alfa,), "predictions/item_avg.csv")

    def recommend_ensemble_cfcb(self, is_test, weights = (0.6, 0.4, 0.5), knn1 = 400, knn2 = 400, knn3 = 300):
        """Three-way score ensemble; exports predictions/ensemble1.csv."""
        self._fit_and_run(Ensemble_cfcb(self.u), is_test, (knn1, knn2, knn3), (weights,), "predictions/ensemble1.csv")

    def recommend_hybrid(self, is_test, weights = (0.7, 0.65), knn1 = 400, knn2 = 400, knn3 = 300):
        """Hybrid recommender; exports predictions/hybrid.csv."""
        self._fit_and_run(Hybrid(self.u), is_test, (knn1, knn2, knn3), (weights,), "predictions/hybrid.csv")

    def recommend_slimBPR(self, is_test, knn = 100):
        """SLIM-BPR recommender; exports predictions/slimBPR.csv.

        The hyper-parameters below used to be handed only to
        ``SlimBPRRec.fit``, which ignores them, so the model was trained with
        the ``SlimBPR`` defaults; they are now passed to the trainer too.
        NOTE(review): this makes the training learning rate 0.1 instead of
        the 0.01 default - confirm that 0.1 is the intended value.
        """
        hyper_params = dict(learning_rate=0.1, epochs=1,
                            positive_item_regularization=1.0,
                            negative_item_regularization=1.0)
        if is_test:
            URM, targets = self.URM_train, self.e.get_target_playlists()
        else:
            URM, targets = self.URM_full, self.u.get_target_playlists()

        BPR_gen = SlimBPR(URM, nnz=1, **hyper_params)
        S_bpr = BPR_gen.get_S_SLIM_BPR(knn)

        rec = SlimBPRRec()
        rec.fit(URM, S_bpr, targets, 10000, nzz=1, u=self.u, **hyper_params)
        if is_test:
            result = rec.recommend(True)
            self.e.MAP(result, self.e.get_target_tracks())
        else:
            result = rec.recommend(False)
            result.to_csv("predictions/slimBPR.csv", index=False)

In [14]:
# Load the CSV inputs and build the hold-out split (everything happens in Run.__init__).
run = Run()

In [16]:
# is_test=False: fit the item-based CF recommender on the full URM and
# write the predictions to predictions/item_cfr.csv.
run.recommend_itemCFR(False)

Similarity column 13214 ( 64 % ), 440.46 column/sec, elapsed time 0.50 min
Similarity column 20635 ( 100 % ), 440.67 column/sec, elapsed time 0.78 min
Recommending


10000it [00:20, 488.03it/s]


In [15]:
# Hold-out training URM and its TF-IDF weighted, normalised version,
# kept as globals for the scratch cells below.
URM = run.e.get_URM_train()
UCM = run.u.get_UCM(URM)

In [55]:
# Inspect shape, dtype and number of stored elements of the UCM.
UCM

<50446x20635 sparse matrix of type '<class 'numpy.float64'>'
	with 1110901 stored elements in Compressed Sparse Row format>

In [56]:
# Inspect shape, dtype and number of stored elements of the training URM.
URM

<50446x20635 sparse matrix of type '<class 'numpy.int64'>'
	with 1110901 stored elements in Compressed Sparse Row format>

In [None]:
# NOTE(review): this cell was left unfinished (`new_row = [UCM[0]*`), which is
# a syntax error. It is completed following the `row*T` pattern of the next
# cell - confirm this was the intent, or delete the cell.
new_row = UCM[0] * UCM.T.tocsr()

In [None]:
def compute_usersim_topk(UCM, knn):
    """Row-by-row playlist similarity keeping the knn + 1 largest values per row.

    The cell was a function body without its ``def``: it returned at top
    level and used the undefined names ``rows`` and ``knn``.

    UCM: sparse playlist x track matrix (iterating it must yield 1 x n rows).
    knn: number of neighbours; knn + 1 values are kept per row, presumably to
        leave room for the self-similarity, which is not removed.

    Returns the similarity matrix in CSR format.
    """
    T = UCM.T.tocsr()
    rows = []

    for row in UCM:
        new_row = row @ T
        # indices of everything except the knn + 1 largest stored values
        indices = new_row.data.argsort()[:-knn-1]
        new_row.data[indices] = 0
        new_row.eliminate_zeros()
        rows.append(new_row)

    return sp.vstack(rows).tocsr()

In [19]:
# Inspect the first row of the UCM.
UCM[0]

<1x20635 sparse matrix of type '<class 'numpy.float64'>'
	with 5 stored elements in Compressed Sparse Row format>

In [45]:
# Cosine similarity on the transposed raw training URM, top 400 neighbours.
# NOTE(review): Utils.get_usersim_CF builds the same similarity from the
# TF-IDF weighted UCM instead of the raw URM - confirm which one is intended.
similarity = cosine_similarity.Cosine_Similarity(dataMatrix=URM.T,normalize=True,shrink=100,mode='cosine', topK=400)

In [46]:
# Compute the similarity matrix (the slow step; progress is logged below).
S = similarity.compute_similarity()

Similarity column 14384 ( 29 % ), 479.46 column/sec, elapsed time 0.50 min
Similarity column 28887 ( 57 % ), 481.43 column/sec, elapsed time 1.00 min
Similarity column 43366 ( 86 % ), 481.82 column/sec, elapsed time 1.50 min
Similarity column 50446 ( 100 % ), 482.22 column/sec, elapsed time 1.74 min


In [None]:
# Reference similarity from Utils.get_usersim_CF with 400 neighbours.
# NOTE(review): this cell has no execution count, so it was not run in the
# saved session.
S1 = run.u.get_usersim_CF(URM, 400)

In [44]:
# NOTE(review): the displayed S1 comes from an earlier execution (In [44])
# while the cell assigning it above is unexecuted - the output may be stale.
S1

<20635x20635 sparse matrix of type '<class 'numpy.float64'>'
	with 4977571 stored elements in Compressed Sparse Row format>

In [49]:
# Inspect shape, dtype and number of stored elements of the similarity matrix.
S

<50446x50446 sparse matrix of type '<class 'numpy.float32'>'
	with 20178400 stored elements in Compressed Sparse Row format>

In [27]:
# Inspect the training URM again (same object as displayed earlier).
URM

<50446x20635 sparse matrix of type '<class 'numpy.int64'>'
	with 1110901 stored elements in Compressed Sparse Row format>

In [36]:
# Inputs of the manual recommendation cell below: the hold-out target
# playlists and the flag selecting array (True) or string (False) output.
target_playlists = run.e.target_playlists
is_test=True

In [47]:
# Manual playlist-based recommendation using the similarity S computed above.
print("Recommending", flush = True)
R = S @ URM

# Rows are collected in lists: assigning through df[col][i] is a chained
# assignment that pandas does not guarantee to write back.
playlist_ids, recommendations = [], []

for target_playlist in tqdm(np.array(target_playlists)):
    playlist_id = int(target_playlist[0])
    result_tracks = run.u.get_top10_tracks(URM, playlist_id, R[playlist_id])
    playlist_ids.append(playlist_id)
    if is_test:
        recommendations.append(result_tracks)
    else:
        recommendations.append(' '.join(map(str, np.ravel(result_tracks))))

final_result = pd.DataFrame({'playlist_id': playlist_ids, 'track_ids': recommendations},
                            columns=['playlist_id', 'track_ids'])

final_result

Recommending


10089it [00:14, 681.06it/s]


Unnamed: 0,playlist_id,track_ids
0,37516,"[15476, 1149, 18874, 12783, 20282, 14178, 1372..."
1,47017,"[4743, 4148, 8138, 8356, 14907, 9231, 18404, 1..."
2,1664,"[80, 1895, 1196, 2930, 19317, 12639, 14702, 27..."
3,43867,"[12906, 15931, 16024, 13477, 11164, 4941, 1028..."
4,23818,"[11407, 2085, 16731, 20359, 1066, 15362, 2090,..."
5,13698,"[5846, 20268, 5031, 15428, 1144, 14994, 15495,..."
6,7403,"[344, 8145, 18991, 752, 2336, 6844, 19505, 111..."
7,7435,"[12940, 10100, 9163, 17634, 13622, 16100, 1946..."
8,32236,"[13368, 2705, 17538, 12963, 15209, 18142, 3463..."
9,24557,"[10848, 8491, 19377, 14008, 3830, 14277, 20134..."


In [48]:
# Score the manual recommendations against the tracks hidden by the hold-out split.
run.e.MAP(final_result,run.e.get_target_tracks())

Evaluating
Recommender performance is 0.08871376
