In [1]:
import pickle
import numpy as np
import pandas as pd
from scipy import sparse
import implicit

In [2]:
# Track metadata: every line of the file holds four fields joined by the
# literal token '<SEP>', and there is no header row. The python parsing engine
# is requested explicitly because the separator is longer than one character.
song_info = pd.read_csv(
    '../data/unique_tracks.txt',
    sep='<SEP>',
    header=None,
    engine='python',
)
# NOTE(review): if this file is the Million Song Dataset `unique_tracks.txt`,
# its first field is a track id rather than an artist id -- confirm before
# relying on the 'artist_id' label.
song_info.columns = ['artist_id', 'song_id', 'artist_name', 'song_name']

In [3]:
%%time
# Load the pickled interaction table. Its column labels are what the helper
# functions in this notebook return as recommendation names.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
# NOTE(review): the directory is spelled 'recommedation_service'; presumably
# this matches the on-disk name -- confirm.
with open('../recommedation_service/data/final.pkl', 'rb') as pickle_file:
    dataset = pickle.load(pickle_file)

# CSR copy of the same data, in the form handed to the model.
dataset_s = dataset.to_coo().tocsr()

CPU times: user 25.9 s, sys: 2.7 s, total: 28.6 s
Wall time: 29.9 s


In [4]:
%%time
# Fit an ALS model with 30 latent factors on the transposed matrix.
# NOTE(review): the transpose presumably turns the user-by-item matrix into
# the item-by-user layout this version of `implicit` expects in `fit` --
# confirm against the installed version.
# NOTE(review): no random seed is set here, so the learned factors (and the
# recommendations shown below) may differ from run to run.
model = implicit.als.AlternatingLeastSquares(factors=30)
model.fit(dataset_s.T)

# Persist the fitted model so it can be reused without retraining.
with open('../data/model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

CPU times: user 10min 41s, sys: 4.15 s, total: 10min 45s
Wall time: 3min 12s


### Recommendations for an existing user

In [5]:
%%time
def get_recommendation(user_id, n=10):
    """Return the labels of the top-``n`` recommended items for a known user.

    Parameters
    ----------
    user_id : int
        User index passed to ``model.recommend`` together with ``dataset_s``;
        presumably the user's row in ``dataset_s`` -- confirm.
    n : int, optional
        Maximum number of recommendations to request. Defaults to 10, the
        number of results the original call produced.

    Returns
    -------
    pandas.Index
        The entries of ``dataset.columns`` for the recommended items, in the
        order the model returned them.
    """
    # Assumes `model.recommend` yields (item_index, score) pairs, which is
    # what the column-0 indexing used previously relied on.
    recommendations = model.recommend(user_id, dataset_s, N=n)
    # Read the indices straight from the pairs: this avoids round-tripping
    # integer ids through a float array and also copes with an empty result.
    item_ids = [int(item_id) for item_id, _score in recommendations]
    return dataset.columns[item_ids]

# Example: show the recommendations for one fixed user index.
print(get_recommendation(user_id=1019410))

Index(['Justin Bieber', 'Black Eyed Peas', 'Taylor Swift', 'Rihanna',
       'Miley Cyrus', 'Harmonia', 'Beyoncé', 'Katy Perry',
       'Lady GaGa / Colby O'Donis', 'Daft Punk'],
      dtype='object')
CPU times: user 4 ms, sys: 1.66 ms, total: 5.66 ms
Wall time: 3.57 ms


### Recommendations for a new user

In [6]:
%%time
def get_recommendation_new(new_user_data, n=10):
    """Return the labels of the top-``n`` recommended items for an unseen user.

    The user's factors are recomputed on the fly from ``new_user_data``
    (``recalculate_user=True``), so the user does not need to have been part
    of the training data.

    Parameters
    ----------
    new_user_data : array-like
        The new user's interaction values, one entry per item, i.e. as wide
        as ``dataset_s`` has columns.
    n : int, optional
        Maximum number of recommendations to request. Defaults to 10.

    Returns
    -------
    pandas.Index
        The entries of ``dataset.columns`` for the recommended items, in the
        order the model returned them.

    Raises
    ------
    ValueError
        If ``new_user_data`` does not have one entry per item.
    """
    new_user_rows = sparse.csr_matrix(new_user_data)

    # Stacking onto the full matrix used to reject a wrong-width vector;
    # keep that guarantee explicitly now that nothing is stacked.
    n_items = dataset_s.shape[1]
    if new_user_rows.shape[1] != n_items:
        raise ValueError(
            'new_user_data has %d entries but the dataset has %d items'
            % (new_user_rows.shape[1], n_items)
        )

    # Hand the model only the new user's row(s) instead of a stacked copy of
    # the entire user-item matrix, which was rebuilt on every call.
    # NOTE(review): this assumes `model.recommend` reads only row `user_id`
    # of `user_items` when `recalculate_user=True` -- confirm for the
    # installed `implicit` version.
    user_id = new_user_rows.shape[0] - 1  # last row, as before
    recommendations = model.recommend(user_id,
                                      new_user_rows,
                                      N=n,
                                      recalculate_user=True)

    # Assumes the result is a sequence of (item_index, score) pairs.
    item_ids = [int(item_id) for item_id, _score in recommendations]
    return dataset.columns[item_ids]


# Synthetic "new user": a dense 0/1 vector with one entry per item, in which
# 100 randomly placed entries are set to 1.
# A dedicated, seeded generator makes the sampled profile reproducible across
# runs without touching NumPy's global random state.
rng = np.random.RandomState(42)
new_user_data = np.zeros(dataset_s.shape[1])
new_user_data[:100] = 1
rng.shuffle(new_user_data)

print(get_recommendation_new(new_user_data))

Index(['Enya', 'The Avett Brothers', 'Fleet Foxes', 'Rammstein', 'Bon Iver',
       'Iron And Wine', 'Linkin Park', 'Coldplay', 'Amos Lee', 'Enigma'],
      dtype='object')
CPU times: user 151 ms, sys: 121 ms, total: 272 ms
Wall time: 271 ms


### Similar items

In [7]:
def get_similar_artist(item_id, n=10):
    """Return the labels of the ``n`` items the model rates most similar.

    Parameters
    ----------
    item_id : int
        Item index passed to ``model.similar_items``; the returned indices
        are looked up in ``dataset.columns``, so this is presumably a column
        position of ``dataset`` as well -- confirm.
    n : int, optional
        Number of similar items to request. Defaults to 10, the value that
        was previously hard-coded.

    Returns
    -------
    pandas.Index
        The entries of ``dataset.columns`` for the similar items, in the
        order the model returned them.
    """
    # Assumes `model.similar_items` yields (item_index, score) pairs, which
    # is what the column-0 indexing used previously relied on.
    similar = model.similar_items(item_id, N=n)
    item_ids = [int(similar_id) for similar_id, _score in similar]
    return dataset.columns[item_ids]

# Example: list the items most similar to one fixed item index.
get_similar_artist(item_id=11836)

Index(['Eminem / Dr. Dre / 50 Cent', 'Eminem / Dr. Dre', 'Eminem',
       'J-Black & Masta Ace', 'Eminem / Hailie Jade', 'Trick Trick / Eminem',
       'Dr. Dre / Eminem',
       'Eminem / Obie Trice / Stat Quo / Bobby Creekwater / Cashis',
       'Eminem / 50 Cent', 'Eminem / DMX / Obie Trice'],
      dtype='object')