In [None]:
import boto3
import sagemaker

sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

bucket = sagemaker_session.default_bucket()

In [None]:
data_dir = 'data' 
prefix = 'rcmdKNN'
input_data = sagemaker_session.upload_data(data_dir, key_prefix=prefix)

In [None]:
from sagemaker.sklearn.estimator import SKLearn

estimator = SKLearn(entry_point='train.py',
                    framework_version='0.23-1',
                    role=role,
                    train_instance_count=1,
                    train_instance_type='ml.c4.xlarge',
                    py_version='py3',
                    source_dir='source',
                    image_uri=None,
                    hyperparameters = {'n_neighbors':101,
                                       'metric':'cosine',
                                       'algorithm':'brute'})

In [None]:
estimator.fit({'train': input_data})

In [None]:
from sagemaker.sklearn import SKLearnModel

sklearn_model = SKLearnModel(model_data=estimator.model_data,
                             role=role,
                             entry_point="predict.py",
                             source_dir='source',
                             framework_version='0.23-1')

predictor = sklearn_model.deploy(instance_type="ml.m4.xlarge", initial_instance_count=1)

In [None]:
import scipy
import numpy as np
train_data = scipy.sparse.load_npz('./data/artist_user_mtrx.npz')
train_data = np.array(train_data.todense())

In [None]:
import pickle
with open('./datasources/artist_to_idx.pkl', 'rb') as f:
    artist_to_idx = pickle.load(f)
    
with open('./datasources/idx_to_artist.pkl', 'rb') as f:
    idx_to_artist = pickle.load(f)

In [None]:
def process_input(artists, input_data):
    artist_ids = [artist_to_idx[i] for i in artists]
    artist_ids = input_data[artist_ids].reshape(1,-1)
    return artist_ids

def process_output(result, n):
    artists = [idx_to_artist[i] for i in result]
    return artists[:n]

In [177]:
def rcmnd_from_fav(user, data, num_preds = 100):
    play_history = data[:,user]
    artist_idx = (-play_history).argsort()[:1]
    predictions = predictor.predict(data[artist_idx].reshape(1,-1))
    return predictions[1:num_preds+1]

def hit_rate(user, data, predictions):
    hits = train_data[:, user][predictions]
    hit_rate = np.count_nonzero(hits) / len(hits)
    return hits

hits = []
for user in np.random.randint(0, train_data.shape[1], size=100):
    predictions = rcmnd_from_fav(user, train_data)
    process_output(predictions, 10)
    hits = hit_rate(user, train_data, predictions)
    hits+=hits
    
np.count_nonzero(hits) / len(hits)

0.23