In [9]:
import sagemaker

sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

bucket = sagemaker_session.default_bucket()

In [10]:
data_dir = 'data' 
prefix = 'rcmdKNN'
input_data = sagemaker_session.upload_data(data_dir, key_prefix=prefix)

In [11]:
from sagemaker.sklearn.estimator import SKLearn

estimator = SKLearn(entry_point='train.py',
                    framework_version='0.23-1',
                    role=role,
                    train_instance_count=1,
                    train_instance_type='ml.c4.xlarge',
                    py_version='py3',
                    source_dir='source',
                    image_uri=None,
                    hyperparameters = {'n_neighbors':101,
                                       'metric':'cosine',
                                       'algorithm':'brute'})

train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [12]:
estimator.fit({'train': input_data})

2021-12-14 19:54:38 Starting - Starting the training job...
2021-12-14 19:55:01 Starting - Launching requested ML instancesProfilerReport-1639511678: InProgress
......
2021-12-14 19:56:01 Starting - Preparing the instances for training.........
2021-12-14 19:57:41 Downloading - Downloading input data...
2021-12-14 19:58:10 Training - Downloading the training image...
2021-12-14 19:58:42 Uploading - Uploading generated training model[34m2021-12-14 19:58:31,434 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-12-14 19:58:31,436 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-12-14 19:58:31,447 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-12-14 19:58:38,638 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-12-14 19:58:38,650 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus in

In [13]:
from sagemaker.sklearn import SKLearnModel

sklearn_model = SKLearnModel(model_data=estimator.model_data,
                             role=role,
                             entry_point="predict.py",
                             source_dir='source',
                             framework_version='0.23-1')

predictor = sklearn_model.deploy(instance_type="ml.m4.xlarge", initial_instance_count=1)

----------!

In [14]:
import scipy
import numpy as np
train_data = scipy.sparse.load_npz('./data/artist_user_mtrx.npz')
train_data = np.array(train_data.todense())

In [15]:
import pickle
with open('./datasources/artist_to_idx.pkl', 'rb') as f:
    artist_to_idx = pickle.load(f)
    
with open('./datasources/idx_to_artist.pkl', 'rb') as f:
    idx_to_artist = pickle.load(f)

In [48]:
def process_input(artists, input_data):
    artist_ids = [artist_to_idx[i] for i in artists]
    artist_ids = input_data[artist_ids].reshape(1,-1)
    return artist_ids

def process_output(result, n):
    artists = [idx_to_artist[i] for i in result]
    return artists[1:n+1]
    
for artist in ['miles davis', 'madonna', 'elvis presley']:
    print ('\nInput artist: {} \n'.format(artist))
    input_data = process_input([artist], train_data)
    response = predictor.predict(input_data)
    process_output(response, 15)


Input artist: miles davis 


Input artist: madonna 


Input artist: elvis presley 



In [None]:
def rcmnd_from_fav(user, data, num_preds = 100):
    play_history = data[:,user]
    artist_idx = (-play_history).argsort()[:1]
    predictions = predictor.predict(data[artist_idx].reshape(1,-1))
    return predictions[1:num_preds+1]

def hit_rate(user, data, predictions):
    hits = train_data[:, user][predictions]
    return hits

hits = []
num_users = 1000
preds_per_user = 10
for user in np.random.randint(0, train_data.shape[1], size=num_users):
    predictions = rcmnd_from_fav(user, train_data)
    process_output(predictions, preds_per_user)
    hits = hit_rate(user, train_data, predictions)
    hits+=hits
    
rate = (np.count_nonzero(hits) / len(hits))

print ('Artist Hit Rate for {} Users ({} Recommendations per User): {:.0%}%'\
       .format(num_users, preds_per_user, rate))

In [56]:
hit_rate(user, train_data, predictions)

TypeError: 'float' object is not callable