# Import required packages

In [1]:
from collections import defaultdict
from surprise import SVD
from surprise import Dataset

# Define get top n function

In [2]:
def get_top_n(predictions, n=10):
    '''Group predictions by user and keep each user's n best-scored items.

    Args:
        predictions: Iterable of 5-field prediction records, unpacked as
            (user id, item id, true rating, estimated rating, details).
            Only the user id, item id and estimated rating are used.
        n(int): Maximum number of items kept per user. Default is 10.

    Returns:
        A defaultdict(list) mapping each user id to a list of
        (item id, estimated rating) tuples, ordered from the highest to the
        lowest estimate and truncated to the first n entries.
    '''

    # Bucket every (item, estimate) pair under the user it was predicted for.
    per_user = defaultdict(list)
    for prediction in predictions:
        uid, iid, _, est, _ = prediction
        per_user[uid].append((iid, est))

    # Rank each bucket by estimate, best first, and cut it down to n entries.
    top_n = defaultdict(list)
    for uid, candidates in per_user.items():
        ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
        top_n[uid] = ranked[:n]

    return top_n

# Load data from song data file

In [3]:
from surprise import Reader

# Skip the first SKIP_LINES lines of the CSV so that only the remaining tail of
# the file is parsed; this keeps the dataset small enough for a low-memory
# machine. NOTE: this does not cap the number of lines read, so the sample size
# depends on how long the file is.
SKIP_LINES = 800000

# Fields are comma-separated.
reader = Reader(sep=',', skip_lines=SKIP_LINES)
data = Dataset.load_from_file('song_data.csv', reader=reader)

# Build the full training set and the anti-test set (no train/test split)

In [4]:
# Train on every loaded rating; nothing is held out.
trainset = data.build_full_trainset()
# Parenthesised single-argument print gives the same output on Python 2 and 3.
print(trainset.n_ratings)
# Candidate pairs to score: built from the training set as its "anti-testset".
testset = trainset.build_anti_testset()

7667


# Train SVD model

In [5]:
algo = SVD()
# fit() is the supported training entry point; train() was deprecated in
# Surprise 1.0.5 and removed in 1.1.0.
algo.fit(trainset)

# Predict ratings for all pairs (u, i) that are NOT in the training set

In [6]:
# Estimate a rating for each (user, item) pair in the anti-testset built above;
# `predictions` holds one result per pair.
predictions = algo.test(testset)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


# Print first 5 predictions

In [11]:
# Show only a small sample; parenthesised single-argument print gives the same
# output on Python 2 and 3.
print(predictions[:5])

[Prediction(uid='493f4e98adae9e7ef6a37bcfc9b4197973954b5f', iid='SOAPLWH12A8C140D4F', r_ui=3.6491456893178555, est=2.3122997578468376, details={u'was_impossible': False}), Prediction(uid='493f4e98adae9e7ef6a37bcfc9b4197973954b5f', iid='SOBYTGE12AB018CE3C', r_ui=3.6491456893178555, est=2.5914729821892615, details={u'was_impossible': False}), Prediction(uid='493f4e98adae9e7ef6a37bcfc9b4197973954b5f', iid='SOEOKUD12A58A7D288', r_ui=3.6491456893178555, est=5, details={u'was_impossible': False}), Prediction(uid='493f4e98adae9e7ef6a37bcfc9b4197973954b5f', iid='SOIEXJX12AB0187E15', r_ui=3.6491456893178555, est=2.4244188797239166, details={u'was_impossible': False}), Prediction(uid='493f4e98adae9e7ef6a37bcfc9b4197973954b5f', iid='SOKENKR12AB01828F7', r_ui=3.6491456893178555, est=2.6829148486351482, details={u'was_impossible': False})]


# Get top 10 recommendations for all users

In [12]:
# Keep, for every user, the 10 items with the highest estimated rating.
top_n = get_top_n(predictions, n=10)

# Print the recommended items for each user

In [13]:
for uid, user_ratings in top_n.items():
    # Drop the estimated ratings and keep only the recommended item ids.
    recommended = [iid for (iid, _) in user_ratings]
    print(uid, recommended)

('690384e34847dfb46890462ce79bc61df20c6a0d', ['SOVRMZU12AB017FE90', 'SOEGIYH12A6D4FC0E3', 'SOHLLRP12A6701F2F4', 'SOOMUUY12A58A7B0EA', 'SONQEYS12AF72AABC9', 'SOSMEYT12A6D4FE1CE', 'SOCUYKP12A8C13CDD0', 'SOXXSVC12A582A242D', 'SOADVUP12AB0185246', 'SOTCMDJ12A6D4F8528'])
('3dcca3b6addf31dc420351c7cece8c11e80388b2', ['SOEGIYH12A6D4FC0E3', 'SOHLLRP12A6701F2F4', 'SOSMEYT12A6D4FE1CE', 'SOPABZM12A6D4FC668', 'SOXXSVC12A582A242D', 'SOQBMFK12A8C13835B', 'SOCNAXF12A6D4F9B34', 'SOKCXXE12A6D4F5A04', 'SOPJOZS12A670206E2', 'SOOMUUY12A58A7B0EA'])
('f6255ef4f5df81d81ff98f9fde5f19e0b9e28613', ['SOEGIYH12A6D4FC0E3', 'SOHLLRP12A6701F2F4', 'SOUFTBI12AB0183F65', 'SOWEHOM12A6BD4E09E', 'SOCUYKP12A8C13CDD0', 'SOQBMFK12A8C13835B', 'SOTCMDJ12A6D4F8528', 'SONWEHY12A58A796B5', 'SOVBBUR12AB0181734', 'SOKCXXE12A6D4F5A04'])
('5827384be24edd3bd6b981eb1f40950e1d985d02', ['SOEOKUD12A58A7D288', 'SOVQSQZ12A8C13F960', 'SOBNDDJ12A67AE228C', 'SOCMKES12A58A7AC2B', 'SOHJOZQ12A67AE228A', 'SOHWMBK12A67AE228D', 'SOKQGRH12A58A7BC39',