In [1]:
from surprise.dataset import Reader
from surprise import SVD
from surprise import Dataset

from collections import defaultdict

In [2]:
def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [9]:
data = Dataset.load_builtin('ml-1m')
trainset = data.build_full_trainset()

Dataset ml-1m could not be found. Do you want to download it? [Y/n] Y
Trying to download dataset from http://files.grouplens.org/datasets/movielens/ml-1m.zip...
Done! Dataset ml-1m has been saved to /home/ec2-user/.surprise_data/ml-1m


In [10]:


# First train an SVD algorithm on the movielens dataset.
algo = SVD()
algo.train(trainset)







In [13]:
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

In [16]:
from surprise import dump

In [35]:
dump.dump('predictions_1m_dump.pkl', predictions=predictions, verbose=1) 

OSError: [Errno 28] No space left on device

In [28]:
print('hello')


hello


In [None]:
load_predictions = dump.load('predictions_1m_dump.pkl')

In [19]:
top_n = get_top_n(predictions, n=10)

In [40]:
# Print the recommended items for each user
dictionary = {}
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])
    dictionary[uid] = [iid for (iid, _) in user_ratings]

1 ['318', '953', '2905', '50', '858', '1234', '1262', '1203', '1307', '922']
2 ['527', '1035', '2905', '2762', '1704', '3753', '953', '1172', '745', '1307']
3 ['2324', '318', '110', '2905', '1704', '527', '745', '1213', '50', '1234']
4 ['904', '2186', '527', '1172', '3469', '2937', '3134', '858', '930', '318']
5 ['1217', '3741', '1218', '3429', '750', '111', '1348', '1136', '3462', '1199']
6 ['2905', '3916', '2609', '1236', '2861', '670', '53', '2324', '3675', '1271']
7 ['1136', '50', '2905', '318', '3338', '1197', '1250', '1923', '593', '1209']
8 ['745', '1136', '50', '2959', '923', '541', '2019', '750', '1148', '1200']
9 ['1198', '858', '2905', '1200', '260', '110', '1036', '1196', '1242', '1214']
10 ['3178', '2671', '2905', '670', '214', '2084', '1246', '3831', '2565', '1907']
11 ['1147', '2905', '223', '296', '58', '555', '1199', '1220', '745', '1193']
12 ['50', '318', '2905', '296', '1358', '670', '527', '1136', '1276', '2858']
13 ['2905', '3338', '858', '1250', '905', '953', '904

1154 ['904', '3022', '858', '260', '923', '2019', '750', '1267', '745', '2905']
1155 ['3552', '3421', '541', '858', '1258', '2791', '2905', '1204', '904', '1214']
1156 ['2905', '1947', '670', '527', '745', '1271', '318', '1293', '1193', '1250']
1157 ['527', '1207', '2324', '858', '1272', '2905', '670', '1198', '1221', '904']
1158 ['1035', '597', '3844', '590', '1271', '898', '2609', '2905', '1935', '2019']
1159 ['1136', '1214', '50', '858', '2905', '750', '1198', '1252', '1288', '1617']
1160 ['2905', '668', '296', '527', '318', '2360', '904', '912', '2324', '1148']
1161 ['3022', '3307', '1178', '1252', '1212', '1223', '2019', '1189', '1288', '3741']
1162 ['2905', '745', '3429', '1148', '1224', '3089', '3338', '2019', '904', '1214']
1163 ['3429', '720', '2905', '3679', '541', '1188', '750', '3038', '3083', '919']
1164 ['1035', '1250', '908', '1304', '1944', '556', '910', '3196', '1204', '1172']
1165 ['720', '745', '1952', '1148', '1178', '2905', '3030', '2959', '3429', '2019']
1166 ['60

2517 ['2501', '2609', '2203', '905', '1207', '17', '2905', '1111', '1674', '3916']
2518 ['527', '318', '593', '2905', '1234', '1704', '3196', '908', '905', '1250']
2519 ['2905', '326', '908', '905', '904', '745', '3365', '3462', '527', '318']
2520 ['1221', '1276', '923', '750', '1178', '912', '1204', '1193', '527', '3730']
2521 ['2905', '318', '3578', '3147', '1036', '1639', '2268', '3148', '780', '1784']
2522 ['1136', '2804', '1236', '932', '2788', '1206', '2731', '318', '2019', '1272']
2523 ['2997', '750', '3000', '2905', '1248', '3265', '1147', '1237', '3022', '1212']
2524 ['527', '318', '2905', '2762', '2431', '3408', '2324', '2028', '3916', '745']
2525 ['2905', '318', '527', '905', '213', '953', '969', '919', '1111', '3469']
2526 ['912', '1237', '307', '1300', '2935', '58', '2360', '678', '1104', '1207']
2527 ['3552', '1272', '318', '1234', '3421', '1220', '1262', '1136', '745', '904']
2528 ['3338', '3435', '2905', '527', '1204', '318', '2351', '2858', '904', '678']
2529 ['2905', 

3834 ['1197', '2791', '3030', '593', '1394', '162', '741', '1060', '3358', '1257']
3835 ['720', '527', '2762', '1196', '2571', '1221', '858', '3338', '922', '326']
3836 ['954', '1678', '1246', '904', '508', '2186', '3916', '3265', '3408', '920']
3837 ['1258', '326', '913', '1207', '3196', '1250', '912', '1272', '1204', '1262']
3838 ['2905', '904', '318', '3435', '2019', '668', '296', '50', '2360', '745']
3839 ['745', '1148', '3479', '911', '2396', '1223', '2905', '260', '720', '318']
3840 ['745', '1035', '905', '2905', '1148', '1223', '318', '3679', '1380', '720']
3841 ['3077', '3089', '363', '2503', '2970', '53', '2351', '123', '1237', '3416']
3842 ['527', '3030', '1188', '3728', '1046', '1912', '1649', '899', '3700', '1212']
3843 ['104', '2905', '1917', '1111', '170', '110', '2021', '2683', '3512', '3617']
3844 ['2791', '720', '527', '745', '2692', '608', '1537', '1103', '3468', '1213']
3845 ['2905', '3022', '3307', '1278', '913', '1224', '3030', '3435', '1136', '670']
3846 ['527', '

5231 ['2804', '318', '2858', '260', '541', '750', '50', '2762', '2360', '913']
5232 ['1243', '1204', '1237', '1235', '3022', '800', '2019', '1183', '50', '1080']
5233 ['260', '2905', '912', '3338', '953', '1214', '750', '1236', '2360', '1247']
5234 ['2019', '1178', '3730', '750', '3089', '3435', '1935', '2186', '162', '903']
5235 ['2905', '1233', '904', '50', '527', '3435', '3006', '1148', '1197', '1209']
5236 ['2905', '260', '1198', '527', '668', '3338', '1236', '670', '750', '2609']
5237 ['318', '745', '908', '50', '1147', '2905', '1223', '2324', '2804', '1148']
5238 ['953', '260', '2905', '1148', '1207', '2762', '670', '919', '912', '940']
5239 ['2905', '3435', '908', '922', '326', '924', '903', '3897', '745', '2066']
5240 ['1207', '50', '1204', '1224', '527', '745', '913', '2019', '953', '318']
5241 ['2905', '969', '1271', '2161', '1207', '1278', '1362', '1962', '934', '3451']
5242 ['745', '2908', '1354', '1223', '1212', '306', '2360', '2019', '930', '1289']
5243 ['32', '2959', '31

In [22]:
type(top_n)

collections.defaultdict

In [49]:
top_n['1']

[('318', 4.9240549429900469),
 ('953', 4.7987256694970561),
 ('2905', 4.7503789962907677),
 ('50', 4.7395102362300028),
 ('858', 4.7323646951316887),
 ('1234', 4.7236198350146061),
 ('1262', 4.6919157954277768),
 ('1203', 4.6600834299795979),
 ('1307', 4.6521249711490604),
 ('922', 4.6407619479685103)]

In [32]:
type(predictions)

list

In [34]:
predictions[:10]

[Prediction(uid='1', iid='1357', r_ui=3.5815644530293169, est=4.1618067298737831, details={'was_impossible': False}),
 Prediction(uid='1', iid='3068', r_ui=3.5815644530293169, est=4.122058371514612, details={'was_impossible': False}),
 Prediction(uid='1', iid='1537', r_ui=3.5815644530293169, est=4.3176799110625472, details={'was_impossible': False}),
 Prediction(uid='1', iid='647', r_ui=3.5815644530293169, est=3.7658719830688279, details={'was_impossible': False}),
 Prediction(uid='1', iid='2194', r_ui=3.5815644530293169, est=4.4040515676445109, details={'was_impossible': False}),
 Prediction(uid='1', iid='648', r_ui=3.5815644530293169, est=3.5809732661556621, details={'was_impossible': False}),
 Prediction(uid='1', iid='2268', r_ui=3.5815644530293169, est=4.3879938298705152, details={'was_impossible': False}),
 Prediction(uid='1', iid='2628', r_ui=3.5815644530293169, est=2.7729840280279969, details={'was_impossible': False}),
 Prediction(uid='1', iid='1103', r_ui=3.5815644530293169, e

In [26]:
import pandas as pd

TypeError: 'builtin_function_or_method' object is not iterable

In [50]:
dictionary

{'1': ['318',
  '953',
  '2905',
  '50',
  '858',
  '1234',
  '1262',
  '1203',
  '1307',
  '922'],
 '2': ['527',
  '1035',
  '2905',
  '2762',
  '1704',
  '3753',
  '953',
  '1172',
  '745',
  '1307'],
 '3': ['2324',
  '318',
  '110',
  '2905',
  '1704',
  '527',
  '745',
  '1213',
  '50',
  '1234'],
 '4': ['904',
  '2186',
  '527',
  '1172',
  '3469',
  '2937',
  '3134',
  '858',
  '930',
  '318'],
 '5': ['1217',
  '3741',
  '1218',
  '3429',
  '750',
  '111',
  '1348',
  '1136',
  '3462',
  '1199'],
 '6': ['2905',
  '3916',
  '2609',
  '1236',
  '2861',
  '670',
  '53',
  '2324',
  '3675',
  '1271'],
 '7': ['1136',
  '50',
  '2905',
  '318',
  '3338',
  '1197',
  '1250',
  '1923',
  '593',
  '1209'],
 '8': ['745',
  '1136',
  '50',
  '2959',
  '923',
  '541',
  '2019',
  '750',
  '1148',
  '1200'],
 '9': ['1198',
  '858',
  '2905',
  '1200',
  '260',
  '110',
  '1036',
  '1196',
  '1242',
  '1214'],
 '10': ['3178',
  '2671',
  '2905',
  '670',
  '214',
  '2084',
  '1246',
  '3831',


In [51]:
top_n['1']

[('318', 4.9240549429900469),
 ('953', 4.7987256694970561),
 ('2905', 4.7503789962907677),
 ('50', 4.7395102362300028),
 ('858', 4.7323646951316887),
 ('1234', 4.7236198350146061),
 ('1262', 4.6919157954277768),
 ('1203', 4.6600834299795979),
 ('1307', 4.6521249711490604),
 ('922', 4.6407619479685103)]

In [56]:
df2 = pd.DataFrame().from_dict(top_n, orient='index')

In [57]:
df2.to_csv('top10_users.csv', index=False)

In [60]:
df3 = pd.read_csv('top10_users.csv')

In [61]:
df3

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"('318', 4.9240549429900469)","('953', 4.7987256694970561)","('2905', 4.7503789962907677)","('50', 4.7395102362300028)","('858', 4.7323646951316887)","('1234', 4.7236198350146061)","('1262', 4.6919157954277768)","('1203', 4.6600834299795979)","('1307', 4.6521249711490604)","('922', 4.6407619479685103)"
1,"('527', 4.7771338971848269)","('1035', 4.675369382065651)","('2905', 4.6499224089248452)","('2762', 4.6208375951769742)","('1704', 4.590273202401062)","('3753', 4.5433771550488071)","('953', 4.5255341018206128)","('1172', 4.5159728156706249)","('745', 4.5142817589655149)","('1307', 4.4709004514870632)"
2,"('2324', 5)","('318', 4.9066444479783726)","('110', 4.8556656594023675)","('2905', 4.7344941196849923)","('1704', 4.7313613119964488)","('527', 4.6726802028903318)","('745', 4.5992730367552825)","('1213', 4.589917767347746)","('50', 4.5840304605085382)","('1234', 4.5696004114429334)"
3,"('904', 5)","('2186', 4.9689677813564375)","('527', 4.9545875514783582)","('1172', 4.9188868517242526)","('3469', 4.8951480954662747)","('2937', 4.8869292865800507)","('3134', 4.8816107561598248)","('858', 4.876849813118036)","('930', 4.8749258241728413)","('318', 4.8717547606732756)"
4,"('1217', 4.4439776231371404)","('3741', 4.4156485305009534)","('1218', 4.4115803376674867)","('3429', 4.4009360143967422)","('750', 4.3755511272404037)","('111', 4.3239205867656656)","('1348', 4.3225816518970692)","('1136', 4.3105175802417843)","('3462', 4.3100922668298471)","('1199', 4.3039926051372568)"
5,"('2905', 4.8531196960214329)","('3916', 4.6731591768249681)","('2609', 4.6662179437797988)","('1236', 4.6255034943333238)","('2861', 4.6027492417045766)","('670', 4.5940929898732872)","('53', 4.5894908026799461)","('2324', 4.5868510084686021)","('3675', 4.5803780639963589)","('1271', 4.5792299397765204)"
6,"('1136', 5)","('50', 5)","('2905', 5)","('318', 4.9678652728534036)","('3338', 4.9435152117214685)","('1197', 4.9411761502237148)","('1250', 4.9189252181718626)","('1923', 4.9107083893715693)","('593', 4.904406883863091)","('1209', 4.9020841965925248)"
7,"('745', 5)","('1136', 5)","('50', 5)","('2959', 5)","('923', 5)","('541', 5)","('2019', 5)","('750', 4.9967551799654144)","('1148', 4.9936265787645446)","('1200', 4.9803633212692802)"
8,"('1198', 4.7556724589118069)","('858', 4.7126048556722875)","('2905', 4.5699658010244031)","('1200', 4.5152287364228361)","('260', 4.4717917782704939)","('110', 4.4660821117079177)","('1036', 4.4375839806011408)","('1196', 4.4310918324026982)","('1242', 4.4165334933204399)","('1214', 4.3898984705171715)"
9,"('3178', 5)","('2671', 5)","('2905', 5)","('670', 5)","('214', 4.9047762709158205)","('2084', 4.8921686641680413)","('1246', 4.8782530935395148)","('3831', 4.8754032146658197)","('2565', 4.8649204569969022)","('1907', 4.8618461454877817)"
