In [17]:
import surprise

In [18]:
import tensorflow as tf

In [142]:
from surprise import KNNWithMeans
from surprise import Dataset
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate
import zipfile
from surprise import Reader, Dataset, SVD

In [143]:
# Keep the raw text rows of the ratings file around as a list of strings.
with open('./ml-100k/u.data') as ratings_file:
    all_lines = list(ratings_file)

# Describe the file layout for Surprise (tab-separated columns: user, item,
# rating, timestamp), then load the same file as a Surprise Dataset.
reader = Reader(sep='\t', line_format='user item rating timestamp')
data = Dataset.load_from_file('./ml-100k/u.data', reader=reader)

In [144]:
# Disabled experiment: grid search over KNNWithMeans similarity options.
# sim_options = {
#     "name": ["msd", "cosine"],
#     "min_support": [3, 4, 5],
#     "user_based": [False, True],
# }

# param_grid = {"sim_options": sim_options}

# gs = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse", "mae"], cv=3)
# gs.fit(data)

# print(gs.best_score["rmse"])
# print(gs.best_params["rmse"])

Grid search for the best SVD hyperparameters

In [145]:
# Candidate SVD hyperparameters; the search evaluates every combination.
param_grid = {"n_epochs": [5, 10], "lr_all": [0.002, 0.005], "reg_all": [0.4, 0.6]}

# 3-fold cross-validated grid search, scored on both RMSE and MAE.
gs = GridSearchCV(
    SVD,
    param_grid,
    measures=["rmse", "mae"],
    cv=3,
)
gs.fit(data)

# Print the best RMSE first, then the parameter combination that achieved it.
for best_of in (gs.best_score, gs.best_params):
    print(best_of["rmse"])

0.9642551176571187
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


SVD algorithm chosen, using the best parameters found by the grid search

In [146]:
# Build the SVD from the grid search's best-RMSE parameters instead of
# re-typing them by hand, so this cell cannot drift from what `gs` found
# (the search is unseeded, so the winning combination may change between runs).
algo = SVD(**gs.best_params["rmse"])

5-fold cross-validation of the chosen SVD model

In [147]:
# Evaluate `algo` on `data` with 5 cross-validation folds, printing the
# per-fold table; the returned results dict is the cell's displayed value.
cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9586  0.9601  0.9639  0.9610  0.9630  0.9613  0.0019  
MAE (testset)     0.7680  0.7720  0.7729  0.7688  0.7701  0.7704  0.0019  
Fit time          1.80    1.82    1.83    1.83    1.80    1.82    0.01    
Test time         0.10    0.10    0.10    0.09    0.26    0.13    0.07    


{'test_rmse': array([0.95862191, 0.96008279, 0.96392547, 0.9610229 , 0.96298481]),
 'test_mae': array([0.76795441, 0.77204179, 0.77286055, 0.76876578, 0.77013   ]),
 'fit_time': (1.8049962520599365,
  1.8181238174438477,
  1.8250021934509277,
  1.8252167701721191,
  1.8040225505828857),
 'test_time': (0.09501338005065918,
  0.09596729278564453,
  0.09500384330749512,
  0.09299921989440918,
  0.2600100040435791)}

In [162]:
def predict_scores(data):
    """Fit the module-level ``algo`` on all of ``data`` and score every pair.

    Parameters
    ----------
    data
        Object exposing ``build_full_trainset()`` (here, the Surprise dataset
        loaded earlier in the notebook).

    Returns
    -------
    list
        One estimated rating (``.est`` of each prediction) per (user, item)
        pair, ordered user by user in the trainset's user order and, within
        each user, in the trainset's item order.

    Notes
    -----
    ``algo`` is read from module scope and is re-fitted in place by this call.
    """
    # Build the full training set; the model must be fitted before predicting.
    trainset = data.build_full_trainset()
    algo.fit(trainset)

    # all_users()/all_items() yield the trainset's *inner* ids, while
    # algo.predict() expects the *raw* ids from the ratings file. Translate
    # them once up front so each prediction targets the intended user/item
    # (passing str(inner_id) would silently score a different or unknown id).
    raw_user_ids = [trainset.to_raw_uid(inner_uid) for inner_uid in trainset.all_users()]
    raw_item_ids = [trainset.to_raw_iid(inner_iid) for inner_iid in trainset.all_items()]

    # Estimate the rating of every user for every item.
    return [
        algo.predict(raw_uid, raw_iid).est
        for raw_uid in raw_user_ids
        for raw_iid in raw_item_ids
    ]

In [181]:
import timeit

# Time one complete predict_scores run over the dataset.
start = timeit.default_timer()
ratings = predict_scores(data)
stop = timeit.default_timer()

elapsed = stop - start
print('Time: ', elapsed)

Time:  10.136127699999975
