In [1]:
from MovieLens import MovieLens
from surprise import SVD, SVDpp
from surprise import NormalPredictor
from Evaluator import Evaluator

import random
import numpy as np

In [2]:
def LoadMovieLensData():
    """Load the MovieLens ratings and the movie popularity rankings.

    Creates a ``MovieLens`` helper, asks it for the "latest small" ratings
    data set, then asks it for the popularity ranks. A progress message is
    printed before each of the two steps.

    Returns:
        tuple: ``(movieLens, ratingsData, popularityRanks)`` -- the helper
        instance, the value returned by ``loadMovieLensLatestSmall()`` and
        the value returned by ``getPopularityRanks()``, in that order.
    """
    movieLens = MovieLens()

    print("Loading movie ratings...")
    ratingsData = movieLens.loadMovieLensLatestSmall()

    print("\nComputing movie popularity ranks so we can measure novelty later...")
    popularityRanks = movieLens.getPopularityRanks()

    return movieLens, ratingsData, popularityRanks

# Pin both global pseudo-random generators (Python's `random` module and
# NumPy's legacy global RNG) to a fixed seed so repeated runs start from the
# same random state.
random.seed(0)
np.random.seed(0)

In [3]:
# Unpack the loader's 3-tuple: the MovieLens helper, the ratings data used for
# evaluation, and the popularity rankings.
ml, evaluationData, rankings = LoadMovieLensData()

Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...


In [4]:
# Create the evaluator from the ratings data and the popularity rankings;
# algorithms are registered on it in the following cells.
evaluator = Evaluator(evaluationData, rankings)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [5]:
# Keep the instance under its own name. Assigning it to `SVD` would rebind the
# imported class name to an instance, so any later `SVD(...)` call or
# `GridSearchCV(SVD, ...)` (which needs the class itself) would fail.
SVDAlgorithm = SVD()
evaluator.AddAlgorithm(SVDAlgorithm, "SVD")

In [6]:
# Register Surprise's SVDpp algorithm, built with its default parameters,
# under the label "SVD++".
SVDPlusPlus = SVDpp()
evaluator.AddAlgorithm(SVDPlusPlus, "SVD++")

In [7]:
# Register NormalPredictor under the label "Random" -- presumably as a
# baseline for the SVD variants to be compared against.
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

In [8]:
# Evaluate every registered algorithm.
# NOTE(review): the False flag presumably turns off the top-N metrics (only
# RMSE and MAE are reported) -- confirm against Evaluator.Evaluate.
evaluator.Evaluate(False)

# Print sample top-N recommendations, passing the MovieLens helper.
evaluator.SampleTopNRecs(ml)

Evaluating  SVD ...
Evaluating accuracy...
Analysis complete.
Evaluating  SVD++ ...
Evaluating accuracy...
Analysis complete.
Evaluating  Random ...
Evaluating accuracy...
Analysis complete.


Algorithm  RMSE       MAE       
SVD        0.9039     0.6984    
SVD++      0.8943     0.6887    
Random     1.4359     1.1493    

Legend:

RMSE:      Root Mean Squared Error. Lower values mean better accuracy.
MAE:       Mean Absolute Error. Lower values mean better accuracy.

Using recommender  SVD

Building recommendation model...
Computing recommendations...

We recommend:
Gladiator (1992) 4.520884890007874
Philadelphia Story, The (1940) 4.420701711947352
Stand by Me (1986) 4.3959589752178365
Moon (2009) 4.372613693384055
Happiness (1998) 4.369493252705134
American Graffiti (1973) 4.353470600109924
And Your Mother Too (Y tu mamá también) (2001) 4.3491924926304995
Wallace & Gromit: A Close Shave (1995) 4.3154412154304085
Band of Brothers (2001) 4.315414828016616
Seven Samurai (Shichinin no

In [12]:
# Importing SVD here guarantees the name refers to the algorithm class, which
# is what GridSearchCV expects as its first argument (not an instance).
from surprise import SVD
from surprise.model_selection import GridSearchCV

print("Searching for best parameters...")

# Hyperparameter grid: two candidate values for each of three parameters.
param_grid = {
    'n_epochs': [20, 30],
    'lr_all': [0.005, 0.010],
    'n_factors': [50, 100],
}

# Score each combination on RMSE and MAE using 3-fold cross-validation.
gs = GridSearchCV(SVD, param_grid, cv=3, measures=['rmse', 'mae'])
gs.fit(evaluationData)

Searching for best parameters...


In [13]:
# Best (lowest) RMSE found by the grid search.
print("Best RMSE score attained: ", gs.best_score['rmse'])

# The hyperparameter combination that produced that best RMSE.
print(gs.best_params['rmse'])

Best RMSE score attained:  0.8995438007617033
{'n_epochs': 20, 'lr_all': 0.005, 'n_factors': 50}


In [14]:
# Build a new Evaluator for the tuned-vs-untuned comparison.
# NOTE(review): the recorded output of the evaluation run that follows also
# lists the algorithms registered on the earlier evaluator; presumably
# Evaluator keeps its algorithm list at class level -- confirm in Evaluator.py.
evaluator = Evaluator(evaluationData, rankings)

# Forward the whole winning combination to SVD. Every key in the search grid
# is a valid SVD keyword (GridSearchCV builds its candidates the same way), so
# a hyperparameter added to the grid later is applied here automatically
# instead of being silently dropped by a hard-coded keyword list.
params = gs.best_params['rmse']
SVDtuned = SVD(**params)
evaluator.AddAlgorithm(SVDtuned, "SVD - Tuned")

# SVD with default parameters, as the reference point for the tuned model.
SVDUntuned = SVD()
evaluator.AddAlgorithm(SVDUntuned, "SVD - Untuned")

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [15]:
# Also register NormalPredictor under the label "Random", to produce random
# recommendations alongside the two SVD models.
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

In [16]:
# Evaluate the algorithms registered on the evaluator.
# NOTE(review): the False flag presumably turns off the top-N metrics (only
# RMSE and MAE are reported) -- confirm against Evaluator.Evaluate.
evaluator.Evaluate(False)

# Print sample top-N recommendations, passing the MovieLens helper.
evaluator.SampleTopNRecs(ml)

Evaluating  SVD ...
Evaluating accuracy...
Analysis complete.
Evaluating  SVD++ ...
Evaluating accuracy...
Analysis complete.
Evaluating  Random ...
Evaluating accuracy...
Analysis complete.
Evaluating  SVD - Tuned ...
Evaluating accuracy...
Analysis complete.
Evaluating  SVD - Untuned ...
Evaluating accuracy...
Analysis complete.
Evaluating  Random ...
Evaluating accuracy...
Analysis complete.


Algorithm  RMSE       MAE       
SVD        0.9041     0.6996    
SVD++      0.8951     0.6880    
Random     1.4492     1.1555    
SVD - Tuned 0.9020     0.6969    
SVD - Untuned 0.9028     0.6981    

Legend:

RMSE:      Root Mean Squared Error. Lower values mean better accuracy.
MAE:       Mean Absolute Error. Lower values mean better accuracy.

Using recommender  SVD

Building recommendation model...
Computing recommendations...

We recommend:
Kids (1995) 4.559904203316939
Seven (a.k.a. Se7en) (1995) 4.506499614528053
Singin' in the Rain (1952) 4.454751408627558
Three Colors: White (Trzy k