In [11]:
!pip install scikit-surprise

from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate

data = Dataset.load_builtin(name='ml-100k', prompt=False)

SVDalg = SVD()

cross_validate(SVDalg, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9320  0.9367  0.9240  0.9499  0.9305  0.9346  0.0086  
MAE (testset)     0.7342  0.7388  0.7264  0.7505  0.7332  0.7366  0.0080  
Fit time          1.71    1.76    1.40    1.40    1.42    1.54    0.16    
Test time         0.22    0.24    0.12    0.12    0.23    0.19    0.05    


{'test_rmse': array([0.9319951 , 0.93666871, 0.92402881, 0.94990908, 0.9305477 ]),
 'test_mae': array([0.73422487, 0.73875146, 0.72636825, 0.75047699, 0.73319925]),
 'fit_time': (1.7128820419311523,
  1.7575912475585938,
  1.395437240600586,
  1.3952395915985107,
  1.41884183883667),
 'test_time': (0.2174358367919922,
  0.23967385292053223,
  0.12321662902832031,
  0.12339496612548828,
  0.2323465347290039)}

In [12]:
from surprise import accuracy
from sklearn.metrics import mean_absolute_error
from surprise.model_selection import train_test_split

# Hold back 25% of the ratings as an evaluation split; the rest is used for training.
trainset, testset = train_test_split(data, test_size=0.25)

# Train a default-configured SVD on the training split (fit() hands back the fitted
# algorithm, as relied on elsewhere in this notebook), then predict the held-out ratings.
SVDalg = SVD().fit(trainset)
predictions = SVDalg.test(testset)

# Final expression of the cell: prints the RMSE line and displays the returned value.
accuracy.rmse(predictions)

RMSE: 0.9424


0.942420804367665

In [13]:
from surprise.model_selection import GridSearchCV
# Candidate values for each SVD hyperparameter explored by the grid search.
param_grid = {
    'n_epochs': [5, 10],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.4, 0.6],
}

# Grid search over SVD using 3-fold cross-validation, tracking RMSE and MAE.
GSearch = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
GSearch.fit(data)

# First line: best RMSE score.
# Second line: the parameter combination that produced that score.
print(GSearch.best_score['rmse'], GSearch.best_params['rmse'], sep='\n')

0.9637620146031498
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


In [14]:
!pip install sigopt
# Pass your API token directly, overriding any environment variables
from sigopt import Connection
token = Connection(client_token="CUWCHIMRWOFKKVRKTUASCBNPAETSYJTXZOIWVLWKEPRLSTCW")



In [15]:
# Register a new SigOpt experiment describing the search space and the metrics.
experiment = token.experiments().create(
  name='Surprise Movies-100k Recommender',
  # Hyperparameters to tune, each with its allowed range
  parameters=[
    {'name': 'n_epochs', 'type': 'int', 'bounds': {'min': 5, 'max': 10}},
    {'name': 'lr_all', 'type': 'double', 'bounds': {'min': 0.002, 'max': 0.005}},
    {'name': 'reg_all', 'type': 'double', 'bounds': {'min': 0.4, 'max': 0.6}},
  ],
  # RMSE is the metric being optimized; MAE is only stored alongside it
  metrics=[
    {'name': 'RMSE', 'objective': 'minimize', 'strategy': 'optimize'},
    {'name': 'MAE', 'objective': 'minimize', 'strategy': 'store'},
  ],
  parallel_bandwidth=1,
  # Total number of observations the optimization loop will report
  observation_budget=50,
)
print("Created experiment: https://app.sigopt.com/experiment/" + experiment.id)

Created experiment: https://app.sigopt.com/experiment/1024216


In [16]:
def create_model(assignments):
  """Build an SVD model from suggested hyperparameters and fit it on `trainset`.

  Args:
    assignments: mapping that provides the 'n_epochs', 'lr_all' and 'reg_all'
      values to use for the model.

  Returns:
    Whatever `SVD(...).fit(trainset)` returns, i.e. the fitted algorithm that
    callers then use for `.test(...)`.
  """
  hyperparams = {
      key: assignments[key] for key in ('n_epochs', 'lr_all', 'reg_all')
  }
  # `trainset` is the notebook-level training split created in an earlier cell.
  return SVD(**hyperparams).fit(trainset)

def evaluate_model(assignments):
  """Fit a model for `assignments` and score it on the notebook-level `testset`.

  Args:
    assignments: hyperparameter mapping forwarded unchanged to `create_model`.

  Returns:
    A two-element list of dicts, `{"name": "RMSE", "value": ...}` followed by
    `{"name": "MAE", "value": ...}`, in the shape passed as observation values.
  """
  held_out_predictions = create_model(assignments).test(testset)
  # RMSE is computed before MAE; each accuracy call also prints its metric,
  # which is where the RMSE/MAE lines in the notebook output come from.
  rmse_value = accuracy.rmse(held_out_predictions)
  mae_value = accuracy.mae(held_out_predictions)
  return [
      {"name": "RMSE", "value": rmse_value},
      {"name": "MAE", "value": mae_value},
  ]

In [17]:
# Optimization loop: keep requesting suggestions until the number of reported
# observations reaches the experiment's observation budget.
while experiment.progress.observation_count < experiment.observation_budget:
  # Ask SigOpt for the next hyperparameter assignment to try
  suggestion = token.experiments(experiment.id).suggestions().create()
  # Fit and score a model for that assignment (returns the RMSE/MAE value dicts)
  value_dicts = evaluate_model(suggestion.assignments)
  # Report the measured metrics back against the suggestion
  token.experiments(experiment.id).observations().create(
    suggestion=suggestion.id,
    values=value_dicts,
  )

  # Update the experiment object so the loop condition sees the new observation count
  experiment = token.experiments(experiment.id).fetch()

# Fetch the best configuration and explore your experiment
all_best_assignments = token.experiments(experiment.id).best_assignments().fetch()
# Returns a list of dict-like Observation objects
best_assignments = all_best_assignments.data[0].assignments
print("Best Assignments: " + str(best_assignments))
print("Explore your experiment: https://app.sigopt.com/experiment/" + experiment.id + "/analysis")

RMSE: 0.9940
MAE:  0.8024
RMSE: 0.9713
MAE:  0.7792
RMSE: 0.9800
MAE:  0.7887
RMSE: 0.9707
MAE:  0.7781
RMSE: 0.9736
MAE:  0.7809
RMSE: 0.9679
MAE:  0.7751
RMSE: 0.9692
MAE:  0.7771
RMSE: 0.9656
MAE:  0.7732
RMSE: 0.9766
MAE:  0.7836
RMSE: 0.9650
MAE:  0.7724
RMSE: 0.9648
MAE:  0.7722
RMSE: 0.9747
MAE:  0.7837
RMSE: 0.9789
MAE:  0.7871
RMSE: 0.9833
MAE:  0.7910
RMSE: 0.9753
MAE:  0.7826
RMSE: 0.9810
MAE:  0.7891
RMSE: 0.9752
MAE:  0.7842
RMSE: 0.9845
MAE:  0.7914
RMSE: 0.9840
MAE:  0.7905
RMSE: 0.9956
MAE:  0.8035
RMSE: 0.9819
MAE:  0.7902
RMSE: 0.9811
MAE:  0.7884
RMSE: 0.9679
MAE:  0.7756
RMSE: 0.9794
MAE:  0.7866
RMSE: 0.9863
MAE:  0.7947
RMSE: 0.9922
MAE:  0.8003
RMSE: 0.9904
MAE:  0.7984
RMSE: 0.9903
MAE:  0.7992
RMSE: 0.9677
MAE:  0.7754
RMSE: 0.9748
MAE:  0.7833
RMSE: 0.9875
MAE:  0.7942
RMSE: 0.9705
MAE:  0.7784
RMSE: 0.9707
MAE:  0.7792
RMSE: 0.9786
MAE:  0.7874
RMSE: 0.9800
MAE:  0.7878
RMSE: 0.9790
MAE:  0.7865
RMSE: 0.9651
MAE:  0.7726
RMSE: 0.9894
MAE:  0.7977
RMSE: 0.9771

