In [1]:
import time

import pandas as pd

# Import the correct algorithm
from surprise import Dataset, CoClustering, Reader 
from surprise.model_selection import GridSearchCV, KFold, train_test_split

In [2]:
# Load the built-in MovieLens-1M ratings; `df` is the dataset object that the
# grid search and the train/test split below both consume.
df = Dataset.load_builtin(name="ml-1m")

In [3]:
# MovieLens-1M rating lines have the format 'user item rating timestamp',
# separated by '::' characters.
# NOTE: `reader` is never passed to `Dataset.load_builtin`, so it has no effect
# on `df`. It is only needed if the raw ratings file is loaded manually, e.g.
# via `Dataset.load_from_file(path, reader=reader)`.
reader = Reader(line_format="user item rating timestamp", sep='::')

# The dataset is NOT reloaded here: `df` was already loaded from 'ml-1m' in the
# previous cell, and parsing the ratings file a second time only costs time.

# Hyper-parameter grid: change it according to what you want to tune in your
# model. It shouldn't be too complicated; a few sets of permutations suffice.
#   n_cltr_u     - number of user clusters
#   n_cltr_i     - number of item clusters
#   random_state - fixed so the co-clustering initialisation is repeatable
#   verbose      - print training progress for each fit
param_grid = {
    "n_cltr_u": [2, 3, 4],
    "n_cltr_i": [2, 3, 4],
    "random_state": [1],
    "verbose": [True],
}

# Passing a bare integer as `cv` builds a shuffling KFold with no seed, so the
# folds (and therefore the best score/params) would differ between runs.
# Seed the splitter explicitly to keep the search reproducible.
cv_folds = KFold(n_splits=3, random_state=1, shuffle=True)

# change algorithm here to SVD etc.
gs = GridSearchCV(
    CoClustering,
    param_grid,
    measures=["rmse"],
    cv=cv_folds,
    n_jobs=-1,  # use all available CPU cores
)

# Show every parameter combination the search will evaluate.
gs.param_combinations


[{'n_cltr_u': 2, 'n_cltr_i': 2, 'random_state': 1, 'verbose': True},
 {'n_cltr_u': 2, 'n_cltr_i': 3, 'random_state': 1, 'verbose': True},
 {'n_cltr_u': 2, 'n_cltr_i': 4, 'random_state': 1, 'verbose': True},
 {'n_cltr_u': 3, 'n_cltr_i': 2, 'random_state': 1, 'verbose': True},
 {'n_cltr_u': 3, 'n_cltr_i': 3, 'random_state': 1, 'verbose': True},
 {'n_cltr_u': 3, 'n_cltr_i': 4, 'random_state': 1, 'verbose': True},
 {'n_cltr_u': 4, 'n_cltr_i': 2, 'random_state': 1, 'verbose': True},
 {'n_cltr_u': 4, 'n_cltr_i': 3, 'random_state': 1, 'verbose': True},
 {'n_cltr_u': 4, 'n_cltr_i': 4, 'random_state': 1, 'verbose': True}]

In [None]:
# Run the cross-validated grid search over every parameter combination.
gs.fit(df)

# Pull out the RMSE results once, then report them.
best_rmse = gs.best_score["rmse"]      # lowest mean RMSE across the folds
best_params = gs.best_params["rmse"]   # parameter set that achieved it

print('RMSE_best_score:', best_rmse)
print('RMSE_best_params', best_params)


In [None]:
# Hold out 25% of the ratings for testing; the seed keeps the split repeatable.
train_data, test_data = train_test_split(
    data=df,
    test_size=0.25,
    random_state=1,
)

In [None]:
# Take the estimator configured with the best-RMSE parameters from the search
# and train it on the training split.
metric = "rmse"
algo = gs.best_estimator[metric]

algo.fit(train_data)

In [None]:
# Turn the held-out test split into a DataFrame and give the three positional
# columns descriptive names.
column_names = {0: "uid", 1: "iid", 2: "rating"}
test_data = pd.DataFrame(test_data).rename(columns=column_names)

In [None]:
# Extract the id columns once, outside the timed region. Looking rows up with
# `test_data.loc[i][...]` inside the loop builds a new Series per access, which
# dominates the measured time and can coerce ids when columns have mixed
# numeric dtypes. Column-wise extraction keeps each id's original type.
uids = test_data["uid"].tolist()
iids = test_data["iid"].tolist()

# Time only the model's predict calls. perf_counter() is a monotonic,
# high-resolution clock intended for measuring elapsed durations.
begin = time.perf_counter()
y_pred_array = [algo.predict(uid, iid) for uid, iid in zip(uids, iids)]
end = time.perf_counter()


In [None]:
# Preview the first few predictions rather than dumping the entire list (one
# entry per test rating) into the notebook output.
y_pred_array[:5]

In [None]:
# Report how long the prediction pass took, rounded to two decimals.
elapsed_seconds = round(end - begin, 2)
print(f"time taken to generate test predictions = {elapsed_seconds} seconds")