In [7]:
import pandas as pd
from surprise.model_selection import train_test_split, GridSearchCV, cross_validate
from surprise import Reader, Dataset, SVD, accuracy
import time

## Interactions 100% of Dataset

In [13]:
df = pd.read_csv("interactions_100.csv")
reader = Reader(rating_scale=(0,5))
data = Dataset.load_from_df(df[['u', 'i', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=.25)

In [14]:
param_grid = {'n_factors': [10,25,75,100, 125], 'n_epochs': [5,10,20,30], 
              'lr_all': [0.005, 0.1, 0.01],'reg_all': [0,0.02,0.05,0.10,0.20]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv = 5, n_jobs = -1, joblib_verbose=5)
start = time.time()

gs.fit(data)

end = time.time()
elapsed_seconds = (end-start)
print("Elapsed time (in seconds): ", elapsed_seconds)

print("RMSE: " , gs.best_score['rmse'])
print("Best Paramas: " , gs.best_params['rmse'])

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:   21.9s
[Parallel(n_jobs=-1)]: Done 122 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 248 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 410 tasks      | elapsed:  5.4min
[Parallel(n_jobs=-1)]: Done 608 tasks      | elapsed:  9.4min
[Parallel(n_jobs=-1)]: Done 842 tasks      | elapsed: 14.9min
[Parallel(n_jobs=-1)]: Done 1112 tasks      | elapsed: 22.6min
[Parallel(n_jobs=-1)]: Done 1418 tasks      | elapsed: 33.2min


Elapsed time (in seconds):  2293.113224506378
RMSE:  0.9234952787288172
Best Paramas:  {'n_factors': 10, 'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.1}


[Parallel(n_jobs=-1)]: Done 1500 out of 1500 | elapsed: 38.1min finished


In [15]:
svd = SVD(n_factors=10, n_epochs=20, lr_all=0.005, reg_all=0.1)

start = time.time()

svd.fit(trainset)

end = time.time()
elapsed_seconds = (end-start)
print("Elapsed time (in seconds): ", elapsed_seconds)

predictions = svd.test(testset)
print(accuracy.rmse(predictions))

Elapsed time (in seconds):  7.724573135375977
RMSE: 0.9280
0.9280104928710219


In [16]:
svd.predict(uid = '11', iid = '2166')

Prediction(uid='11', iid='2166', r_ui=None, est=4.564623299875275, details={'was_impossible': False})

## Interactions 50% of Dataset

In [8]:
df = pd.read_csv("interactions_50.csv")
reader = Reader(rating_scale=(0,5))
data = Dataset.load_from_df(df[['u', 'i', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=.25)

In [9]:
param_grid = {'n_factors': [10,25,75,100, 125], 'n_epochs': [5,10,20,30], 
              'lr_all': [0.005, 0.1, 0.01],'reg_all': [0,0.02,0.05,0.10,0.20]}
gs2 = GridSearchCV(SVD, param_grid, measures=['rmse'], cv = 5, n_jobs = -1, joblib_verbose=5)
start = time.time()

gs2.fit(data)

end = time.time()
elapsed_seconds = (end-start)
print("Elapsed time (in seconds): ", elapsed_seconds)

print("RMSE: " , gs2.best_score['rmse'])
print("Best Paramas: " , gs2.best_params['rmse'])

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:   11.4s
[Parallel(n_jobs=-1)]: Done 122 tasks      | elapsed:   40.3s
[Parallel(n_jobs=-1)]: Done 248 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 410 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 608 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 842 tasks      | elapsed:  7.6min
[Parallel(n_jobs=-1)]: Done 1112 tasks      | elapsed: 11.5min
[Parallel(n_jobs=-1)]: Done 1418 tasks      | elapsed: 16.9min


Elapsed time (in seconds):  1159.3083155155182
RMSE:  0.9303938914505544
Best Paramas:  {'n_factors': 10, 'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.2}


[Parallel(n_jobs=-1)]: Done 1500 out of 1500 | elapsed: 19.3min finished


In [11]:
svd2 = SVD(n_factors=10, n_epochs=20, lr_all=0.005, reg_all=0.2)

start = time.time()

svd2.fit(trainset)

end = time.time()
elapsed_seconds = (end-start)
print("Elapsed time (in seconds): ", elapsed_seconds)

predictions2 = svd2.test(testset)
print(accuracy.rmse(predictions2))

Elapsed time (in seconds):  3.815241813659668
RMSE: 0.9343
0.9342573763683432


In [12]:
svd2.predict(uid = '11', iid = '2166')

Prediction(uid='11', iid='2166', r_ui=None, est=4.566059868147533, details={'was_impossible': False})

## Interactions 25% of Dataset

In [17]:
df = pd.read_csv("interactions_25.csv")
reader = Reader(rating_scale=(0,5))
data = Dataset.load_from_df(df[['u', 'i', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=.25)

In [18]:
param_grid = {'n_factors': [10,25,75,100, 125], 'n_epochs': [5,10,20,30], 
              'lr_all': [0.005, 0.1, 0.01],'reg_all': [0,0.02,0.05,0.10,0.20]}
gs3 = GridSearchCV(SVD, param_grid, measures=['rmse'], cv = 5, n_jobs = -1, joblib_verbose=5)
start = time.time()

gs3.fit(data)

end = time.time()
elapsed_seconds = (end-start)
print("Elapsed time (in seconds): ", elapsed_seconds)

print("RMSE: " , gs3.best_score['rmse'])
print("Best Paramas: " , gs3.best_params['rmse'])

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done 122 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 248 tasks      | elapsed:   47.2s
[Parallel(n_jobs=-1)]: Done 410 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 608 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 842 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 1112 tasks      | elapsed:  5.6min
[Parallel(n_jobs=-1)]: Done 1418 tasks      | elapsed:  8.2min


Elapsed time (in seconds):  568.7012932300568
RMSE:  0.9454468036461219
Best Paramas:  {'n_factors': 10, 'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.2}


[Parallel(n_jobs=-1)]: Done 1500 out of 1500 | elapsed:  9.5min finished


In [21]:
svd3 = SVD(n_factors=10, n_epochs=20, lr_all=0.005, reg_all=0.2)

start = time.time()

svd3.fit(trainset)

end = time.time()
elapsed_seconds = (end-start)
print("Elapsed time (in seconds): ", elapsed_seconds)

predictions3 = svd3.test(testset)
print(accuracy.rmse(predictions3))

Elapsed time (in seconds):  1.8769786357879639
RMSE: 0.9394
0.9394088055447738


In [24]:
svd3.predict(uid = '11', iid = '2166')

Prediction(uid='11', iid='2166', r_ui=None, est=4.561234186612817, details={'was_impossible': False})