In [8]:
import sys
sys.path.append('../../')

from os.path import join as pjoin

import pandas as pd
import numpy as np

from src.utils import read_json_df
from src.models.factorization import AlternatingLeastSquaresModel, FunkSVDModel
from src.models.nn import NNColaborativeModel

In [9]:
# Relative path to the directory containing the yelp_academic_dataset_*.json
# files that the loading cell below joins against.
DATASET_PATH = "../../data/yelp_dataset/"

In [10]:
# Build the three input file paths under DATASET_PATH first, then read each one
# with the project helper `read_json_df`.
review_json_path = pjoin(DATASET_PATH, "yelp_academic_dataset_review.json")
business_json_path = pjoin(DATASET_PATH, "yelp_academic_dataset_business.json")
user_json_path = pjoin(DATASET_PATH, "yelp_academic_dataset_user.json")

review_df = read_json_df(review_json_path)
business_df = read_json_df(business_json_path)
user_df = read_json_df(user_json_path)

In [11]:
# Parse the review `date` column with pandas and write the parsed values back
# into the same column of review_df.
parsed_review_dates = pd.to_datetime(review_df['date'])
review_df['date'] = parsed_review_dates

## ALS

Hyperparameter tuning:

In [8]:
# Grid search for the ALS model over the regularisation value (`lambda_`) and
# the number of latent factors (`n_factors`).
#
# Each configuration is scored with
# evaluate(..., short_eval=True, short_eval_train_samples=10_000) and one
# summary line is printed, exactly as before. In addition, every result is
# appended to `als_tuning_records` so the configurations can be compared
# programmatically (e.g. `als_tuning_results.sort_values("rmse")`) instead of
# by reading the printed log.
als_tuning_records = []
for lambda_ in [0.1, 1, 10, 100]:
    for n_factors in [10, 20, 50, 100, 200]:
        als_model = AlternatingLeastSquaresModel(n_factors=n_factors, reguralization_param=lambda_, eps=1e-2)
        metrics = als_model.evaluate(review_df, user_df, business_df, short_eval=True, short_eval_train_samples=10_000)

        print(f"Lambda={lambda_}, {n_factors=}: RMSE={metrics['rmse']}, Accuracy={metrics['accuracy']}, MAP@K={metrics['MAP@K']}")

        # Keep the hyperparameters next to every metric returned by evaluate().
        als_tuning_records.append({"lambda": lambda_, "n_factors": n_factors, **metrics})

# Build the frame once from the list of records rather than growing it per iteration.
als_tuning_results = pd.DataFrame(als_tuning_records)

Evaluation fold: 100%|██████████| 1/1 [00:23<00:00, 23.90s/it]


Lambda=0.1, n_factors=10: RMSE=1.2166583370891606, Accuracy=0.301, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:30<00:00, 30.03s/it]


Lambda=0.1, n_factors=20: RMSE=1.2337812911032755, Accuracy=0.302, MAP@K=0.0009520574534161491


Evaluation fold: 100%|██████████| 1/1 [00:35<00:00, 35.28s/it]


Lambda=0.1, n_factors=50: RMSE=1.216983639456942, Accuracy=0.308, MAP@K=0.00029772688060731537


Evaluation fold: 100%|██████████| 1/1 [00:42<00:00, 42.41s/it]


Lambda=0.1, n_factors=100: RMSE=1.206947928120437, Accuracy=0.304, MAP@K=0.0005241761559696342


Evaluation fold: 100%|██████████| 1/1 [01:20<00:00, 80.46s/it]


Lambda=0.1, n_factors=200: RMSE=1.1910057974850807, Accuracy=0.317, MAP@K=0.0010483523119392685


Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.39s/it]


Lambda=1, n_factors=10: RMSE=1.3613160062176735, Accuracy=0.21, MAP@K=2.7173913043478262e-05


Evaluation fold: 100%|██████████| 1/1 [00:18<00:00, 18.50s/it]


Lambda=1, n_factors=20: RMSE=1.1948659552566443, Accuracy=0.3, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:24<00:00, 24.26s/it]


Lambda=1, n_factors=50: RMSE=1.1908634443652306, Accuracy=0.312, MAP@K=0.0006350284679089025


Evaluation fold: 100%|██████████| 1/1 [00:41<00:00, 41.76s/it]


Lambda=1, n_factors=100: RMSE=1.2057957597985491, Accuracy=0.305, MAP@K=0.0002667788129744651


Evaluation fold: 100%|██████████| 1/1 [01:16<00:00, 76.26s/it]


Lambda=1, n_factors=200: RMSE=1.2460602227003066, Accuracy=0.239, MAP@K=0.00022979209799861973


Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.38s/it]


Lambda=10, n_factors=10: RMSE=1.148321494043257, Accuracy=0.32, MAP@K=0.0010483523119392685


Evaluation fold: 100%|██████████| 1/1 [00:19<00:00, 19.11s/it]


Lambda=10, n_factors=20: RMSE=1.2046292604353062, Accuracy=0.25, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:23<00:00, 23.64s/it]


Lambda=10, n_factors=50: RMSE=1.1499304809642, Accuracy=0.325, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:36<00:00, 36.36s/it]


Lambda=10, n_factors=100: RMSE=1.190070551334774, Accuracy=0.257, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [01:08<00:00, 68.56s/it]


Lambda=10, n_factors=200: RMSE=1.1537055530924052, Accuracy=0.32, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:20<00:00, 20.09s/it]


Lambda=100, n_factors=10: RMSE=1.139492087613765, Accuracy=0.328, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:24<00:00, 24.01s/it]


Lambda=100, n_factors=20: RMSE=1.1462382084204559, Accuracy=0.328, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:34<00:00, 34.45s/it]


Lambda=100, n_factors=50: RMSE=1.1354198762837842, Accuracy=0.328, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:40<00:00, 40.07s/it]


Lambda=100, n_factors=100: RMSE=1.138961064068638, Accuracy=0.328, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [01:52<00:00, 112.50s/it]

Lambda=100, n_factors=200: RMSE=1.1348668592973437, Accuracy=0.328, MAP@K=0.0





Best model found:

In [6]:
# Evaluate the ALS configuration selected from the grid search, this time with
# short_eval_train_samples=100_000 (the search used 10_000).
# The evaluate(...) call is the cell's last expression, so its returned metrics
# are shown as the cell output.
als_best_params = dict(n_factors=10, reguralization_param=10, eps=1e-2)
als_model = AlternatingLeastSquaresModel(**als_best_params)
als_model.evaluate(
    review_df,
    user_df,
    business_df,
    short_eval=True,
    short_eval_train_samples=100_000,
)

Evaluation fold: 100%|██████████| 1/1 [12:23<00:00, 743.33s/it]


{'rmse': 1.2592897376561798,
 'mae': 1.0770081622258794,
 'accuracy': 0.2119,
 'f1': 0.11503925313041885,
 'precision': 0.5417094352467278,
 'recall': 0.20759367098066414,
 'AP@1': 0.0,
 'AP@3': 8.804366966015144e-05,
 'AP@K': 0.00013206550449022716,
 'MAP@K': 6.3349516598328e-05}

## FunkSVD

Hyperparameter tuning:

In [12]:
# Grid search for the FunkSVD model over the regularisation value (`lambda_`),
# number of latent factors, learning rate and number of epochs (81 combinations).
#
# Each configuration is scored with
# evaluate(..., short_eval=True, short_eval_train_samples=10_000) and one
# summary line is printed, exactly as before. In addition, every result is
# appended to `funksvd_tuning_records` so the configurations can be compared
# programmatically (e.g. `funksvd_tuning_results.sort_values("rmse")`) instead
# of by reading the printed log.
funksvd_tuning_records = []
for lambda_ in [0.1, 1, 10]:
    for n_factors in [10, 20, 50]:
        for learning_rate in [1e-2, 0.1, 1]:
            for n_epoch in [5, 10, 50]:
                fsvdmodel = FunkSVDModel(n_factors=n_factors, reguralization_param=lambda_, learning_rate=learning_rate, n_epoch=n_epoch)
                metrics = fsvdmodel.evaluate(review_df, user_df, business_df, short_eval=True, short_eval_train_samples=10_000)

                print(f"Lambda={lambda_}, {n_factors=}, {learning_rate=}, {n_epoch=}: RMSE={metrics['rmse']}, Accuracy={metrics['accuracy']}, MAP@K={metrics['MAP@K']}")

                # Keep the hyperparameters next to every metric returned by evaluate().
                funksvd_tuning_records.append(
                    {
                        "lambda": lambda_,
                        "n_factors": n_factors,
                        "learning_rate": learning_rate,
                        "n_epoch": n_epoch,
                        **metrics,
                    }
                )

# Build the frame once from the list of records rather than growing it per iteration.
funksvd_tuning_results = pd.DataFrame(funksvd_tuning_records)

100%|██████████| 10/10 [00:04<00:00,  2.44it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:21<00:00, 21.62s/it]


Lambda=0.1, n_factors=10, learning_rate=0.01, n_epoch=5: RMSE=1.0916468738069107, Accuracy=0.356, MAP@K=0.0006486154244306417


100%|██████████| 10/10 [00:08<00:00,  1.24it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:21<00:00, 21.70s/it]


Lambda=0.1, n_factors=10, learning_rate=0.01, n_epoch=10: RMSE=1.1089550863531878, Accuracy=0.353, MAP@K=0.00038906142167011734


100%|██████████| 10/10 [00:37<00:00,  3.74s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:51<00:00, 51.12s/it]


Lambda=0.1, n_factors=10, learning_rate=0.01, n_epoch=50: RMSE=1.1391677996209402, Accuracy=0.342, MAP@K=0.0


100%|██████████| 10/10 [00:04<00:00,  2.45it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.34s/it]


Lambda=0.1, n_factors=10, learning_rate=0.1, n_epoch=5: RMSE=1.1211165742018925, Accuracy=0.335, MAP@K=0.0


100%|██████████| 10/10 [00:08<00:00,  1.19it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:23<00:00, 23.12s/it]


Lambda=0.1, n_factors=10, learning_rate=0.1, n_epoch=10: RMSE=1.1269227301007423, Accuracy=0.342, MAP@K=0.0


100%|██████████| 10/10 [00:39<00:00,  3.99s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:55<00:00, 55.01s/it]


Lambda=0.1, n_factors=10, learning_rate=0.1, n_epoch=50: RMSE=1.1241769673863302, Accuracy=0.343, MAP@K=0.0


100%|██████████| 10/10 [00:04<00:00,  2.20it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:20<00:00, 20.89s/it]


Lambda=0.1, n_factors=10, learning_rate=1, n_epoch=5: RMSE=4.3225740576732585, Accuracy=0.222, MAP@K=0.00038830659075224296


100%|██████████| 10/10 [00:07<00:00,  1.28it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:22<00:00, 22.56s/it]


Lambda=0.1, n_factors=10, learning_rate=1, n_epoch=10: RMSE=4.242508917463583, Accuracy=0.261, MAP@K=0.0001754442719116632


100%|██████████| 10/10 [00:39<00:00,  3.93s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:55<00:00, 55.22s/it]


Lambda=0.1, n_factors=10, learning_rate=1, n_epoch=50: RMSE=4.256572519912619, Accuracy=0.237, MAP@K=0.0001754442719116632


100%|██████████| 20/20 [00:09<00:00,  2.04it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:26<00:00, 26.34s/it]


Lambda=0.1, n_factors=20, learning_rate=0.01, n_epoch=5: RMSE=1.1041094543813912, Accuracy=0.374, MAP@K=0.0001754442719116632


100%|██████████| 20/20 [00:17<00:00,  1.17it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:31<00:00, 31.47s/it]


Lambda=0.1, n_factors=20, learning_rate=0.01, n_epoch=10: RMSE=1.1114886645666764, Accuracy=0.352, MAP@K=9.133454106280193e-05


100%|██████████| 20/20 [01:17<00:00,  3.88s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [01:31<00:00, 91.34s/it]


Lambda=0.1, n_factors=20, learning_rate=0.01, n_epoch=50: RMSE=1.118993913899125, Accuracy=0.352, MAP@K=0.0


100%|██████████| 20/20 [00:07<00:00,  2.54it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:22<00:00, 22.24s/it]


Lambda=0.1, n_factors=20, learning_rate=0.1, n_epoch=5: RMSE=1.1297908580629024, Accuracy=0.351, MAP@K=0.0


100%|██████████| 20/20 [00:16<00:00,  1.24it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:30<00:00, 31.00s/it]


Lambda=0.1, n_factors=20, learning_rate=0.1, n_epoch=10: RMSE=1.1321122291983052, Accuracy=0.347, MAP@K=0.0


100%|██████████| 20/20 [01:21<00:00,  4.08s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [01:35<00:00, 95.88s/it]


Lambda=0.1, n_factors=20, learning_rate=0.1, n_epoch=50: RMSE=1.1063273806764913, Accuracy=0.326, MAP@K=0.0


100%|██████████| 20/20 [00:09<00:00,  2.10it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:24<00:00, 24.56s/it]


Lambda=0.1, n_factors=20, learning_rate=1, n_epoch=5: RMSE=4.4414394447377745, Accuracy=0.232, MAP@K=0.0


100%|██████████| 20/20 [00:16<00:00,  1.19it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:32<00:00, 32.16s/it]


Lambda=0.1, n_factors=20, learning_rate=1, n_epoch=10: RMSE=4.569223478032548, Accuracy=0.24, MAP@K=0.00029772688060731537


100%|██████████| 20/20 [01:24<00:00,  4.20s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [01:38<00:00, 98.25s/it]


Lambda=0.1, n_factors=20, learning_rate=1, n_epoch=50: RMSE=4.920119394081411, Accuracy=0.234, MAP@K=0.0006543305728088337


100%|██████████| 50/50 [00:22<00:00,  2.20it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:37<00:00, 37.37s/it]


Lambda=0.1, n_factors=50, learning_rate=0.01, n_epoch=5: RMSE=1.11452407062177, Accuracy=0.364, MAP@K=0.0


100%|██████████| 50/50 [00:40<00:00,  1.24it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:54<00:00, 54.80s/it]


Lambda=0.1, n_factors=50, learning_rate=0.01, n_epoch=10: RMSE=1.1341435115312068, Accuracy=0.351, MAP@K=0.00013015441683919944


100%|██████████| 50/50 [03:18<00:00,  3.97s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [03:33<00:00, 213.60s/it]


Lambda=0.1, n_factors=50, learning_rate=0.01, n_epoch=50: RMSE=1.1151897095432963, Accuracy=0.354, MAP@K=0.00022979209799861973


100%|██████████| 50/50 [00:20<00:00,  2.43it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:33<00:00, 33.80s/it]


Lambda=0.1, n_factors=50, learning_rate=0.1, n_epoch=5: RMSE=1.122052920494937, Accuracy=0.346, MAP@K=0.0


100%|██████████| 50/50 [00:38<00:00,  1.29it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:51<00:00, 51.95s/it]


Lambda=0.1, n_factors=50, learning_rate=0.1, n_epoch=10: RMSE=1.1118114816404128, Accuracy=0.347, MAP@K=0.0


100%|██████████| 50/50 [03:11<00:00,  3.82s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [03:25<00:00, 205.48s/it]


Lambda=0.1, n_factors=50, learning_rate=0.1, n_epoch=50: RMSE=1.105913097371144, Accuracy=0.339, MAP@K=5.7367149758454107e-05


100%|██████████| 50/50 [00:24<00:00,  2.07it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:39<00:00, 39.05s/it]


Lambda=0.1, n_factors=50, learning_rate=1, n_epoch=5: RMSE=4.310384684684192, Accuracy=0.214, MAP@K=0.00018266908212560387


100%|██████████| 50/50 [00:42<00:00,  1.17it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:58<00:00, 58.11s/it]


Lambda=0.1, n_factors=50, learning_rate=1, n_epoch=10: RMSE=4.7197669529854185, Accuracy=0.245, MAP@K=0.0008841226708074534


100%|██████████| 50/50 [03:38<00:00,  4.37s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [03:53<00:00, 233.57s/it]


Lambda=0.1, n_factors=50, learning_rate=1, n_epoch=50: RMSE=4.554944872233758, Accuracy=0.222, MAP@K=0.0005241761559696342


100%|██████████| 10/10 [00:04<00:00,  2.27it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:18<00:00, 18.91s/it]


Lambda=1, n_factors=10, learning_rate=0.01, n_epoch=5: RMSE=1.0996769890686955, Accuracy=0.333, MAP@K=0.0


100%|██████████| 10/10 [00:07<00:00,  1.30it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:22<00:00, 22.20s/it]


Lambda=1, n_factors=10, learning_rate=0.01, n_epoch=10: RMSE=1.0980194370845295, Accuracy=0.336, MAP@K=0.0


100%|██████████| 10/10 [00:39<00:00,  3.93s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:53<00:00, 53.78s/it]


Lambda=1, n_factors=10, learning_rate=0.01, n_epoch=50: RMSE=1.104927467485168, Accuracy=0.332, MAP@K=0.0


100%|██████████| 10/10 [00:04<00:00,  2.45it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:18<00:00, 18.21s/it]


Lambda=1, n_factors=10, learning_rate=0.1, n_epoch=5: RMSE=1.100700717508836, Accuracy=0.339, MAP@K=0.0005815433057280884


100%|██████████| 10/10 [00:07<00:00,  1.26it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:22<00:00, 22.27s/it]


Lambda=1, n_factors=10, learning_rate=0.1, n_epoch=10: RMSE=1.1092725068383653, Accuracy=0.324, MAP@K=0.0


100%|██████████| 10/10 [00:39<00:00,  3.91s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:53<00:00, 53.45s/it]


Lambda=1, n_factors=10, learning_rate=0.1, n_epoch=50: RMSE=1.1293837677767529, Accuracy=0.31, MAP@K=0.0


100%|██████████| 10/10 [00:04<00:00,  2.44it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:19<00:00, 19.06s/it]


Lambda=1, n_factors=10, learning_rate=1, n_epoch=5: RMSE=2.7532842481431263, Accuracy=0.252, MAP@K=0.0003055986887508627


100%|██████████| 10/10 [00:07<00:00,  1.29it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:22<00:00, 22.10s/it]


Lambda=1, n_factors=10, learning_rate=1, n_epoch=10: RMSE=2.694605276115318, Accuracy=0.256, MAP@K=0.00038830659075224296


100%|██████████| 10/10 [00:40<00:00,  4.03s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:56<00:00, 56.11s/it]


Lambda=1, n_factors=10, learning_rate=1, n_epoch=50: RMSE=2.60881282056824, Accuracy=0.261, MAP@K=0.0


100%|██████████| 20/20 [00:08<00:00,  2.29it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:23<00:00, 23.02s/it]


Lambda=1, n_factors=20, learning_rate=0.01, n_epoch=5: RMSE=1.0934353886955186, Accuracy=0.339, MAP@K=0.0


100%|██████████| 20/20 [00:16<00:00,  1.22it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:31<00:00, 32.00s/it]


Lambda=1, n_factors=20, learning_rate=0.01, n_epoch=10: RMSE=1.0996350690691379, Accuracy=0.339, MAP@K=0.0


100%|██████████| 20/20 [01:21<00:00,  4.08s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [01:36<00:00, 96.93s/it]


Lambda=1, n_factors=20, learning_rate=0.01, n_epoch=50: RMSE=1.0941908397752238, Accuracy=0.326, MAP@K=0.00022979209799861973


100%|██████████| 20/20 [00:08<00:00,  2.39it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:24<00:00, 24.25s/it]


Lambda=1, n_factors=20, learning_rate=0.1, n_epoch=5: RMSE=1.1030632930381064, Accuracy=0.33, MAP@K=0.0


100%|██████████| 20/20 [00:17<00:00,  1.16it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:31<00:00, 31.66s/it]


Lambda=1, n_factors=20, learning_rate=0.1, n_epoch=10: RMSE=1.0965447886381903, Accuracy=0.327, MAP@K=0.0


100%|██████████| 20/20 [01:17<00:00,  3.87s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [01:31<00:00, 91.41s/it]


Lambda=1, n_factors=20, learning_rate=0.1, n_epoch=50: RMSE=1.1257619415154805, Accuracy=0.323, MAP@K=0.0


100%|██████████| 20/20 [00:08<00:00,  2.38it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:22<00:00, 22.59s/it]


Lambda=1, n_factors=20, learning_rate=1, n_epoch=5: RMSE=2.6411828612476707, Accuracy=0.262, MAP@K=0.00022979209799861973


100%|██████████| 20/20 [00:15<00:00,  1.26it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:30<00:00, 30.08s/it]


Lambda=1, n_factors=20, learning_rate=1, n_epoch=10: RMSE=2.698732140606675, Accuracy=0.243, MAP@K=0.001093642167011732


100%|██████████| 20/20 [01:16<00:00,  3.83s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [01:30<00:00, 90.20s/it]


Lambda=1, n_factors=20, learning_rate=1, n_epoch=50: RMSE=2.5273664976113333, Accuracy=0.261, MAP@K=5.7367149758454107e-05


100%|██████████| 50/50 [00:20<00:00,  2.46it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:33<00:00, 33.92s/it]


Lambda=1, n_factors=50, learning_rate=0.01, n_epoch=5: RMSE=1.0861938267367444, Accuracy=0.337, MAP@K=9.133454106280193e-05


100%|██████████| 50/50 [00:38<00:00,  1.29it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:52<00:00, 52.26s/it]


Lambda=1, n_factors=50, learning_rate=0.01, n_epoch=10: RMSE=1.095081210931113, Accuracy=0.329, MAP@K=0.0


100%|██████████| 50/50 [03:08<00:00,  3.77s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [03:22<00:00, 202.58s/it]


Lambda=1, n_factors=50, learning_rate=0.01, n_epoch=50: RMSE=1.0932871023849617, Accuracy=0.333, MAP@K=0.00022979209799861973


100%|██████████| 50/50 [00:20<00:00,  2.50it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:33<00:00, 33.56s/it]


Lambda=1, n_factors=50, learning_rate=0.1, n_epoch=5: RMSE=1.0926529385763666, Accuracy=0.333, MAP@K=0.0


100%|██████████| 50/50 [00:38<00:00,  1.31it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:51<00:00, 51.58s/it]


Lambda=1, n_factors=50, learning_rate=0.1, n_epoch=10: RMSE=1.0960385598705809, Accuracy=0.323, MAP@K=0.0


100%|██████████| 50/50 [03:15<00:00,  3.91s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [03:29<00:00, 209.77s/it]


Lambda=1, n_factors=50, learning_rate=0.1, n_epoch=50: RMSE=1.1054762439140235, Accuracy=0.324, MAP@K=0.0


100%|██████████| 50/50 [00:20<00:00,  2.45it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:33<00:00, 33.76s/it]


Lambda=1, n_factors=50, learning_rate=1, n_epoch=5: RMSE=2.617136608367162, Accuracy=0.24, MAP@K=0.00035994651483781917


100%|██████████| 50/50 [00:39<00:00,  1.26it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:53<00:00, 53.07s/it]


Lambda=1, n_factors=50, learning_rate=1, n_epoch=10: RMSE=2.465389782925123, Accuracy=0.253, MAP@K=0.000405236369910283


100%|██████████| 50/50 [03:12<00:00,  3.85s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [03:26<00:00, 206.75s/it]


Lambda=1, n_factors=50, learning_rate=1, n_epoch=50: RMSE=2.428914381521785, Accuracy=0.249, MAP@K=0.0


100%|██████████| 10/10 [00:03<00:00,  2.51it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.54s/it]


Lambda=10, n_factors=10, learning_rate=0.01, n_epoch=5: RMSE=1.1308767947746958, Accuracy=0.327, MAP@K=0.0


100%|██████████| 10/10 [00:07<00:00,  1.30it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:21<00:00, 21.46s/it]


Lambda=10, n_factors=10, learning_rate=0.01, n_epoch=10: RMSE=1.131751457399864, Accuracy=0.317, MAP@K=0.0001754442719116632


100%|██████████| 10/10 [00:36<00:00,  3.68s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:50<00:00, 50.51s/it]


Lambda=10, n_factors=10, learning_rate=0.01, n_epoch=50: RMSE=1.1334595265175744, Accuracy=0.321, MAP@K=0.00014870169082125605


100%|██████████| 10/10 [00:03<00:00,  2.59it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.73s/it]


Lambda=10, n_factors=10, learning_rate=0.1, n_epoch=5: RMSE=1.145917590038664, Accuracy=0.316, MAP@K=0.0


100%|██████████| 10/10 [00:07<00:00,  1.27it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:22<00:00, 22.04s/it]


Lambda=10, n_factors=10, learning_rate=0.1, n_epoch=10: RMSE=1.176248220018964, Accuracy=0.309, MAP@K=5.4347826086956524e-05


100%|██████████| 10/10 [00:39<00:00,  3.92s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:52<00:00, 52.91s/it]


Lambda=10, n_factors=10, learning_rate=0.1, n_epoch=50: RMSE=1.1609516793358703, Accuracy=0.301, MAP@K=0.00013015441683919944


100%|██████████| 10/10 [00:03<00:00,  2.54it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.54s/it]


Lambda=10, n_factors=10, learning_rate=1, n_epoch=5: RMSE=2.8347522323096523, Accuracy=0.24, MAP@K=0.0005241761559696342


100%|██████████| 10/10 [00:07<00:00,  1.32it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:21<00:00, 21.37s/it]


Lambda=10, n_factors=10, learning_rate=1, n_epoch=10: RMSE=2.9725751748879583, Accuracy=0.219, MAP@K=0.0005241761559696342


100%|██████████| 10/10 [00:38<00:00,  3.82s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:52<00:00, 52.90s/it]


Lambda=10, n_factors=10, learning_rate=1, n_epoch=50: RMSE=2.899892506886752, Accuracy=0.234, MAP@K=5.7367149758454107e-05


100%|██████████| 20/20 [00:08<00:00,  2.26it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:22<00:00, 22.94s/it]


Lambda=10, n_factors=20, learning_rate=0.01, n_epoch=5: RMSE=1.1201449119538676, Accuracy=0.329, MAP@K=0.0


100%|██████████| 20/20 [00:22<00:00,  1.11s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:38<00:00, 38.77s/it]


Lambda=10, n_factors=20, learning_rate=0.01, n_epoch=10: RMSE=1.1249821766243489, Accuracy=0.328, MAP@K=0.00013015441683919944


100%|██████████| 20/20 [01:23<00:00,  4.18s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [01:39<00:00, 99.20s/it]


Lambda=10, n_factors=20, learning_rate=0.01, n_epoch=50: RMSE=1.1460852858387647, Accuracy=0.32, MAP@K=0.001093642167011732


100%|██████████| 20/20 [00:08<00:00,  2.37it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:23<00:00, 23.21s/it]


Lambda=10, n_factors=20, learning_rate=0.1, n_epoch=5: RMSE=1.1549788885273105, Accuracy=0.318, MAP@K=0.0


100%|██████████| 20/20 [00:16<00:00,  1.21it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:30<00:00, 30.69s/it]


Lambda=10, n_factors=20, learning_rate=0.1, n_epoch=10: RMSE=1.1651248252708137, Accuracy=0.319, MAP@K=9.133454106280193e-05


100%|██████████| 20/20 [01:20<00:00,  4.02s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [01:34<00:00, 94.39s/it]


Lambda=10, n_factors=20, learning_rate=0.1, n_epoch=50: RMSE=1.1732991935367887, Accuracy=0.308, MAP@K=0.0


100%|██████████| 20/20 [00:07<00:00,  2.56it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:21<00:00, 21.57s/it]


Lambda=10, n_factors=20, learning_rate=1, n_epoch=5: RMSE=2.995871465720969, Accuracy=0.221, MAP@K=0.0


100%|██████████| 20/20 [00:15<00:00,  1.27it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:29<00:00, 29.70s/it]


Lambda=10, n_factors=20, learning_rate=1, n_epoch=10: RMSE=2.863613738758916, Accuracy=0.217, MAP@K=0.0


100%|██████████| 20/20 [01:18<00:00,  3.90s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [01:32<00:00, 92.12s/it]


Lambda=10, n_factors=20, learning_rate=1, n_epoch=50: RMSE=2.808191554748913, Accuracy=0.237, MAP@K=0.0


100%|██████████| 50/50 [00:20<00:00,  2.42it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:34<00:00, 34.64s/it]


Lambda=10, n_factors=50, learning_rate=0.01, n_epoch=5: RMSE=1.1237031517769192, Accuracy=0.328, MAP@K=9.133454106280193e-05


100%|██████████| 50/50 [00:39<00:00,  1.26it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:53<00:00, 53.51s/it]


Lambda=10, n_factors=50, learning_rate=0.01, n_epoch=10: RMSE=1.1226550228156393, Accuracy=0.328, MAP@K=0.0006860334713595583


100%|██████████| 50/50 [03:12<00:00,  3.85s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [03:26<00:00, 206.21s/it]


Lambda=10, n_factors=50, learning_rate=0.01, n_epoch=50: RMSE=1.1214183871845296, Accuracy=0.327, MAP@K=0.000925314872325742


100%|██████████| 50/50 [00:20<00:00,  2.42it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:34<00:00, 34.48s/it]


Lambda=10, n_factors=50, learning_rate=0.1, n_epoch=5: RMSE=1.1531227984759704, Accuracy=0.313, MAP@K=0.0


100%|██████████| 50/50 [00:39<00:00,  1.27it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:53<00:00, 53.44s/it]


Lambda=10, n_factors=50, learning_rate=0.1, n_epoch=10: RMSE=1.142361091030039, Accuracy=0.31, MAP@K=0.00029772688060731537


100%|██████████| 50/50 [03:19<00:00,  3.99s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [03:33<00:00, 213.12s/it]


Lambda=10, n_factors=50, learning_rate=0.1, n_epoch=50: RMSE=1.1512073996672871, Accuracy=0.311, MAP@K=0.00022979209799861973


100%|██████████| 50/50 [00:19<00:00,  2.52it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:33<00:00, 33.49s/it]


Lambda=10, n_factors=50, learning_rate=1, n_epoch=5: RMSE=2.908693145695038, Accuracy=0.226, MAP@K=0.0


100%|██████████| 50/50 [00:39<00:00,  1.27it/s] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [00:53<00:00, 53.63s/it]


Lambda=10, n_factors=50, learning_rate=1, n_epoch=10: RMSE=2.9482095421391863, Accuracy=0.222, MAP@K=0.0015918305728088336


100%|██████████| 50/50 [03:20<00:00,  4.00s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [03:34<00:00, 214.06s/it]

Lambda=10, n_factors=50, learning_rate=1, n_epoch=50: RMSE=2.825883914741728, Accuracy=0.217, MAP@K=5.7367149758454107e-05





Best model found:

In [13]:
# Evaluate the FunkSVD configuration selected from the grid search, this time
# with short_eval_train_samples=100_000 (the search used 10_000).
# The evaluate(...) call is the cell's last expression, so its returned metrics
# are shown as the cell output.
funksvd_best_params = dict(
    n_factors=20,
    reguralization_param=10,
    learning_rate=0.01,
    n_epoch=50,
)
fsvdmodel = FunkSVDModel(**funksvd_best_params)
fsvdmodel.evaluate(
    review_df,
    user_df,
    business_df,
    short_eval=True,
    short_eval_train_samples=100_000,
)

100%|██████████| 20/20 [12:12<00:00, 36.60s/it] ?it/s]
Evaluation fold: 100%|██████████| 1/1 [23:08<00:00, 1388.90s/it]


{'rmse': 1.2311598693815586,
 'mae': 0.9733139898439149,
 'accuracy': 0.3362,
 'f1': 0.13284116615241162,
 'precision': 0.3633863012376771,
 'recall': 0.20141669590148217,
 'AP@1': 0.0,
 'AP@3': 0.0,
 'AP@K': 0.0,
 'MAP@K': 0.0}

## NN Collaborative Filtering

Hyperparameter tuning:

In [29]:
# Grid search for the neural collaborative-filtering model over the number of
# epochs, the embedding size (`n_factors`, passed as `n_embed`) and the
# learning rate; batch_size is fixed at 1024.
#
# Each configuration is scored with
# evaluate(..., short_eval=True, short_eval_train_samples=10_000) and one
# summary line is printed, exactly as before. In addition, every result is
# appended to `nn_tuning_records` so the configurations can be compared
# programmatically (e.g. `nn_tuning_results.sort_values("rmse")`) instead of
# by reading the printed log.
nn_tuning_records = []
for epochs in [1, 3, 10, 30]:
    for n_factors in [5, 10, 20, 50]:
        for learning_rate in [1e-3, 1e-2, 0.1]:
            nnmodel = NNColaborativeModel(learning_rate=learning_rate, n_embed=n_factors, epochs=epochs, batch_size=1024)
            metrics = nnmodel.evaluate(review_df, user_df, business_df, short_eval=True, short_eval_train_samples=10_000)
            print(f"Epochs={epochs}, {n_factors=}, {learning_rate=}: RMSE={metrics['rmse']}, Accuracy={metrics['accuracy']}, MAP@K={metrics['MAP@K']}")

            # Keep the hyperparameters next to every metric returned by evaluate().
            nn_tuning_records.append(
                {
                    "epochs": epochs,
                    "n_factors": n_factors,
                    "learning_rate": learning_rate,
                    **metrics,
                }
            )

# Build the frame once from the list of records rather than growing it per iteration.
nn_tuning_results = pd.DataFrame(nn_tuning_records)


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.63s/it]


Epochs=1, n_factors=5, learning_rate=0.001: RMSE=1.153872540990505, Accuracy=0.272, MAP@K=0.0001754442719116632


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.47s/it]


Epochs=1, n_factors=5, learning_rate=0.01: RMSE=1.1301813317687588, Accuracy=0.328, MAP@K=0.0002667788129744651


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.35s/it]


Epochs=1, n_factors=5, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.0004512810559006211


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.44s/it]


Epochs=1, n_factors=10, learning_rate=0.001: RMSE=1.1318634554135296, Accuracy=0.329, MAP@K=2.7173913043478262e-05


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.64s/it]


Epochs=1, n_factors=10, learning_rate=0.01: RMSE=1.1281237396820485, Accuracy=0.323, MAP@K=0.00013015441683919944


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.71s/it]


Epochs=1, n_factors=10, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.0011422748447204969


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.94s/it]


Epochs=1, n_factors=20, learning_rate=0.001: RMSE=1.1438469043725836, Accuracy=0.278, MAP@K=0.00047964113181504493


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.55s/it]


Epochs=1, n_factors=20, learning_rate=0.01: RMSE=1.1273760659326866, Accuracy=0.329, MAP@K=0.00029772688060731537


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.44s/it]


Epochs=1, n_factors=20, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.00035994651483781917


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.65s/it]


Epochs=1, n_factors=50, learning_rate=0.001: RMSE=1.1464346142984096, Accuracy=0.281, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.64s/it]


Epochs=1, n_factors=50, learning_rate=0.01: RMSE=1.1316969601128872, Accuracy=0.327, MAP@K=0.0001754442719116632


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.45s/it]


Epochs=1, n_factors=50, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.00022979209799861973


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.89s/it]


Epochs=3, n_factors=5, learning_rate=0.001: RMSE=1.1500563314078949, Accuracy=0.329, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.77s/it]


Epochs=3, n_factors=5, learning_rate=0.01: RMSE=1.1318809574245388, Accuracy=0.328, MAP@K=0.0005241761559696342


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.60s/it]


Epochs=3, n_factors=5, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.00022979209799861973


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.00s/it]


Epochs=3, n_factors=10, learning_rate=0.001: RMSE=1.1339387291341594, Accuracy=0.328, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.83s/it]


Epochs=3, n_factors=10, learning_rate=0.01: RMSE=1.1334180680775807, Accuracy=0.329, MAP@K=0.0005241761559696342


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.45s/it]


Epochs=3, n_factors=10, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.00022979209799861973


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.88s/it]


Epochs=3, n_factors=20, learning_rate=0.001: RMSE=1.1372480086277565, Accuracy=0.328, MAP@K=0.00029772688060731537


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.87s/it]


Epochs=3, n_factors=20, learning_rate=0.01: RMSE=1.1279079507008258, Accuracy=0.328, MAP@K=8.454106280193236e-05


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.69s/it]


Epochs=3, n_factors=20, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.0008113354037267079


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.01s/it]


Epochs=3, n_factors=50, learning_rate=0.001: RMSE=1.1407995550938346, Accuracy=0.328, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.07s/it]


Epochs=3, n_factors=50, learning_rate=0.01: RMSE=1.1239369937246773, Accuracy=0.335, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:13<00:00, 13.76s/it]


Epochs=3, n_factors=50, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.000405236369910283


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.82s/it]


Epochs=10, n_factors=5, learning_rate=0.001: RMSE=1.1347128152284665, Accuracy=0.328, MAP@K=0.0007959152864044168


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.93s/it]


Epochs=10, n_factors=5, learning_rate=0.01: RMSE=1.1324496216423574, Accuracy=0.329, MAP@K=0.0009132375776397516


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.68s/it]


Epochs=10, n_factors=5, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.0010516951345755695


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.78s/it]


Epochs=10, n_factors=10, learning_rate=0.001: RMSE=1.1353685860754918, Accuracy=0.328, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.86s/it]


Epochs=10, n_factors=10, learning_rate=0.01: RMSE=1.1456049065404936, Accuracy=0.326, MAP@K=0.0001754442719116632


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.80s/it]


Epochs=10, n_factors=10, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.0005275189786059352


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.95s/it]


Epochs=10, n_factors=20, learning_rate=0.001: RMSE=1.1332656918708586, Accuracy=0.327, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.87s/it]


Epochs=10, n_factors=20, learning_rate=0.01: RMSE=1.1313971834512522, Accuracy=0.323, MAP@K=0.0005241761559696342


Evaluation fold: 100%|██████████| 1/1 [00:14<00:00, 14.89s/it]


Epochs=10, n_factors=20, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.00022979209799861973


Evaluation fold: 100%|██████████| 1/1 [00:15<00:00, 15.55s/it]


Epochs=10, n_factors=50, learning_rate=0.001: RMSE=1.134891179761967, Accuracy=0.328, MAP@K=0.00018752156659765357


Evaluation fold: 100%|██████████| 1/1 [00:15<00:00, 15.26s/it]


Epochs=10, n_factors=50, learning_rate=0.01: RMSE=1.1383307417835673, Accuracy=0.341, MAP@K=0.0010483523119392685


Evaluation fold: 100%|██████████| 1/1 [00:15<00:00, 15.08s/it]


Epochs=10, n_factors=50, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.00045958419599723946


Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.72s/it]


Epochs=30, n_factors=5, learning_rate=0.001: RMSE=1.1360938406369039, Accuracy=0.328, MAP@K=0.000753968253968254


Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.91s/it]


Epochs=30, n_factors=5, learning_rate=0.01: RMSE=1.1782114124390242, Accuracy=0.313, MAP@K=0.00035994651483781917


Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.80s/it]


Epochs=30, n_factors=5, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.00022979209799861973


Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.98s/it]


Epochs=30, n_factors=10, learning_rate=0.001: RMSE=1.1343548323155923, Accuracy=0.319, MAP@K=0.0002788561076604555


Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.74s/it]


Epochs=30, n_factors=10, learning_rate=0.01: RMSE=1.1642129200080746, Accuracy=0.327, MAP@K=0.0004278812974465148


Evaluation fold: 100%|██████████| 1/1 [00:17<00:00, 17.86s/it]


Epochs=30, n_factors=10, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.00022979209799861973


Evaluation fold: 100%|██████████| 1/1 [00:18<00:00, 18.11s/it]


Epochs=30, n_factors=20, learning_rate=0.001: RMSE=1.1184675289774038, Accuracy=0.34, MAP@K=0.0005241761559696342


Evaluation fold: 100%|██████████| 1/1 [00:18<00:00, 18.41s/it]


Epochs=30, n_factors=20, learning_rate=0.01: RMSE=1.1574101316838323, Accuracy=0.334, MAP@K=0.00013015441683919944


Evaluation fold: 100%|██████████| 1/1 [00:18<00:00, 18.10s/it]


Epochs=30, n_factors=20, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.000405236369910283


Evaluation fold: 100%|██████████| 1/1 [00:19<00:00, 19.09s/it]


Epochs=30, n_factors=50, learning_rate=0.001: RMSE=1.1251291662050267, Accuracy=0.337, MAP@K=0.0


Evaluation fold: 100%|██████████| 1/1 [00:18<00:00, 18.78s/it]


Epochs=30, n_factors=50, learning_rate=0.01: RMSE=1.1371887272715828, Accuracy=0.348, MAP@K=0.00022979209799861973


Evaluation fold: 100%|██████████| 1/1 [00:18<00:00, 18.69s/it]

Epochs=30, n_factors=50, learning_rate=0.1: RMSE=1.2853015210447702, Accuracy=0.312, MAP@K=0.00022979209799861973





Best model found:

In [7]:
# Evaluate the neural collaborative-filtering configuration selected from the
# grid search, this time with short_eval_train_samples=100_000 (the search
# used 10_000).
# The evaluate(...) call is the cell's last expression, so its returned metrics
# are shown as the cell output.
nn_best_params = dict(
    learning_rate=0.01,
    n_embed=50,
    epochs=10,
    batch_size=1024,
)
nnmodel = NNColaborativeModel(**nn_best_params)
nnmodel.evaluate(
    review_df,
    user_df,
    business_df,
    short_eval=True,
    short_eval_train_samples=100_000,
)

Evaluation fold: 100%|██████████| 1/1 [10:19<00:00, 619.19s/it]


{'rmse': 1.2567935022053274,
 'mae': 0.9297294791340828,
 'accuracy': 0.3495,
 'f1': 0.21502334180449365,
 'precision': 0.313925154998302,
 'recall': 0.23354447453903235,
 'AP@1': 0.0,
 'AP@3': 8.804366966015144e-05,
 'AP@K': 0.00010565240359218173,
 'MAP@K': 7.805490570942235e-05}