In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell is executed, so edits to imported modules (e.g. the local `src`
# package) take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Experiments with EASE model

In [2]:
from pathlib import Path

import scipy.sparse
import numpy as np

import src.io as io
import src.evaluation as evaluation
import src.evaluation.splits
import src.evaluation.base

from src.algorithm.baseline.ease import EASE

## Datasets

In [3]:
## Dataset selection: keep exactly one DATA_DIR / RETARGET pair uncommented.
## RETARGET must be True for Frappe and False for the other datasets; it is
## forwarded as `retarget=` to the evaluation helpers further down.

DATA_DIR = Path('../../../data/CARS/Mobile_Frappe/')
RETARGET = True

# DATA_DIR = Path('../../../data/CARS/Food_com/')
# RETARGET = False

# DATA_DIR = Path('../../../data/CARS/TripAdvisor/')
# RETARGET = False

In [4]:
# Fixed settings -- these normally stay the same whichever dataset is selected.

# Column names handed to the interactions parser as `user_id` / `item_id`.
USER_ID = 'user'
ITEM_ID = 'item'

# SEED is used for the train/validation split, SEED2 for the k-fold evaluation.
SEED = 123456
SEED2 = 78910

# Interactions file inside the selected dataset directory.
INTERACTIONS = DATA_DIR.joinpath('interactions.csv')

## Define algorithm and hyperparameter ranges

In [5]:
# Algorithm class that is tuned and evaluated in the cells below.
ALG = EASE

# Candidate values for EASE's `l2` hyperparameter.
# L2 = [10**p for p in range(1, 5)]
L2 = [0.1, 0.5, 1, 5, 10, 15, 20, 50]

# Search grid handed to the grid search: hyperparameter name -> values to try.
HYPERPARAMS = dict(l2=L2)
HYPERPARAMS

{'l2': [0.1, 0.5, 1, 5, 10, 15, 20, 50]}

## Parse data

In [6]:
# Parse the interactions file with the project's io helper, telling it which
# columns hold the item and user identifiers.
data = io.parse_interactions_with_context(
    INTERACTIONS,
    item_id=ITEM_ID,
    user_id=USER_ID,
)

# Show the `df` attribute of the parsed data as the cell output.
data.df

Unnamed: 0,userId,itemId,daytime,weather,weekday
0,0,0,1,1,1
1,1,1,2,2,2
2,2,2,3,2,3
3,3,3,4,0,4
4,4,4,5,1,4
...,...,...,...,...,...
95997,109,0,3,1,1
95998,37,16,4,2,1
95999,180,33,2,2,1
96000,445,751,3,1,1


## Make train/validation split for hyperparameter tuning

In [7]:
# Build the split used for hyperparameter tuning; SEED is passed so the split
# can be reproduced. Presumably Xval_in is the validation input and Xval_out
# the held-out part to predict -- confirm against src.evaluation.splits.
Xtrain, Xval_in, Xval_out = evaluation.splits.leave_one_out_split_non_context(
    data,
    seed=SEED,
)

## Perform grid search on the validation set

In [8]:
%%time
score, best_hyperparams = evaluation.base.gridsearch(ALG, Xtrain, Xval_in, Xval_out, HYPERPARAMS, retarget=RETARGET)
f"Best score of {score} achieved with {best_hyperparams}."

  0%|          | 0/8 [00:00<?, ?it/s]

Training model EASE with hyperparameters {'l2': 0.1}
Evaluating with 816 users
MRR@5 0.195
MRR@20 0.22
Average Recall@5 0.369
Average Recall@20 0.604
Training model EASE with hyperparameters {'l2': 0.5}
Evaluating with 816 users
MRR@5 0.196
MRR@20 0.222
Average Recall@5 0.362
Average Recall@20 0.609
Training model EASE with hyperparameters {'l2': 1}
Evaluating with 816 users
MRR@5 0.196
MRR@20 0.223
Average Recall@5 0.357
Average Recall@20 0.604
Training model EASE with hyperparameters {'l2': 5}
Evaluating with 816 users
MRR@5 0.185
MRR@20 0.213
Average Recall@5 0.344
Average Recall@20 0.608
Training model EASE with hyperparameters {'l2': 10}
Evaluating with 816 users
MRR@5 0.165
MRR@20 0.195
Average Recall@5 0.314
Average Recall@20 0.603
Training model EASE with hyperparameters {'l2': 15}
Evaluating with 816 users
MRR@5 0.155
MRR@20 0.187
Average Recall@5 0.292
Average Recall@20 0.6
Training model EASE with hyperparameters {'l2': 20}
Evaluating with 816 users
MRR@5 0.148
MRR@20 0.18
A

"Best score of 0.22250349596674832 achieved with {'l2': 1}."

## Evaluate the model with the optimal hyperparameters using k-fold cross-validation

In [9]:
%%time
alg = ALG(**best_hyperparams)
results = evaluation.base.kFoldsEval(alg, data, nr_folds=5, seed=SEED2, retarget=RETARGET)

Evaluating with 816 users
Evaluating with 816 users
Evaluating with 816 users
Evaluating with 816 users
Evaluating with 816 users
MRR@5 0.179 (0.006)
MRR@20 0.204 (0.007)
Average Recall@5 0.345 (0.011)
Average Recall@20 0.584 (0.014)
CPU times: user 2min 10s, sys: 29.8 s, total: 2min 39s
Wall time: 12.8 s
