In [1]:
%load_ext autoreload
%autoreload 2

# Experiments with WMF followed by context-aware model
First learn user and item factors with WMF (optimal hyperparams known) then learn context factors with CA model.

In [2]:
from pathlib import Path
from functools import partial

from tqdm.auto import tqdm

import src.io as io
import src.evaluation as evaluation
import src.evaluation.splits
import src.evaluation.cars

from src.algorithm.baseline.wmf import WMF
from src.algorithm.cars.itals import iTALS, iTALSs
from src.algorithm.cars.wtf import WTF
from src.algorithm.cars.italsx import iTALSx

## Datasets

In [3]:
DATA_DIR, RETARGET = Path('../../../data/CARS/Food_com/'), False

In [4]:
# Shouldn't need to change this info
INTERACTIONS = DATA_DIR / 'interactions.csv'

SEED = 123456
SEED2 = 78910

ITEM_ID = 'item'
USER_ID = 'user'

MODELS = {
    'iTALS one': partial(iTALS, default_context_1=True, max_iterations=3),
    'iTALS': partial(iTALS, default_context_1=False, max_iterations=3),
    'iTALSs one': partial(iTALSs, default_context_1=True),
    'iTALSs': partial(iTALSs, default_context_1=False),
    'WTF one': partial(WTF, default_context_1=True, max_cg_iter=10),
    'WTF': partial(WTF, default_context_1=False, max_cg_iter=10),
    'iTALSx': iTALSx
}

## Provider of WMF factors for all models

In [5]:
K = 80
WMF_MAX_IT = 10

# depends on dataset
WMF_L2 = 1000
WMF_V = 0.25
WMF_ALPHA = 200

WMF_HYPERPARAMS = {'k': K, 'l2': WMF_L2, 'v': WMF_V, 'alpha': WMF_ALPHA, 'max_iterations': WMF_MAX_IT}

In [6]:
# map seed to learned factors
userItemFactorCache = dict()
def userItemFactorSupplier(data, seed):
    if seed not in userItemFactorCache:
        print("Computing factors WMF")
        wmf = WMF(**WMF_HYPERPARAMS)
        wmf.fit(data.toCSR())
        P, Q = wmf.getUserItemFactors()
        userItemFactorCache[seed] = (P, Q)
        
    return userItemFactorCache[seed]

## Define algorithm and hyperparameter ranges

In [7]:
L2 = [10, 100, 1000, 10000, 100000]
V = [0, 0.25, 0.5, 0.75, 1]
ALPHA = [10, 100, 1000, 10000]


HYPERPARAMS = {'k': K, 'l2': L2, 'v': V, 'alpha': ALPHA}
HYPERPARAMS

{'k': 80,
 'l2': [10, 100, 1000, 10000, 100000],
 'v': [0, 0.25, 0.5, 0.75, 1],
 'alpha': [10, 100, 1000, 10000]}

## Parse data

In [8]:
data = io.parse_interactions_with_context(INTERACTIONS, item_id=ITEM_ID, user_id=USER_ID)

## Make train/val split for hyperparam tuning

In [9]:
train, test = evaluation.splits.context_leave_one_out_split(data, seed=SEED)

## Perform gridsearch on validation set

In [10]:
P, Q = userItemFactorSupplier(train, SEED)

Computing factors WMF


  0%|          | 0/10 [00:00<?, ?it/s]

In [11]:
%%time
modelHyperparams = dict()
for modelName, Model in tqdm(MODELS.items()):
    score, bestHyperparams = evaluation.cars.gridsearchSetFactors(Model, train, test, P, Q, HYPERPARAMS, retarget=RETARGET)
    print(f"Best score of {score} for model {modelName} achieved with {bestHyperparams}.")
    modelHyperparams[modelName] = bestHyperparams

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

Training model <src.algorithm.cars.itals.iTALS object at 0x7f5a9d404130> with hyperparameters {'k': 80, 'l2': 10, 'v': 0, 'alpha': 10}


  0%|          | 0/3 [00:00<?, ?it/s]

Evaluating with 22178 users
MRR@5 0.009
MRR@20 0.012
Average Recall@5 0.017
Average Recall@20 0.046
Training model <src.algorithm.cars.itals.iTALS object at 0x7f5a9d404f70> with hyperparameters {'k': 80, 'l2': 100, 'v': 0, 'alpha': 10}


  0%|          | 0/3 [00:00<?, ?it/s]

Evaluating with 22178 users
MRR@5 0.011
MRR@20 0.013
Average Recall@5 0.019
Average Recall@20 0.044
Training model <src.algorithm.cars.itals.iTALS object at 0x7f5a9d70c070> with hyperparameters {'k': 80, 'l2': 1000, 'v': 0, 'alpha': 10}


  0%|          | 0/3 [00:00<?, ?it/s]


KeyboardInterrupt



## Evaluate models with optimal hyperparams with KFolds Cross validation

In [12]:
%%time

for modelName, Model in MODELS.items():
    print()
    bestHyperparams = modelHyperparams[modelName]
    alg = Model(**bestHyperparams)
    results = evaluation.cars.contextKFoldsSetFactorsEval(alg, data, userItemFactorSupplier, nr_folds=5, seed=SEED2, retarget=RETARGET)
    print("Model:", modelName)
    print("Hyperparams:", bestHyperparams)
    evaluation.cars.printKfoldsMetrics(results)




KeyError: 'iTALS one'