In [1]:
import logging, logging.config
import yaml

In [2]:
import datasets as ds
import rec_ops
import algorithms
from lenskit import crossfold as xf
from lenskit import batch, topn
import os
from os import path
from os import makedirs

In [3]:
# Read the logging configuration that is later passed to
# logging.config.dictConfig. safe_load only constructs plain Python objects
# (dicts, lists, strings, numbers) and, unlike a bare yaml.load(), also works
# on PyYAML >= 6 where the Loader argument is mandatory.
with open('logging.yaml') as lf:
    log_config = yaml.safe_load(lf)

In [4]:
# Make sure the output directory used by the to_csv calls below exists.
# exist_ok=True covers the race where the directory appears between the check
# and the creation, so no blanket try/except is needed; real failures
# (permissions, a regular file named "build") now surface here instead of
# being silently swallowed.
build_dir = './build'
if not os.path.isdir(build_dir):
    os.makedirs(build_dir, exist_ok=True)
    print('path ./build created')

In [5]:
# Apply the logging configuration read from logging.yaml, then obtain this
# notebook's logger. Keep this order: by default dictConfig disables loggers
# that already exist when it runs (unless the config sets
# disable_existing_loggers to False), so the logger is fetched afterwards.
logging.config.dictConfig(log_config)
_log = logging.getLogger('eval_error.runner')

In [6]:
# Keep a handle on the dataset loader (it is also passed to the training log
# message further down) and call it to obtain the ratings frame.
ml_100k = ds.ml_100k
ratings = ml_100k()

In [7]:
# One partition of 100 sampled users (matches the crossfold log output).
# SampleFrac(0.1) presumably holds out 10% of each sampled user's ratings as
# test data -- confirm against the lenskit crossfold documentation.
n_partitions, n_test_users = 1, 100
holdout = xf.SampleFrac(0.1)
splits = xf.sample_users(ratings, n_partitions, n_test_users, holdout)

In [8]:
# Pull the first (and, with a single partition, only) train/test pair out of
# the split iterator.
first_split = next(splits)
train, test = first_split

INFO lenskit.crossfold sampling 943 users into 1 partitions (n=100)


In [9]:
# Number of distinct users present in the training frame.
train['user'].unique().size

943

In [10]:
# Number of distinct users present in the test frame.
test['user'].unique().size

100

In [11]:
# Number of distinct users in the full ratings frame, for comparison with the
# train/test counts above.
ratings['user'].unique().size

943

In [12]:
# Persist both halves of the split under build/ without the index column.
for frame, out_path in ((train, 'build/train.csv'), (test, 'build/test.csv')):
    frame.to_csv(out_path, index=False)

In [13]:
# Algorithms to run; each name is looked up on the `algorithms` module with
# '-' replaced by '_' and is also used in the recommendations file name.
algo_names = [
    'item-item',
    'funksvd',
]

In [14]:
from tqdm import tqdm

In [15]:
# Train every configured algorithm on the training split and write its
# top-100 recommendations for the test users to build/recommendations_<name>.csv.

# The set of test users does not depend on the algorithm, so compute it once.
users = test.user.unique()
# Log a readable dataset name rather than the loader's function repr.
dataset_name = getattr(ml_100k, '__name__', ml_100k)

for algo_name in tqdm(algo_names):
    # Names use '-' while attributes on `algorithms` use '_'.
    algo = getattr(algorithms, algo_name.replace('-', '_'))
    _log.info('training %s on %s with %d rows', algo_name, dataset_name, len(train))
    model = algo.train(train)
    # The candidate selector is built from the training data; it is re-created
    # per algorithm exactly as before.
    recs = batch.recommend(algo, model, users, 100, topn.UnratedCandidates(train))
    recs.to_csv('build/recommendations_{}.csv'.format(algo_name), index=False)

  0%|          | 0/2 [00:00<?, ?it/s]

INFO lenskit.algorithms.item_knn [ 1.12s] made sparse matrix for 1679 items (99035 ratings)
INFO lenskit.algorithms.item_knn [ 1.13s] computed means for 1679 items
INFO lenskit.algorithms.item_knn [ 1.19s] normalized user-item ratings
INFO lenskit.algorithms.item_knn [ 1.20s] computing similarity matrix
INFO lenskit._mkl_ops Loaded MKL
INFO lenskit.algorithms.item_knn [ 1.32s] multiplying matrix
INFO lenskit.algorithms.item_knn [ 1.37s] filtering similarities
INFO lenskit.algorithms.item_knn [ 1.38s] filter keeps 600432 of 887436 entries
INFO lenskit.algorithms.item_knn [ 1.42s] making matrix symmetric (600432 nnz)
INFO lenskit.algorithms.item_knn [ 1.43s] ordering similarities
INFO lenskit.algorithms.item_knn [ 2.11s] got neighborhoods for 1514 of 1679 items
INFO lenskit.algorithms.item_knn [ 2.11s] computed 1200864 neighbor pairs


 50%|█████     | 1/2 [00:04<00:04,  4.47s/it]

INFO lenskit.algorithms.funksvd [ 0ms] training bias model
INFO lenskit.algorithms.basic building bias model for 99035 ratings
INFO lenskit.algorithms.basic global mean: 3.530
INFO lenskit.algorithms.basic computed means for 1679 items
INFO lenskit.algorithms.basic computed means for 943 users
INFO lenskit.algorithms.funksvd [ 50ms] preparing rating data for 99035 samples
INFO lenskit.algorithms.funksvd [ 1.79s] training biased MF model with 15 features
INFO lenskit.algorithms.funksvd [ 2.85s] finished feature 0 (RMSE=0.912326) in 1.06s
INFO lenskit.algorithms.funksvd [ 3.04s] finished feature 1 (RMSE=0.901076) in 0.18s
INFO lenskit.algorithms.funksvd [ 3.22s] finished feature 2 (RMSE=0.895816) in 0.17s
INFO lenskit.algorithms.funksvd [ 3.40s] finished feature 3 (RMSE=0.894137) in 0.18s
INFO lenskit.algorithms.funksvd [ 3.58s] finished feature 4 (RMSE=0.892832) in 0.18s
INFO lenskit.algorithms.funksvd [ 3.76s] finished feature 5 (RMSE=0.891723) in 0.18s
INFO lenskit.algorithms.funksvd 

100%|██████████| 2/2 [00:10<00:00,  4.98s/it]
