In [4]:
%load_ext autoreload
%autoreload 2

from data_loading.loo_data_loader import DesignatedDataLoader
# Options for building the loader from the files under ../data_loading/mindreader.
load_options = dict(
    path='../data_loading/mindreader',
    movies_only=False,
    min_num_entity_ratings=1,
    filter_unknowns=True,
)
data_loader = DesignatedDataLoader.load_from(**load_options)

# Pin the loader's seed before any split is made
# (presumably this drives the sampling done by make() -- confirm in the loader).
data_loader.random_seed = 2

# Show the loader's textual summary (user / movie / entity / rating counts).
print(data_loader.info())

# Options for the train / validation / test split.
split_options = {
    'movie_to_entity_ratio': 0.5,
    'replace_movies_with_descriptive_entities': True,
    'n_negative_samples': 100,
}
train, validation, test = data_loader.make(**split_options)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Init dataloader with 40138 ratings
 
            DataLoader Information
            -----------------------------------
            n_users:                      932
            n_movies:                     3001
            n_descriptive_entities:       1974

            n_ratings:                    40138
            n_movie_ratings:              17749
            n_descriptive_entity_ratings: 22389
        
Asserting positive samples not in training set for each user...
Asserting negative samples occurrence in training set, but not rated for each user...
Asserting positive samples do not occur in negative samples...
Returning a dataset over 927 users.


In [5]:
from sklearn.utils.extmath import randomized_svd
import numpy as np
import scipy
from loguru import logger
import operator
from utility.utility import csr
# User-item matrix built from the training split by the project's `csr`
# helper; it is factorised below with a truncated (randomized) SVD.
ratings = csr(train)

TOP_N = 10     # a hit is counted when the held-out item is in the top TOP_N
SVD_SEED = 0   # explicit seed: keeps the randomized SVD repeatable regardless
               # of which default the installed scikit-learn version uses


def recommend(users):
    """Score every item for `users` using the current SVD factors.

    Reads the notebook-level `U` (user factors already scaled by the singular
    values) and `V` (item factors) at call time, so it always reflects the
    factorisation most recently computed by the loop below.

    Parameters
    ----------
    users : index (or indices) into the rows of `U`.

    Returns
    -------
    The dot product of the selected user factors with `V.T`, i.e. one score
    per item column.
    """
    return np.dot(U[users, :], V.T)


def hit_at_n(scores, to_find, negative, n=TOP_N):
    """Return True if `to_find` is among the `n` best-scored candidates.

    The candidates are `to_find` plus every entry of `negative`; all other
    items are ignored. Items are ranked by descending score and ties are
    broken by ascending item index (stable sort), which matches sorting
    `enumerate(scores)` by score with `reverse=True`.

    Parameters
    ----------
    scores : 1-D array of scores, indexed by item.
    to_find : the held-out item that should be ranked highly.
    negative : list of negative-sample items (must be a list, because it is
        concatenated with `[to_find]`; assumed to hold hashable item indices).
    n : length of the ranked list to inspect.
    """
    # A set gives O(1) membership tests instead of scanning a ~100-element
    # list once per item in the catalogue.
    candidates = set([to_find] + negative)
    ranking = np.argsort(-np.asarray(scores), kind='stable').tolist()
    top_n = [item for item in ranking if item in candidates][:n]
    return to_find in top_n


# Sweep the number of latent factors and log the hit percentage for each.
for k in range(1, 11):
    U, sigma, VT = randomized_svd(ratings, k, n_iter=20, random_state=SVD_SEED)
    # Fold the singular values into the user factors. Broadcasting over the
    # 1-D `sigma` is the same as U @ diag(sigma), without depending on how
    # `*` is defined for scipy sparse objects.
    U = U * sigma
    V = VT.T

    hits = 0
    count = 0

    for user, (to_find, negative) in validation:
        if hit_at_n(recommend(user), to_find, negative):
            hits += 1
        count += 1

    # Report NaN rather than raising ZeroDivisionError on an empty validation set.
    hit_rate = hits / count * 100 if count else float('nan')
    logger.info(f'{k}: Hit: {hit_rate}')

2019-12-19 21:15:19.275 | INFO     | __main__:<module>:37 - 1: Hit: 11.758360302049622
2019-12-19 21:15:34.763 | INFO     | __main__:<module>:37 - 2: Hit: 10.895361380798274
2019-12-19 21:15:51.959 | INFO     | __main__:<module>:37 - 3: Hit: 11.542610571736784
2019-12-19 21:16:07.443 | INFO     | __main__:<module>:37 - 4: Hit: 9.70873786407767
2019-12-19 21:16:23.713 | INFO     | __main__:<module>:37 - 5: Hit: 9.492988133764833
2019-12-19 21:16:40.285 | INFO     | __main__:<module>:37 - 6: Hit: 9.816612729234087
2019-12-19 21:16:54.496 | INFO     | __main__:<module>:37 - 7: Hit: 9.385113268608414
2019-12-19 21:17:09.283 | INFO     | __main__:<module>:37 - 8: Hit: 9.385113268608414
2019-12-19 21:17:24.215 | INFO     | __main__:<module>:37 - 9: Hit: 9.60086299892125
2019-12-19 21:17:39.659 | INFO     | __main__:<module>:37 - 10: Hit: 9.924487594390508


In [6]:
from models.bpr import BPR
from models.bpr_recommender import BPRRecommender
import operator

# Fit the BPR recommender on the same train / validation splits used by the
# SVD baseline above. Per the log output that follows, fit() searches over
# 16 parameter combinations (reg, learning_rate, n_iters, n_factors,
# batch_size) and logs a `Hit` score for each one.
bpr = BPRRecommender()
bpr.fit(train, validation)

2019-12-19 21:17:39.692 | INFO     | models.bpr_recommender:fit:32 - 16 parameters to search
2019-12-19 21:17:39.715 | INFO     | models.bpr_recommender:fit:37 - {'reg': 0.001, 'learning_rate': 0.1, 'n_iters': 200, 'n_factors': 1, 'batch_size': 8}
2019-12-19 21:17:46.550 | INFO     | models.bpr_recommender:fit:55 - Hit: 12.513484358144552
2019-12-19 21:17:46.550 | INFO     | models.bpr_recommender:fit:37 - {'reg': 0.001, 'learning_rate': 0.1, 'n_iters': 200, 'n_factors': 1, 'batch_size': 16}
2019-12-19 21:17:51.833 | INFO     | models.bpr_recommender:fit:55 - Hit: 12.944983818770226
2019-12-19 21:17:51.833 | INFO     | models.bpr_recommender:fit:37 - {'reg': 0.001, 'learning_rate': 0.1, 'n_iters': 200, 'n_factors': 2, 'batch_size': 8}
2019-12-19 21:17:58.853 | INFO     | models.bpr_recommender:fit:55 - Hit: 9.60086299892125
2019-12-19 21:17:58.854 | INFO     | models.bpr_recommender:fit:37 - {'reg': 0.001, 'learning_rate': 0.1, 'n_iters': 200, 'n_factors': 2, 'batch_size': 16}
2019-12-