In [18]:
%load_ext autoreload
%autoreload 2

from data_loading.loo_data_loader import DesignatedDataLoader
# Options for reading the MindReader ratings from disk.
loader_options = dict(
    path='../data_loading/mindreader',
    movies_only=False,
    min_num_entity_ratings=3,
    filter_unknowns=True,
)
data_loader = DesignatedDataLoader.load_from(**loader_options)

# The seed is assigned before the split is made below.
# NOTE(review): presumably this makes the sampling in make() reproducible — confirm.
data_loader.random_seed = 2

print(data_loader.info())

# Options for building the train / validation / test split.
split_options = dict(
    movie_to_entity_ratio=1,
    replace_movies_with_descriptive_entities=False,
    n_negative_samples=100,
)
train, validation, test = data_loader.make(**split_options)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Init dataloader with 36526 ratings
 
            DataLoader Information
            -----------------------------------
            n_users:                      932
            n_movies:                     1459
            n_descriptive_entities:       732

            n_ratings:                    36526
            n_movie_ratings:              15691
            n_descriptive_entity_ratings: 20835
        
Asserting positive samples not in training set for each user...
Asserting negative samples occurrence in training set, but not rated for each user...
Asserting positive samples do not occur in negative samples...
Returning a dataset over 930 users.


In [19]:
from sklearn.utils.extmath import randomized_svd
import numpy as np
import scipy
from loguru import logger
import operator
from utility.utility import csr
# SVD baseline: factorize the training ratings with 1..10 latent factors and
# log, for each factor count, the percentage of validation users whose
# held-out item is ranked within the top TOP_N of its candidate list.
ratings = csr(train)

TOP_N = 10    # length of the ranked candidate list the held-out item must appear in
SVD_SEED = 2  # fixed seed so the randomized SVD (and the logged hit rates) are reproducible


def recommend(users):
    """Score every item for `users` (an index or an index array).

    Reads the module-level factor matrices `U` (user factors already scaled
    by the singular values) and `V` (item factors) that the loop below
    refreshes for each factor count, and returns their dot product.
    """
    return np.dot(U[users, :], V.T)


def is_hit(scores, to_find, negative, top_n=TOP_N):
    """Return True if `to_find` ranks within the best `top_n` candidates.

    The candidates are `to_find` plus the items in `negative`. Only these
    are ranked instead of sorting the whole catalogue and filtering it
    afterwards. Ordering is by descending score with ties broken by
    ascending item index, which is what a stable descending sort over
    ``enumerate(scores)`` produces. Candidates that have no entry in
    `scores` are ignored.
    """
    candidates = {to_find, *negative}
    ranked = sorted(
        (item for item in candidates if 0 <= item < len(scores)),
        key=lambda item: (-scores[item], item),
    )
    return to_find in ranked[:top_n]


for k in range(1, 11):
    U, sigma, VT = randomized_svd(ratings, n_components=k, n_iter=20, random_state=SVD_SEED)
    # Broadcasting scales column j of U by sigma[j], i.e. U @ diag(sigma),
    # without building a sparse diagonal matrix.
    U = U * sigma
    V = VT.T

    hits = 0
    count = 0

    for user, (to_find, negative) in validation:
        if is_hit(recommend(user), to_find, negative):
            hits += 1
        count += 1

    # An empty validation set yields NaN rather than a ZeroDivisionError.
    hit_rate = hits / count * 100 if count else float('nan')
    logger.info(f'{k}: Hit: {hit_rate}')

2019-12-19 22:02:55.099 | INFO     | __main__:<module>:37 - 1: Hit: 20.537634408602152
2019-12-19 22:02:57.867 | INFO     | __main__:<module>:37 - 2: Hit: 20.0
2019-12-19 22:03:00.833 | INFO     | __main__:<module>:37 - 3: Hit: 17.741935483870968
2019-12-19 22:03:04.049 | INFO     | __main__:<module>:37 - 4: Hit: 17.741935483870968
2019-12-19 22:03:11.972 | INFO     | __main__:<module>:37 - 5: Hit: 17.20430107526882
2019-12-19 22:03:19.429 | INFO     | __main__:<module>:37 - 6: Hit: 17.20430107526882
2019-12-19 22:03:26.371 | INFO     | __main__:<module>:37 - 7: Hit: 16.559139784946236
2019-12-19 22:03:32.927 | INFO     | __main__:<module>:37 - 8: Hit: 16.559139784946236
2019-12-19 22:03:39.280 | INFO     | __main__:<module>:37 - 9: Hit: 17.634408602150536
2019-12-19 22:03:45.765 | INFO     | __main__:<module>:37 - 10: Hit: 17.526881720430108


In [20]:
from models.bpr import BPR
from models.bpr_recommender import BPRRecommender
import operator

# Fit the BPR recommender on the training split, passing the validation split along.
# NOTE(review): judging by the log output of this cell, fit() appears to run a
# hyperparameter search (it reports "16 parameters to search") and logs a hit
# rate for each configuration — confirm against models.bpr_recommender.
bpr = BPRRecommender()
bpr.fit(train, validation)

2019-12-19 22:03:45.810 | INFO     | models.bpr_recommender:fit:32 - 16 parameters to search
2019-12-19 22:03:45.832 | INFO     | models.bpr_recommender:fit:37 - {'reg': 0.001, 'learning_rate': 0.1, 'n_iters': 200, 'n_factors': 1, 'batch_size': 8}
2019-12-19 22:03:52.165 | INFO     | models.bpr_recommender:fit:55 - Hit: 15.268817204301074
2019-12-19 22:03:52.166 | INFO     | models.bpr_recommender:fit:37 - {'reg': 0.001, 'learning_rate': 0.1, 'n_iters': 200, 'n_factors': 1, 'batch_size': 16}
2019-12-19 22:03:57.292 | INFO     | models.bpr_recommender:fit:55 - Hit: 13.440860215053762
2019-12-19 22:03:57.293 | INFO     | models.bpr_recommender:fit:37 - {'reg': 0.001, 'learning_rate': 0.1, 'n_iters': 200, 'n_factors': 2, 'batch_size': 8}
2019-12-19 22:04:03.993 | INFO     | models.bpr_recommender:fit:55 - Hit: 16.881720430107528
2019-12-19 22:04:03.994 | INFO     | models.bpr_recommender:fit:37 - {'reg': 0.001, 'learning_rate': 0.1, 'n_iters': 200, 'n_factors': 2, 'batch_size': 16}
2019-1