Skip to content

Commit

Permalink
Merge pull request #56 from tqtg/master
Browse files: browse the repository at this point in the history
Improve MatrixTrainSet
  • Loading branch information
tqtg committed Mar 13, 2019
2 parents 0721473 + 957dea2 commit b6f39b4
Show file tree
Hide file tree
Showing 23 changed files with 3,734 additions and 3,588 deletions.
13 changes: 6 additions & 7 deletions .codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@ coverage:
# project coverage decrease by more than 1%:
target: auto
threshold: 1%
patch:
default:
# Be tolerant on slight code coverage diff on PRs to limit
# noisy red coverage status on github PRs.
target: auto
threshold: 1%

# patch:
# default:
# # Be tolerant on slight code coverage diff on PRs to limit
# # noisy red coverage status on github PRs.
# target: auto
# threshold: 1%
15 changes: 7 additions & 8 deletions cornac/data/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ def __init__(self, **kwargs):
self.matrix = None
self.map_data = []

def _build_triplet(self, ordered_ids):
"""Build adjacency matrix in sparse triplet format using maped ids
def _build_triplet(self, global_id_map):
"""Build adjacency matrix in sparse triplet format using mapped ids
"""

for i, j, val in self.raw_data:
self.map_data.append([ordered_ids[i], ordered_ids[j], val])
self.map_data.append([global_id_map[i], global_id_map[j], val])
self.map_data = np.asanyarray(self.map_data)
self.raw_data = None

Expand All @@ -50,18 +50,17 @@ def get_train_triplet(self, train_row_ids, train_col_ids):

return np.asarray(train_triplet)

def build(self, ordered_ids):
self._build_triplet(ordered_ids)
def build(self, global_id_map):
self._build_triplet(global_id_map)
self._build_sparse_matrix(self.map_data)

def batch(self, batch_ids):

"""Collaborative Context Poisson Factorization.
"""Return batch of vectors from the sparse adjacency matrix corresponding to provided batch_ids.
Parameters
----------
batch_ids: array, required
An array conting the ids of rows to be returned from the sparse adjacency matrix.
An array containing the ids of rows to be returned from the sparse adjacency matrix.
"""

return self.matrix[batch_ids]
4 changes: 2 additions & 2 deletions cornac/data/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ def data_image(self):
def data_image(self, input_image):
self.__data_image = input_image

def build(self, ordered_ids):
def build(self, global_id_map):
"""Build the model based on provided list of ordered ids
"""
super().build(ordered_ids)
Module.build(self, global_id_map)

def batch_image(self, batch_ids,
target_size=(256, 256),
Expand Down
12 changes: 6 additions & 6 deletions cornac/data/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,25 @@ def feature_dim(self):
def feature_dim(self, input_dim):
self.__feature_dim = input_dim

def _build_feature(self, ordered_ids):
def _build_feature(self, global_id_map):
"""Build data_feature matrix based on provided list of ordered ids
"""
if self._id_feature is None:
return

self.data_feature = np.zeros((len(ordered_ids), self.feature_dim))
for map_id, raw_id in enumerate(ordered_ids.keys()):
self.data_feature[map_id] = self._id_feature[raw_id]
self.data_feature = np.zeros((len(global_id_map), self.feature_dim))
for mapped_id, raw_id in enumerate(global_id_map.keys()):
self.data_feature[mapped_id] = self._id_feature[raw_id]
if self._normalized:
self.data_feature = self.data_feature - np.min(self.data_feature)
self.data_feature = self.data_feature / (np.max(self.data_feature) + 1e-10)

self._id_feature.clear()

def build(self, ordered_ids):
def build(self, global_id_map):
"""Build the model based on provided list of ordered ids
"""
self._build_feature(ordered_ids)
self._build_feature(global_id_map)

def batch_feature(self, batch_ids):
"""Return a matrix (batch of feature vectors) corresponding to provided batch_ids
Expand Down
9 changes: 5 additions & 4 deletions cornac/data/testset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def __init__(self, user_ratings, uid_map, iid_map):
self._uid_map = uid_map
self._iid_map = iid_map

def get_users(self):
@property
def users(self):
"""Return a list of users"""
return self._user_ratings.keys()

Expand All @@ -45,12 +46,12 @@ def get_iid(self, raw_iid):
return self._iid_map[raw_iid]

@classmethod
def from_uir(self, triplet_data, global_uid_map, global_iid_map, global_ui_set, verbose=False):
def from_uir(self, data, global_uid_map, global_iid_map, global_ui_set, verbose=False):
"""Constructing TestSet from triplet data.
Parameters
----------
triplet_data: array-like, shape: [n_examples, 3]
data: array-like, shape: [n_examples, 3]
Data in the form of triplets (user, item, rating)
global_uid_map: :obj:`defaultdict`
Expand Down Expand Up @@ -79,7 +80,7 @@ def from_uir(self, triplet_data, global_uid_map, global_iid_map, global_ui_set,
unk_user_count = 0
unk_item_count = 0

for raw_uid, raw_iid, rating in triplet_data:
for raw_uid, raw_iid, rating in data:
if (raw_uid, raw_iid) in global_ui_set: # duplicate rating
continue
global_ui_set.add((raw_uid, raw_iid))
Expand Down
2 changes: 1 addition & 1 deletion cornac/data/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ class TextModule(Module):
def __init__(self, **kwargs):
super().__init__(**kwargs)

def build(self, ordered_ids):
def build(self, global_id_map):
pass
48 changes: 32 additions & 16 deletions cornac/data/trainset.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,26 @@ def num_items(self):
"""Return the number of items"""
return len(self._iid_map)

@property
def uid_list(self):
"""Return the list of mapped user ids"""
return self._uid_map.values()

@property
def raw_uid_list(self):
"""Return the list of raw user ids"""
return self._uid_map.keys()

@property
def iid_list(self):
"""Return the list of mapped item ids"""
return self._iid_map.values()

@property
def raw_iid_list(self):
"""Return the list of raw item ids"""
return self._iid_map.keys()

def is_unk_user(self, mapped_uid):
"""Return whether or not a user is unknown given the mapped id"""
return mapped_uid >= self.num_users
Expand All @@ -51,21 +71,6 @@ def get_iid(self, raw_iid):
"""Return the mapped id of an item given a raw id"""
return self._iid_map[raw_iid]

def get_uid_list(self):
"""Return the list of mapped user ids"""
return self._uid_map.values()

def get_raw_uid_list(self):
"""Return the list of raw user ids"""
return self._uid_map.keys()

def get_iid_list(self):
"""Return the list of mapped item ids"""
return self._iid_map.values()

def get_raw_iid_list(self):
"""Return the list of raw item ids"""
return self._iid_map.keys()

@staticmethod
def idx_iter(idx_range, batch_size=1, shuffle=False):
Expand Down Expand Up @@ -133,7 +138,18 @@ def __init__(self, matrix, max_rating, min_rating, global_mean, uid_map, iid_map
@property
def uir_tuple(self):
if not self.__uir_tuple:
self.__uir_tuple = find(self.matrix)
# the rating matrix is assumed to be in CSR format
if not self.matrix.has_sorted_indices:
self.matrix.sort_indices()

num_users, num_items = self.matrix.shape

# this basically calculates the 'row' attribute of a COO matrix
# without requiring us to get the whole COO matrix
user_counts = np.ediff1d(self.matrix.indptr)
user_ids = np.repeat(np.arange(num_users), user_counts).astype(self.matrix.indices.dtype)

self.__uir_tuple = (user_ids, self.matrix.indices, self.matrix.data)
return self.__uir_tuple

@uir_tuple.setter
Expand Down
6 changes: 3 additions & 3 deletions cornac/eval_methods/base_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,12 +192,12 @@ def _build_modules(self):
for user_module in [self.user_text, self.user_image, self.user_graph]:
if user_module is None:
continue
user_module.build(ordered_ids=self.global_uid_map)
user_module.build(global_id_map=self.global_uid_map)

for item_module in [self.item_text, self.item_image, self.item_graph]:
if item_module is None:
continue
item_module.build(ordered_ids=self.global_iid_map)
item_module.build(global_id_map=self.global_iid_map)

for data_set in [self.train_set, self.test_set, self.val_set]:
if data_set is None: continue
Expand Down Expand Up @@ -254,7 +254,7 @@ def evaluate(self, model, metrics, user_based):
for mt in (rating_metrics + ranking_metrics):
metric_user_results[mt.name] = {}

for user_id in tqdm.tqdm(self.test_set.get_users(), disable=not self.verbose):
for user_id in tqdm.tqdm(self.test_set.users, disable=not self.verbose):
# ignore unknown users when self.exclude_unknowns is set
if self.exclude_unknowns and self.train_set.is_unk_user(user_id):
continue
Expand Down
28 changes: 14 additions & 14 deletions cornac/models/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
This directory includes the implementation of all the models (listed below) supported in Cornac.
Additional dependencies (CPU versions) for each model are also listed accordingly.

| Model | Additional Dependencies |
| --- | :---: |
| [Bayesian Personalized Ranking (BPR)](bpr) | N/A |
| [Collaborative Context Poisson Factorization (C2PF)](c2pf)| N/A |
| [Collaborative Deep Learning (CDL)](cdl) | [requirements.txt](cdl/requirements.txt) |
| [Collaborative Ordinal Embedding (COE)](coe) | [requirements.txt](coe/requirements.txt) |
| [Hierarchical Poisson Factorization (HPF)](hpf) | N/A |
| [Indexable Bayesian Personalized Ranking (IBPR)](ibpr) | [requirements.txt](ibpr/requirements.txt) |
| [Matrix Factorization (MF)](mf) | N/A |
| [Online Indexable Bayesian Personalized Ranking (Online IBPR)](online_ibpr) | [requirements.txt](online_ibpr/requirements.txt) |
| [Probabilistic Collaborative Representation Learning (PCRL)](pcrl) | [requirements.txt](pcrl/requirements.txt) |
| [Probabilistic Matrix Factorization (PMF)](pmf) | N/A |
| [Spherical K-means (SKM)](skm) | N/A |
| [Visual Bayesian Personalized Ranking (VBPR)](vbpr) | [requirements.txt](vbpr/requirements.txt) |
| Model and paper | Additional dependencies | Examples |
| --- | :---: | :---: |
| [Bayesian Personalized Ranking (BPR)](bpr), [paper](https://arxiv.org/ftp/arxiv/papers/1205/1205.2618.pdf) | N/A | [bpr_netflix.py](../../examples/bpr_netflix.py) |
| [Collaborative Context Poisson Factorization (C2PF)](c2pf), [paper](https://www.ijcai.org/proceedings/2018/0370.pdf) | N/A | [c2pf_example.py](../../examples/c2pf_example.py) |
| [Collaborative Deep Learning (CDL)](cdl), [paper](https://arxiv.org/pdf/1409.2944.pdf) | [requirements.txt](cdl/requirements.txt) | |
| [Collaborative Ordinal Embedding (COE)](coe), [paper](http://www.hadylauw.com/publications/sdm16.pdf) | [requirements.txt](coe/requirements.txt) | |
| [Hierarchical Poisson Factorization (HPF)](hpf), [paper](http://jakehofman.com/inprint/poisson_recs.pdf) | N/A | |
| [Indexable Bayesian Personalized Ranking (IBPR)](ibpr), [paper](http://www.hadylauw.com/publications/cikm17a.pdf) | [requirements.txt](ibpr/requirements.txt) | [ibpr_example.py](../../examples/ibpr_example.py) |
| [Matrix Factorization (MF)](mf), [paper](https://datajobs.com/data-science-repo/Recommender-Systems-[Netflix].pdf) | N/A | |
| [Online Indexable Bayesian Personalized Ranking (Online IBPR)](online_ibpr), [paper](http://www.hadylauw.com/publications/cikm17a.pdf) | [requirements.txt](online_ibpr/requirements.txt) | |
| [Probabilistic Collaborative Representation Learning (PCRL)](pcrl), [paper](http://www.hadylauw.com/publications/uai18.pdf) | [requirements.txt](pcrl/requirements.txt) | |
| [Probabilistic Matrix Factorization (PMF)](pmf), [paper](https://papers.nips.cc/paper/3208-probabilistic-matrix-factorization.pdf) | N/A | [biased_mf.py](../../examples/biased_mf.py), [given_data.py](../../examples/given_data.py) |
| [Spherical K-means (SKM)](skm), [paper](https://www.sciencedirect.com/science/article/pii/S092523121501509X) | N/A | |
| [Visual Bayesian Personalized Ranking (VBPR)](vbpr), [paper](https://arxiv.org/pdf/1510.01784.pdf) | [requirements.txt](vbpr/requirements.txt) | [vbpr_tradesy.py](../../examples/vbpr_tradesy.py) |

0 comments on commit b6f39b4

Please sign in to comment.