Skip to content

Commit

Permalink
Refactor data modules (#59)
Browse files (browse the repository at this point in the history)
* Refactor data modules

* Update VBPR model according to changes in data modules

* Update tests for data modules
  • Loading branch information
tqtg committed Mar 20, 2019
1 parent a01af60 commit 793bef7
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 51 deletions.
4 changes: 2 additions & 2 deletions cornac/data/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
from .module import Module
from .module import FeatureModule
from .text import TextModule
from .image import ImageModule
from .graph import GraphModule
Expand All @@ -9,7 +9,7 @@
from .testset import MultimodalTestSet
from . import reader

__all__ = ['Module',
__all__ = ['FeatureModule',
'TextModule',
'ImageModule',
'GraphModule',
Expand Down
4 changes: 2 additions & 2 deletions cornac/data/graph.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,12 +4,12 @@
@author: Aghiles Salah <asalah@smu.edu.sg>
"""

from . import Module
from . import FeatureModule
import scipy.sparse as sp
import numpy as np


class GraphModule(Module):
class GraphModule(FeatureModule):
"""Graph module
"""

Expand Down
8 changes: 4 additions & 4 deletions cornac/data/image.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,19 +4,19 @@
@author: Quoc-Tuan Truong <tuantq.vnu@gmail.com>
"""

from . import Module
from . import FeatureModule


class ImageModule(Module):
class ImageModule(FeatureModule):
"""Image module
"""

def __init__(self, **kwargs):
super().__init__(**kwargs)

self._id_image = kwargs.get('id_image', None)
self._id_path = kwargs.get('id_path', None)

self.data_image = None
self.data_path = None

Expand All @@ -31,7 +31,7 @@ def data_image(self, input_image):
def build(self, global_id_map):
"""Build the model based on provided list of ordered ids
"""
Module.build(self, global_id_map)
FeatureModule.build(self, global_id_map)

def batch_image(self, batch_ids,
target_size=(256, 256),
Expand Down
57 changes: 30 additions & 27 deletions cornac/data/module.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,55 +9,58 @@

class Module:
"""Module
"""

def __init__(self, **kwargs):
pass


class FeatureModule(Module):
"""FeatureModule
"""

def __init__(self, **kwargs):
super().__init__(**kwargs)

self._id_feature = kwargs.get('id_feature', None)
self._normalized = kwargs.get('normalized', False)

self.data_feature = None
self.features = None
if self._id_feature is not None:
first_id = list(self._id_feature.keys())[0]
self.feature_dim = self._id_feature[first_id].shape[0]
self.__feat_dim = next(iter(self._id_feature.values())).shape[0]

@property
def data_feature(self):
return self.__data_feature
def features(self):
"""Return the whole feature matrix
"""
return self.__features

@data_feature.setter
def data_feature(self, input_feature):
self.__data_feature = input_feature
@features.setter
def features(self, input_features):
self.__features = input_features

@property
def feature_dim(self):
return self.__feature_dim

@feature_dim.setter
def feature_dim(self, input_dim):
self.__feature_dim = input_dim
def feat_dim(self):
"""Return the dimensionality of the feature vectors
"""
return self.__feat_dim

def _build_feature(self, global_id_map):
"""Build data_feature matrix based on provided list of ordered ids
def build(self, global_id_map):
"""Build the features based on provided global id map
"""
if self._id_feature is None:
return

self.data_feature = np.zeros((len(global_id_map), self.feature_dim))
self.features = np.zeros((len(global_id_map), self.feat_dim))
for mapped_id, raw_id in enumerate(global_id_map.keys()):
self.data_feature[mapped_id] = self._id_feature[raw_id]
self.features[mapped_id] = self._id_feature[raw_id]
if self._normalized:
self.data_feature = self.data_feature - np.min(self.data_feature)
self.data_feature = self.data_feature / (np.max(self.data_feature) + 1e-10)
self.features = self.features - np.min(self.features)
self.features = self.features / (np.max(self.features) + 1e-10)

self._id_feature.clear()

def build(self, global_id_map):
"""Build the model based on provided list of ordered ids
"""
self._build_feature(global_id_map)

def batch_feature(self, batch_ids):
def batch_feat(self, batch_ids):
"""Return a matrix (batch of feature vectors) corresponding to provided batch_ids
"""
return self.data_feature[batch_ids]
return self.features[batch_ids]
4 changes: 2 additions & 2 deletions cornac/data/text.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,10 +4,10 @@
@author: Quoc-Tuan Truong <tuantq.vnu@gmail.com>
"""

from . import Module
from . import FeatureModule


class TextModule(Module):
class TextModule(FeatureModule):
"""Text module
"""
Expand Down
10 changes: 5 additions & 5 deletions cornac/models/vbpr/recom_vbpr.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -90,13 +90,13 @@ def _load_or_randn(self, size, init_values=None):
tensor = torch.tensor(init_values, requires_grad=True, device=self.device)
return tensor

def _init_params(self, n_users, n_items, feature_dim):
def _init_params(self, n_users, n_items, feat_dim):
Bi = self._load_or_randn((n_items), init_values=self.beta_item)
Gu = self._load_or_randn((n_users, self.k), init_values=self.gamma_user)
Gi = self._load_or_randn((n_items, self.k), init_values=self.gamma_item)
Tu = self._load_or_randn((n_users, self.k2), init_values=self.theta_user)
E = self._load_or_randn((feature_dim, self.k2), init_values=self.emb_matrix)
Bp = self._load_or_randn((feature_dim, 1), init_values=self.beta_prime)
E = self._load_or_randn((feat_dim, self.k2), init_values=self.emb_matrix)
Bp = self._load_or_randn((feat_dim, 1), init_values=self.beta_prime)

return Bi, Gu, Gi, Tu, E, Bp

Expand Down Expand Up @@ -128,13 +128,13 @@ def fit(self, train_set):
raise CornacException('item_image module is required but None.')

# Item visual feature from CNN
self.item_feature = train_set.item_image.data_feature[:self.train_set.num_items]
self.item_feature = train_set.item_image.features[:self.train_set.num_items]
F = torch.from_numpy(self.item_feature).float().to(self.device)

# Learned parameters
Bi, Gu, Gi, Tu, E, Bp = self._init_params(n_users=train_set.num_users,
n_items=train_set.num_items,
feature_dim=train_set.item_image.feature_dim)
feat_dim=train_set.item_image.feat_dim)
optimizer = torch.optim.Adam([Bi, Gu, Gi, Tu, E, Bp], lr=self.learning_rate)

for epoch in range(1, self.n_epochs + 1):
Expand Down
18 changes: 9 additions & 9 deletions tests/cornac/data/test_module.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,37 +4,37 @@
@author: Quoc-Tuan Truong <tuantq.vnu@gmail.com>
"""

from cornac.data import Module
from cornac.data import FeatureModule
import numpy as np
from collections import OrderedDict


def test_init():
md = Module()
md = FeatureModule()
md.build(global_id_map=None)
assert md.data_feature is None
assert md.features is None

id_feature = {'a': np.zeros(10)}
md = Module(id_feature=id_feature, normalized=True)
md = FeatureModule(id_feature=id_feature, normalized=True)

global_iid_map = OrderedDict()
global_iid_map.setdefault('a', len(global_iid_map))
md.build(global_id_map=global_iid_map)

assert md.data_feature.shape[0] == 1
assert md.data_feature.shape[1] == 10
assert md.feature_dim == 10
assert md.features.shape[0] == 1
assert md.features.shape[1] == 10
assert md.feat_dim == 10
assert len(md._id_feature) == 0


def test_batch_feature():
id_feature = {'a': np.zeros(10)}
md = Module(id_feature=id_feature, normalized=True)
md = FeatureModule(id_feature=id_feature, normalized=True)

global_iid_map = OrderedDict()
global_iid_map.setdefault('a', len(global_iid_map))
md.build(global_id_map=global_iid_map)

b = md.batch_feature(batch_ids=[0])
b = md.batch_feat(batch_ids=[0])
assert b.shape[0] == 1
assert b.shape[1] == 10

0 comments on commit 793bef7

Please sign in to comment.