Merge pull request #64 from saghiles/master
Reorganize results; add Amazon Office datasets; add tests for the Graph module and Office data
saghiles committed Mar 22, 2019
2 parents fcc5bfd + 9bd66a0 commit 177508e
Showing 14 changed files with 6,632 additions and 5,859 deletions.
cornac/data/graph.py (2 changes: 1 addition & 1 deletion)
@@ -26,7 +26,7 @@ def _build_triplet(self, global_id_map):
         for i, j, val in self.raw_data:
             self.map_data.append([global_id_map[i], global_id_map[j], val])
         self.map_data = np.asanyarray(self.map_data)
-        self.raw_data = None
+        #self.raw_data = None
 
     def _build_sparse_matrix(self, triplet):
         """Build sparse adjacency matrix
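Commenting out `self.raw_data = None` keeps the original triplets available after the ids are remapped, presumably so the new Graph-module tests can inspect them. A standalone sketch of what `_build_triplet` computes, with a hypothetical id map:

    import numpy as np

    # Hypothetical raw triplets and global id map, for illustration only.
    raw_data = [('a', 'b', 1.0), ('b', 'c', 2.0)]
    global_id_map = {'a': 0, 'b': 1, 'c': 2}

    # Mirrors _build_triplet: remap raw ids to contiguous integer ids.
    map_data = []
    for i, j, val in raw_data:
        map_data.append([global_id_map[i], global_id_map[j], val])
    map_data = np.asanyarray(map_data)

    print(map_data)   # [[0. 1. 1.], [1. 2. 2.]]
    print(raw_data)   # still intact after this change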
cornac/datasets/amazon_office.py (55 changes: 55 additions & 0 deletions)
@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-

"""
@author: Aghiles Salah <asalah@smu.edu.sg>
This data is built based on the Amazon datasets provided by Julian McAuley at: http://jmcauley.ucsd.edu/data/amazon/
"""

from ..utils import validate_format
from ..utils import cache
from ..data import reader

VALID_DATA_FORMATS = ['UIR', 'UIRT']


def load_rating(data_format='UIR'):
    """Load the user-item ratings

    Parameters
    ----------
    data_format: str, default: 'UIR'
        Data format to be returned.

    Returns
    -------
    data: array-like
        Data in the form of a list of tuples depending on the specified data format.
    """

    data_format = validate_format(data_format, VALID_DATA_FORMATS)
    fpath = cache(url='https://static.preferred.ai/cornac/datasets/amazon_office/rating.txt',
                  relative_path='amazon_office/rating.txt')
    if data_format == 'UIR':
        return reader.read_uir(fpath,sep=' ')


def load_context(data_format='UIR'):
    """Load the item-item interactions

    Parameters
    ----------
    data_format: str, default: 'UIR'
        Data format to be returned.

    Returns
    -------
    data: array-like
        Data in the form of a list of tuples depending on the specified data format.
    """

    data_format = validate_format(data_format, VALID_DATA_FORMATS)
    fpath = cache(url='https://static.preferred.ai/cornac/datasets/amazon_office/context.txt',
                  relative_path='amazon_office/context.txt')
    if data_format == 'UIR':
        return reader.read_uir(fpath,sep=' ')
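A minimal usage sketch for the two loaders added above (assuming this commit is installed and network access is available; both calls download once and cache the file locally):

    from cornac.datasets import amazon_office

    # User-item ratings as a list of (user, item, rating) tuples.
    ratings = amazon_office.load_rating(data_format='UIR')

    # Item-item context interactions in the same tuple format.
    contexts = amazon_office.load_context(data_format='UIR')

    print(len(ratings), ratings[:3])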
cornac/eval_methods/cross_validation.py (4 changes: 2 additions & 2 deletions)
@@ -98,12 +98,12 @@ def _next_fold(self):
         self.current_fold = 0
 
     def evaluate(self, model, metrics, user_based):
-        result = CVSingleModelResult()
+        result = CVSingleModelResult(model.name, metrics)
 
         for fold in range(self.n_folds):
             self._get_train_test()
             avg_res, per_user_res = BaseMethod.evaluate(self, model, metrics, user_based)
-            result._add_fold_res(fold=fold, metric_avg_results=avg_res)
+            result.add_fold_res(fold=fold, metric_avg_results=avg_res)
             self._next_fold()
         result._compute_avg_res()
         return result
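The renamed `add_fold_res` records one dict of averaged metric values per fold, and `_compute_avg_res` then averages those dicts across folds. The arithmetic is essentially the following (metric values are made up):

    # Per-fold averages, as add_fold_res would record them.
    per_fold_avg = {0: {'MAE': 0.81, 'RMSE': 1.02},
                    1: {'MAE': 0.79, 'RMSE': 0.98}}

    # Mirrors the accumulation loop in _compute_avg_res.
    avg = {mt: 0.0 for mt in per_fold_avg[0]}
    for f in per_fold_avg:
        for mt in per_fold_avg[f]:
            avg[mt] += per_fold_avg[f][mt] / len(per_fold_avg)

    print(avg)  # {'MAE': 0.8, 'RMSE': 1.0}, up to float rounding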
cornac/eval_methods/ratio_split.py (4 changes: 3 additions & 1 deletion)
@@ -125,4 +125,6 @@ def split(self):
     def evaluate(self, model, metrics, user_based):
         self.split()
         metric_avg_results, per_user_results = BaseMethod.evaluate(self, model, metrics, user_based)
-        return SingleModelResult(metric_avg_results, per_user_results)
+        res = SingleModelResult(model.name, metrics, metric_avg_results, per_user_results)
+        res.organize_avg_res()
+        return res
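With this change the result carries its model name and metric objects from construction, so organizing the averages into a DataFrame needs no arguments. A sketch with hypothetical averaged values (import path and metric names assumed as in this commit):

    from cornac.experiment.result import SingleModelResult
    from cornac.metrics import MAE, RMSE

    # Hypothetical averages, as BaseMethod.evaluate would return them.
    res = SingleModelResult('PMF', [MAE(), RMSE()], {'MAE': 0.8, 'RMSE': 1.0})
    res.organize_avg_res()
    print(res.avg)  # one-row DataFrame: index 'PMF', columns 'MAE' and 'RMSE'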
cornac/experiment/experiment.py (8 changes: 2 additions & 6 deletions)
@@ -44,7 +44,7 @@ def __init__(self, eval_method, models, metrics, user_based=True, verbose=False)
         self.verbose = verbose
         from ..eval_methods.ratio_split import RatioSplit
         from ..eval_methods.cross_validation import CrossValidation
-        if isinstance(eval_method, RatioSplit):
+        if isinstance(self.eval_method, RatioSplit):
             self.results = Result()
         elif isinstance(eval_method, CrossValidation):
             self.results = CVResult(eval_method.n_folds)
@@ -86,7 +86,6 @@ def dict_depth(self, d):

     # modify this function to accommodate several models
     def run(self):
-        model_names = []
         metric_names = []
         organized_metrics = {'ranking': [], 'rating': []}

@@ -98,10 +97,7 @@ def run(self):
         for model in self.models:
             if self.verbose:
                 print(model.name)
-
-            model_names.append(model.name)
-            model_res = self.eval_method.evaluate(model=model, metrics=organized_metrics, user_based=self.user_based)
-            model_res._organize_avg_res(model_name=model.name, metric_names=metric_names)
+            model_res = self.eval_method.evaluate(model=model, metrics=self.metrics, user_based=self.user_based)
             self.results._add_model_res(res=model_res, model_name=model.name)
 
         self.results.show()
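Since the metric bookkeeping now lives in the result classes, `run` passes `self.metrics` straight through to the evaluation method. A rough end-to-end sketch (PMF and all parameter values are arbitrary illustrative choices, not part of this commit):

    import cornac
    from cornac.datasets import amazon_office
    from cornac.eval_methods import RatioSplit
    from cornac.metrics import MAE, RMSE
    from cornac.models import PMF

    # Load the new Amazon Office ratings and split them for evaluation.
    data = amazon_office.load_rating(data_format='UIR')
    ratio_split = RatioSplit(data=data, test_size=0.2, exclude_unknowns=False)

    # One model, two rating metrics; results are printed by run().
    exp = cornac.Experiment(eval_method=ratio_split,
                            models=[PMF(k=10)],
                            metrics=[MAE(), RMSE()],
                            user_based=True)
    exp.run()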
cornac/experiment/result.py (39 changes: 24 additions & 15 deletions)
@@ -15,20 +15,28 @@ class SingleModelResult:
     ----------
     """
 
-    def __init__(self, metric_avg_results, metric_user_results=None):
+    def __init__(self, model_name, metrics, metric_avg_results, metric_user_results=None):
+        self.model_name = model_name
+        self.metric_names = self._get_metric_names(metrics)
         self.avg = metric_avg_results
         self.per_user = metric_user_results
 
-    def _organize_avg_res(self, model_name, metric_names):
-        self.avg = self._get_data_frame(avg_res=self.avg, model_name=model_name, metric_names=metric_names)
+    def organize_avg_res(self):
+        self.avg = self._get_data_frame(avg_res=self.avg)
 
-    def _get_data_frame(self, avg_res, model_name, metric_names):
-        avg_res = [avg_res.get(mt_name, np.nan) for mt_name in metric_names]
+    def _get_data_frame(self, avg_res):
+        avg_res = [avg_res.get(mt_name, np.nan) for mt_name in self.metric_names]
         avg_res = np.asarray(avg_res)
-        avg_res = avg_res.reshape(1, len(metric_names))
-        avg_res = pd.DataFrame(data=avg_res, index=np.asarray([model_name]), columns=np.asarray(metric_names))
+        avg_res = avg_res.reshape(1, len(self.metric_names))
+        avg_res = pd.DataFrame(data=avg_res, index=np.asarray([self.model_name]), columns=np.asarray(self.metric_names))
         return avg_res
 
+    def _get_metric_names(self, metrics):
+        metric_names = []
+        for mt in metrics:
+            metric_names.append(mt.name)
+        return metric_names
+
 
 class CVSingleModelResult(SingleModelResult):
     """ Cross Validation Result Class for a single model
@@ -37,12 +45,13 @@ class CVSingleModelResult(SingleModelResult):
     ----------
     """
 
-    def __init__(self, metric_avg_results=None):
-        self.avg = metric_avg_results
+    def __init__(self, model_name, metrics, metric_avg_results={}):
+        SingleModelResult.__init__(self, model_name, metrics, metric_avg_results)
+        #self.avg = metric_avg_results
         self.per_fold_avg = {}
-        self.avg = {}
+        #self.avg = {}
 
-    def _add_fold_res(self, fold, metric_avg_results):
+    def add_fold_res(self, fold, metric_avg_results):
         # think to organize the results first
         self.per_fold_avg[fold] = metric_avg_results
 
@@ -52,14 +61,14 @@ def _compute_avg_res(self):
         for f in self.per_fold_avg:
             for mt in self.per_fold_avg[f]:
                 self.avg[mt] += self.per_fold_avg[f][mt] / len(self.per_fold_avg)
+        self._organize_avg_res()
 
-    def _organize_avg_res(self, model_name, metric_names):
+    def _organize_avg_res(self):
         # global avg
-        self.avg = self._get_data_frame(avg_res=self.avg, model_name=model_name, metric_names=metric_names)
+        self.avg = self._get_data_frame(avg_res=self.avg)
         # per_fold avg
         for f in self.per_fold_avg:
-            self.per_fold_avg[f] = self._get_data_frame(avg_res=self.per_fold_avg[f], model_name=model_name,
-                                                        metric_names=metric_names)
+            self.per_fold_avg[f] = self._get_data_frame(avg_res=self.per_fold_avg[f])
 
 
 class Result:
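For reference, the one-row DataFrame that `_get_data_frame` now builds from instance state can be reproduced with plain numpy and pandas (names and values hypothetical); note the `np.nan` fallback for metrics missing from the averages dict:

    import numpy as np
    import pandas as pd

    model_name = 'PMF'                    # hypothetical
    metric_names = ['MAE', 'RMSE', 'Recall@20']
    avg_res = {'MAE': 0.8, 'RMSE': 1.0}   # 'Recall@20' absent -> NaN

    row = [avg_res.get(mt_name, np.nan) for mt_name in metric_names]
    row = np.asarray(row).reshape(1, len(metric_names))
    df = pd.DataFrame(data=row, index=[model_name], columns=metric_names)
    print(df)
    #      MAE  RMSE  Recall@20
    # PMF  0.8   1.0        NaN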
