Merge pull request #5 from RUCAIBox/master
Merge
chenyushuo authored Jul 20, 2020
2 parents 4138d9c + d3b6281 commit b695c8b
Showing 12 changed files with 179 additions and 14 deletions.
16 changes: 12 additions & 4 deletions evaluator/evaluator.py
@@ -2,6 +2,9 @@
import numpy as np
import pandas as pd
import utils
+# from joblib import Parallel, delayed
+# from pandarallel import pandarallel
+# pandarallel.initialize()

# 'Precision', 'Hit', 'Recall', 'MAP', 'NDCG', 'MRR', 'AUC'
metric_name = {metric.lower() : metric for metric in ['Hit', 'Recall', 'MRR', 'AUC']}
@@ -71,15 +74,20 @@ def metric_info(self, df, metric, k):

        fuc = getattr(utils, metric)
        if metric == 'auc':
-            metric_fuc = lambda x: fuc(x, self.neg_ratio)
+            metric_fuc = lambda x: fuc(x.values, self.neg_ratio)
        elif metric == 'precision':
-            metric_fuc = lambda x: fuc(x, k)
+            metric_fuc = lambda x: fuc(x.values, k)
        else:
-            metric_fuc = fuc
+            metric_fuc = lambda x: fuc(x.values)
+
+        # groups = df.groupby(self.USER_FIELD)['rank']
+        # results = Parallel(n_jobs=10)(delayed(metric_fuc)(group) for _, group in groups)
+        # result = np.mean(results)
+        # df.parallel_apply()

        metric_result = df.groupby(self.USER_FIELD)['rank'].apply(metric_fuc)
        return metric_result

    def evaluate(self, df):
        """Generate metrics results on the dataset
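The change above hands each per-user group to the metric as x.values, so the functions in utils now receive plain numpy arrays instead of pandas Series. A minimal sketch of the same groupby/apply pattern, using hypothetical data and a stand-in hit metric (not the repository's exact implementation):

import numpy as np
import pandas as pd

# Hypothetical per-user ranks: 0 means the positive item was missed,
# a positive value is the position at which it was hit.
df = pd.DataFrame({'user_id': [1, 1, 2, 2], 'rank': [0, 3, 1, 0]})

def hit(ranks):
    # stand-in for utils.hit: 1.0 if the user has at least one hit
    return float((ranks > 0).any())

# Same pattern as metric_info: one score per user, then averaged over users.
per_user = df.groupby('user_id')['rank'].apply(lambda x: hit(x.values))
print(per_user.mean())  # 1.0 for this toy data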
4 changes: 2 additions & 2 deletions main.py
@@ -28,6 +28,6 @@

trainer = Trainer(config, model, logger)
# trainer.resume_checkpoint('saved/model_best.pth')
-trainer.fit(train_data, valid_data)
+best_valid_score, _ = trainer.fit(train_data, valid_data)
result = trainer.evaluate(test_data)
-print(result)
+print(best_valid_score)
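Printing the best validation score as the final line appears to be the hand-off to the hyper-parameter tuner introduced below: HyperTuning.trial (trainer/hyper_tuning.py) runs main.py as a subprocess and parses the last printed line as the score. A small sketch of that assumed contract, with made-up output:

# Assumed stdout contract: only the last printed line matters to the tuner.
captured = "epoch 9 evaluating [time: 1.23s, valid_score: 0.352100]\n{'hit': 0.3521}\n0.3521\n"
score = float(captured.strip().split('\n')[-1])
print(score)  # 0.3521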
6 changes: 4 additions & 2 deletions model/context_aware_recommender/nfm.py
@@ -40,7 +40,7 @@ def __init__(self, config, dataset):
        self.first_order_linear = FMFirstOrderLinear(self.field_dims, self.offsets)
        self.embedding = FMEmbedding(self.field_dims, self.offsets, self.embedding_size)
        self.fm = BaseFactorizationMachine(reduce_sum=False)
-        self.mlp_layers = MLPLayers(self.layers, self.dropout, activation='sigmoid')
+        self.mlp_layers = MLPLayers(size_list, self.dropout, activation='sigmoid')
        self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1, bias=False)
        self.sigmoid = nn.Sigmoid()
        self.loss = nn.BCELoss()
@@ -69,7 +69,9 @@ def forward(self, interaction):
            # todo: check (batch) or (batch, 1)
            x.append(interaction[field].unsqueeze(1))
        x = torch.cat(x, dim=1)
-        y = self.predict_layer(self.mlp_layers(self.embedding(x)))+self.first_order_linear(x)
+        emb_x = self.fm(self.embedding(x))
+
+        y = self.predict_layer(self.mlp_layers(emb_x))+self.first_order_linear(x)
        y = self.sigmoid(y)
        return y.squeeze()
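The fix routes the embeddings through the bi-interaction (FM) layer before the MLP, which is the defining component of NFM. A minimal sketch of what BaseFactorizationMachine(reduce_sum=False) is assumed to compute, namely the standard FM pairwise term kept per embedding dimension:

import torch

def bi_interaction(emb):
    # emb: (batch, num_fields, embed_dim) -> (batch, embed_dim)
    # 0.5 * ((sum_i v_i)^2 - sum_i v_i^2), without the final sum over
    # embedding dimensions (reduce_sum=False), so it can feed the MLP.
    square_of_sum = emb.sum(dim=1) ** 2
    sum_of_square = (emb ** 2).sum(dim=1)
    return 0.5 * (square_of_sum - sum_of_square)

emb = torch.randn(32, 10, 64)     # hypothetical batch of 10 field embeddings
print(bi_interaction(emb).shape)  # torch.Size([32, 64]), the MLP input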
14 changes: 14 additions & 0 deletions properties/dataset/ml-10m.config
@@ -0,0 +1,14 @@
[data]

########define the UIRT columns
USER_ID_FIELD='user_id'
ITEM_ID_FIELD='item_id'
NEG_PREFIX='neg_'
LABEL_FIELD='label'

#########select load columns
UNLOAD_COLUMN=['timestamp']

########data separator
field_separator='\t'
seq_separator=' '
14 changes: 14 additions & 0 deletions properties/dataset/ml-1m.config
@@ -0,0 +1,14 @@
[data]

########define the UIRT columns
USER_ID_FIELD='user_id'
ITEM_ID_FIELD='item_id'
NEG_PREFIX='neg_'
LABEL_FIELD='label'

#########select load columns
UNLOAD_COLUMN=['timestamp']

########data separator
field_separator='\t'
seq_separator=' '
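Both dataset files use INI-style sections with Python-literal values. Assuming they are read with the standard-library configparser and the values evaluated afterwards (the repository's actual config loader is not part of this diff), access might look like:

import ast
import configparser

# Hypothetical loader sketch; keys match properties/dataset/ml-1m.config.
parser = configparser.ConfigParser()
parser.read('properties/dataset/ml-1m.config')

user_field = ast.literal_eval(parser['data']['USER_ID_FIELD'])   # 'user_id'
unload_cols = ast.literal_eval(parser['data']['UNLOAD_COLUMN'])  # ['timestamp']
field_sep = ast.literal_eval(parser['data']['field_separator'])  # tab character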
5 changes: 5 additions & 0 deletions properties/model/NFM.config
@@ -0,0 +1,5 @@
[model]

embedding_size=64
mlp_hidden_size=[64, 64, 64]
dropout=0.0
17 changes: 17 additions & 0 deletions run_hyper.py
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
# @Time : 2020/7/17 16:13
# @Author : Shanlei Mu
# @Email : slmu@ruc.edu.cn
# @File : run_hyper.py

from trainer import HyperTuning


def main():
    hp = HyperTuning('main.py', params_file='trainer/hyper.example', max_evals=5)
    hp.run()
    # print(hp.best_params)


if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions trainer/__init__.py
@@ -1 +1,2 @@
from .trainer import Trainer
from .hyper_tuning import HyperTuning
5 changes: 5 additions & 0 deletions trainer/hyper.example
@@ -0,0 +1,5 @@
embedding_size choice [64,96,128]
mlp_hidden_size choice ['[64,64,64]','[64,64]','[64]']
dropout uniform 0.0,1.0
num_layers quniform 1,3,1
learning_rate loguniform -8,0
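Each line is "parameter distribution arguments". Given _build_space_from_file in trainer/hyper_tuning.py below, this example file is expected to translate into the following hyperopt search space:

from hyperopt import hp

space = {
    'embedding_size': hp.choice('embedding_size', [64, 96, 128]),
    'mlp_hidden_size': hp.choice('mlp_hidden_size', ['[64,64,64]', '[64,64]', '[64]']),
    'dropout': hp.uniform('dropout', 0.0, 1.0),
    'num_layers': hp.quniform('num_layers', 1, 3, 1),        # integer-like steps of 1
    'learning_rate': hp.loguniform('learning_rate', -8, 0),  # samples exp(-8)..exp(0)
}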
96 changes: 96 additions & 0 deletions trainer/hyper_tuning.py
@@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-
# @Time : 2020/7/19 19:06
# @Author : Shanlei Mu
# @Email : slmu@ruc.edu.cn
# @File : hyper_tuning.py

import sys
import subprocess
import hyperopt
from hyperopt import fmin, tpe, hp


class HyperTuning(object):
    def __init__(self, procudure_filename, params_file=None, space=None, interpreter='python', algo=tpe.suggest, max_evals=100, bigger=True):
        self.filename = procudure_filename
        self.interpreter = interpreter
        self.algo = algo
        self.max_evals = max_evals
        self.bigger = bigger
        if self.bigger:
            self.init_score = - float('inf')
        else:
            self.init_score = float('inf')
        self.best_score = self.init_score
        self.best_params = None
        if space:
            self.space = space
        elif params_file:
            self.space = self._build_space_from_file(params_file)
        else:
            raise ValueError('at least one of `space` and `params_file` is provided')

    @staticmethod
    def flush():
        sys.stdout.flush()
        sys.stderr.flush()

    @staticmethod
    def params2cmd(interpreter, filename, params):
        cmd = interpreter + ' ' + filename
        for param_name in params:
            param_value = params[param_name]
            cmd += ' --' + param_name
            if isinstance(param_value, str):
                cmd += '=%s' % param_value
            elif int(param_value) == param_value:
                cmd += '=%d' % int(param_value)
            else:
                cmd += '=%g' % float('%.1e' % float(param_value))
        return cmd

    @staticmethod
    def _build_space_from_file(file):
        space = {}
        with open(file, 'r') as fp:
            for line in fp:
                para_name, para_type, para_value = line.strip().split(' ')
                if para_type == 'choice':
                    para_value = eval(para_value)
                    space[para_name] = hp.choice(para_name, para_value)
                elif para_type == 'uniform':
                    low, high = para_value.strip().split(',')
                    space[para_name] = hp.uniform(para_name, float(low), float(high))
                elif para_type == 'quniform':
                    low, high, q = para_value.strip().split(',')
                    space[para_name] = hp.quniform(para_name, float(low), float(high), float(q))
                elif para_type == 'loguniform':
                    low, high = para_value.strip().split(',')
                    space[para_name] = hp.loguniform(para_name, float(low), float(high))
                else:
                    raise ValueError('Illegal para type [{}]'.format(para_type))
        return space

    def trial(self, params):
        cmd = self.params2cmd(self.interpreter, self.filename, params)
        try:
            print('\n\n running command: @ %s' % cmd, file=sys.stderr)
            self.flush()
            output = subprocess.check_output(cmd, shell=True)
        except subprocess.CalledProcessError:
            return {'loss': self.init_score, 'status': hyperopt.STATUS_FAIL}
        output = output.decode(encoding='UTF-8')
        score = float(output.strip().split('\n')[-1])
        if self.bigger:
            if score > self.best_score:
                self.best_score = score
                self.best_params = params
            score = - score
        else:
            if score < self.best_score:
                self.best_score = score
                self.best_params = params
        return {'loss': score, 'status': hyperopt.STATUS_OK}

    def run(self):
        fmin(self.trial, self.space, algo=self.algo, max_evals=self.max_evals)
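A usage sketch with hypothetical parameter values: params2cmd serializes each sampled configuration into command-line flags, and trial then treats the last line printed by the child process as its score.

from trainer.hyper_tuning import HyperTuning

params = {'embedding_size': 64, 'dropout': 0.5, 'learning_rate': 0.0003}
print(HyperTuning.params2cmd('python', 'main.py', params))
# python main.py --embedding_size=64 --dropout=0.5 --learning_rate=0.0003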
13 changes: 8 additions & 5 deletions trainer/trainer.py
@@ -47,7 +47,8 @@ def __init__(self, config, model, logger):

        self.start_epoch = 0
        self.cur_step = 0
-        self.best_eval_score = -1
+        self.best_valid_score = -1
+        self.best_valid_result = None
        self.train_loss_dict = dict()
        self.optimizer = self._build_optimizer()
        self.evaluator = Evaluator(config, logger)
@@ -89,7 +90,7 @@ def _save_checkpoint(self, epoch):
            'config': self.config,
            'epoch': epoch,
            'cur_step': self.cur_step,
-            'best_eval_score': self.best_eval_score,
+            'best_valid_score': self.best_valid_score,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
        }
@@ -100,7 +101,7 @@ def resume_checkpoint(self, resume_file):
        checkpoint = torch.load(resume_file)
        self.start_epoch = checkpoint['epoch'] + 1
        self.cur_step = checkpoint['cur_step']
-        self.best_eval_score = checkpoint['best_eval_score']
+        self.best_valid_score = checkpoint['best_valid_score']

        # load architecture params from checkpoint
        if checkpoint['config']['model'].lower() != self.config['model'].lower():
@@ -133,8 +134,8 @@ def fit(self, train_data, valid_data=None):
            if (epoch_idx + 1) % self.eval_step == 0:
                valid_start_time = time()
                valid_score, valid_result = self._valid_epoch(valid_data)
-                self.best_eval_score, self.cur_step, stop_flag, update_flag = early_stopping(
-                    valid_score, self.best_eval_score, self.cur_step, max_step=self.stopping_step, order='asc')
+                self.best_valid_score, self.cur_step, stop_flag, update_flag = early_stopping(
+                    valid_score, self.best_valid_score, self.cur_step, max_step=self.stopping_step, order='asc')
                valid_end_time = time()
                valid_score_output = "epoch %d evaluating [time: %.2fs, valid_score: %f]" % \
                                     (epoch_idx, valid_end_time - valid_start_time, valid_score)
@@ -144,12 +145,14 @@
                if update_flag:
                    self._save_checkpoint(epoch_idx)
                    update_output = 'Saving current best: %s' % self.saved_model_file
+                    self.best_valid_result = valid_result
                    print(update_output)
                if stop_flag:
                    stop_output = 'Finished training, best eval result in epoch %d' % \
                                  (epoch_idx - self.cur_step * self.eval_step)
                    print(stop_output)
                    break
+        return self.best_valid_score, self.best_valid_result

    def evaluate(self, eval_data, load_best_model=True, model_file=None):
        if load_best_model:
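The renamed attribute is also threaded through the early_stopping call in fit. That helper is not part of this diff; a plausible sketch inferred only from the call site above, offered as an assumption rather than the repository's actual utils code:

def early_stopping(value, best, cur_step, max_step=10, order='asc'):
    # Returns (best, cur_step, stop_flag, update_flag); 'asc' means higher is better.
    improved = value > best if order == 'asc' else value < best
    if improved:
        return value, 0, False, True   # new best: reset patience, ask caller to save
    cur_step += 1
    return best, cur_step, cur_step > max_step, False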
2 changes: 1 addition & 1 deletion utils/metrics.py
@@ -8,7 +8,7 @@ def hit(data):

def mrr(data):
    tmp_x = data[data > 0]
-    if not tmp_x.empty:
+    if tmp_x.all():
        return (1 / tmp_x).sum() / data.shape[0]
    return 0
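Since evaluator.metric_info now passes x.values, the metric functions receive numpy arrays, which have no pandas .empty attribute; that is presumably why the guard changed. A worked example with hypothetical per-user rank data:

import numpy as np

ranks = np.array([3, 0, 1, 0])           # 0 = miss, k = relevant item ranked at position k
tmp = ranks[ranks > 0]                    # array([3, 1])
print((1 / tmp).sum() / ranks.shape[0])   # (1/3 + 1/1) / 4 = 0.333...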
