Skip to content

Commit 9915a0f

Browse files
committed
add auto deduction for augmentation step
1 parent abb6036 commit 9915a0f

File tree

7 files changed

+28
-14
lines changed

7 files changed

+28
-14
lines changed

config/template/graph.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,9 @@ train:
6565
negative_weight: 5
6666
# Exponent of degrees in negative sampling. Default is recommended.
6767
negative_sample_exponent: 0.75
68-
# Augmentation step. Needs to be tuned on the validation set.
68+
# Augmentation step. Default is usually reasonable.
6969
# Larger value is needed for sparser graphs.
70-
augmentation_step: 5
70+
augmentation_step: auto
7171
# Return parameter and in-out parameters (node2vec). Need to be tuned on the validation set.
7272
p: 1
7373
q: 1

doc/source/user/auto.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,9 @@ The following hyperparameters support auto deduction.
2424
num_partition: auto
2525
episode_size: auto
2626
27+
train:
28+
# for node embedding
29+
augmentation_step: auto
30+
2731
.. note::
2832
The auto value for ``gpus`` is an empty list.

include/bind.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -451,11 +451,11 @@ class pyGraphSolver : public py::class_<graphvite::GraphSolver<dim, Float, Index
451451

452452
def("train", &GraphSolver::train, py::no_gil(),
453453
py::arg("model") = "LINE", py::arg("num_epoch") = 2000, py::arg("resume") = false,
454-
py::arg("augmentation_step") = 5, py::arg("random_walk_length") = 40,
454+
py::arg("augmentation_step") = graphvite::kAuto, py::arg("random_walk_length") = 40,
455455
py::arg("random_walk_batch_size") = 100, py::arg("shuffle_base") = graphvite::kAuto, py::arg("p") = 1,
456456
py::arg("q") = 1, py::arg("positive_reuse") = 1, py::arg("negative_sample_exponent") = 0.75,
457457
py::arg("negative_weight") = 5, py::arg("log_frequency") = 1000,
458-
"train(model='LINE', num_epoch=2000, resume=False, augmentation_step=5, random_walk_length=40, "
458+
"train(model='LINE', num_epoch=2000, resume=False, augmentation_step=auto, random_walk_length=40, "
459459
"random_walk_batch_size=100, shuffle_base=auto, p=1, q=1, positive_reuse=1, "
460460
"negative_sample_exponent=0.75, negative_weight=5, log_frequency=1000)"
461461
R"(

include/instance/graph.cuh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252

5353
namespace graphvite {
5454

55+
const int kExpectedDegree = 1600;
56+
5557
/**
5658
* @brief Normal graphs without attributes
5759
* @tparam _Index integral type of node indexes
@@ -742,7 +744,7 @@ public:
742744
* @param _log_frequency log every log_frequency batches
743745
*/
744746
void train(const std::string &_model = "LINE", int _num_epoch = 2000, bool _resume = false,
745-
int _augmentation_step = 5, int _random_walk_length = 40, int _random_walk_batch_size = 100,
747+
int _augmentation_step = kAuto, int _random_walk_length = 40, int _random_walk_batch_size = 100,
746748
int _shuffle_base = kAuto, float _p = 1, float _q = 1, int _positive_reuse = 1,
747749
float _negative_sample_exponent = 0.75, float _negative_weight = 5, int _log_frequency = 1000) {
748750
augmentation_step = _augmentation_step;
@@ -752,6 +754,8 @@ public:
752754
p = _p;
753755
q = _q;
754756

757+
if (augmentation_step == kAuto)
758+
augmentation_step = log(kExpectedDegree) / log(float(num_edge) / num_vertex);
755759
if (shuffle_base == kAuto)
756760
shuffle_base = augmentation_step;
757761
if (model == "DeepWalk" || model == "node2vec")

python/graphvite/application/application.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,16 @@ class ApplicationMixin(object):
4343
dim (int): dimension of embeddings
4444
gpus (list of int, optional): GPU ids, default is all GPUs
4545
cpu_per_gpu (int, optional): number of CPU threads per GPU, default is all CPUs
46+
gpu_memory_limit (int, optional): memory limit per GPU in bytes, default is all memory
4647
float_type (dtype, optional): type of parameters
4748
index_type (dtype, optional): type of graph indexes
4849
"""
49-
def __init__(self, dim, gpus=[], cpu_per_gpu=auto, float_type=cfg.float_type, index_type=cfg.index_type):
50+
def __init__(self, dim, gpus=[], cpu_per_gpu=auto, gpu_memory_limit=auto,
51+
float_type=cfg.float_type, index_type=cfg.index_type):
5052
self.dim = dim
5153
self.gpus = gpus
5254
self.cpu_per_gpu = cpu_per_gpu
55+
self.gpu_memory_limit = gpu_memory_limit
5356
self.float_type = float_type
5457
self.index_type = index_type
5558
self.set_format()
@@ -236,7 +239,8 @@ def get_solver(self, **kwargs):
236239
num_sampler_per_worker = auto
237240
else:
238241
num_sampler_per_worker = self.cpu_per_gpu - 1
239-
return solver.GraphSolver(self.dim, self.float_type, self.index_type, self.gpus, num_sampler_per_worker)
242+
return solver.GraphSolver(self.dim, self.float_type, self.index_type, self.gpus, num_sampler_per_worker,
243+
self.gpu_memory_limit)
240244

241245
def node_classification(self, X=None, Y=None, file_name=None, portions=(0.02,), normalization=False, times=1,
242246
patience=100):
@@ -513,7 +517,8 @@ def get_solver(self, **kwargs):
513517
num_sampler_per_worker = auto
514518
else:
515519
num_sampler_per_worker = self.cpu_per_gpu - 1
516-
return solver.GraphSolver(self.dim, self.float_type, self.index_type, self.gpus, num_sampler_per_worker)
520+
return solver.GraphSolver(self.dim, self.float_type, self.index_type, self.gpus, num_sampler_per_worker,
521+
self.gpu_memory_limit)
517522

518523

519524
class KnowledgeGraphApplication(ApplicationMixin):
@@ -573,7 +578,8 @@ def get_solver(self, **kwargs):
573578
num_sampler_per_worker = auto
574579
else:
575580
num_sampler_per_worker = self.cpu_per_gpu - 1
576-
return solver.KnowledgeGraphSolver(self.dim, self.float_type, self.index_type, self.gpus, num_sampler_per_worker)
581+
return solver.KnowledgeGraphSolver(self.dim, self.float_type, self.index_type, self.gpus, num_sampler_per_worker,
582+
self.gpu_memory_limit)
577583

578584
def entity_prediction(self, H=None, R=None, T=None, file_name=None, save_file=None, target="tail", k=10,
579585
backend=cfg.backend):
@@ -1032,7 +1038,8 @@ def get_solver(self, **kwargs):
10321038
else:
10331039
num_sampler_per_worker = self.cpu_per_gpu - 1
10341040

1035-
return solver.VisualizationSolver(self.dim, self.float_type, self.index_type, self.gpus, num_sampler_per_worker)
1041+
return solver.VisualizationSolver(self.dim, self.float_type, self.index_type, self.gpus, num_sampler_per_worker,
1042+
self.gpu_memory_limit)
10361043

10371044
def visualization(self, Y=None, file_name=None, save_file=None, figure_size=10, scale=2):
10381045
"""

python/graphvite/cmd.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,7 @@ def load_data(file_name):
176176
else:
177177
labels = None
178178

179-
gv.init_logging(logging.INFO)
180-
# gv.init_logging(logging.WARNING)
179+
gv.init_logging(logging.WARNING)
181180

182181
app = gap.VisualizationApplication(args.dim, [0])
183182
app.load(vectors=vectors, perplexity=args.perplexity)

python/graphvite/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,7 @@ def train_feature_data_preprocess(self, save_file):
886886
numpy_file = os.path.splitext(save_file)[0] + ".npy"
887887
if os.path.exists(numpy_file):
888888
return np.load(numpy_file)
889-
features = self.image_feature_data(self.train_image, save_file)
889+
features = self.image_feature_data(self.train_image)
890890
np.save(numpy_file, features)
891891
return features
892892

@@ -932,7 +932,7 @@ def valid_feature_data_preprocess(self, save_file):
932932
numpy_file = os.path.splitext(save_file)[0] + ".npy"
933933
if os.path.exists(numpy_file):
934934
return np.load(numpy_file)
935-
features = self.image_feature_data(self.valid_image, save_file)
935+
features = self.image_feature_data(self.valid_image)
936936
np.save(numpy_file, features)
937937
return features
938938

0 commit comments

Comments
 (0)