Skip to content

Commit

Permalink
Refactor minibatch usage and evaluators
Browse files — browse the repository at this point in the history
  • Loading branch information
LukasZahradnik committed Nov 4, 2022
1 parent f2ff321 commit e81bf55
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 48 deletions.
5 changes: 4 additions & 1 deletion neuralogic/core/builder/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,11 @@ def get_unit_weight() -> "Weight":
class BuiltDataset:
"""BuiltDataset represents an already built dataset - that is, a dataset that has been grounded and neuralized."""

def __init__(self, samples):
__slots__ = "samples", "batch_size"

def __init__(self, samples, batch_size: int):
self.samples = samples
self.batch_size = batch_size

def __len__(self):
return len(self.samples)
Expand Down
17 changes: 14 additions & 3 deletions neuralogic/core/builder/dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,16 @@ def build_dataset(
dataset: datasets.BaseDataset,
settings: SettingsProxy,
*,
batch_size: int = 1,
file_mode: bool = False,
learnable_facts: bool = False,
progress: bool = False,
) -> BuiltDataset:
"""Builds the dataset (does grounding and neuralization) for this template instance and the backend
:param dataset:
:param backend:
:param settings:
:param batch_size:
:param file_mode:
:param learnable_facts:
:param progress:
Expand All @@ -137,16 +138,26 @@ def build_dataset(
return self.build_dataset(
datasets.FileDataset(e_tf.name, q_tf.name),
settings,
batch_size=batch_size,
file_mode=False,
learnable_facts=learnable_facts,
progress=progress,
)

if isinstance(dataset, datasets.ConvertableDataset):
return self.build_dataset(
dataset.to_dataset(), settings, file_mode=False, learnable_facts=learnable_facts, progress=progress
dataset.to_dataset(),
settings,
batch_size=batch_size,
file_mode=False,
learnable_facts=learnable_facts,
progress=progress,
)

if batch_size > 1:
settings.settings.minibatchSize = batch_size
settings.settings.parallelTraining = True

if isinstance(dataset, datasets.Dataset):
self.examples_counter = 0
self.query_counter = 0
Expand Down Expand Up @@ -191,7 +202,7 @@ def build_dataset(
else:
raise NotImplementedError

return BuiltDataset(samples)
return BuiltDataset(samples, batch_size)

@staticmethod
def merge_queries_with_examples(queries, examples, one_query_per_example, example_queries=True):
Expand Down
26 changes: 19 additions & 7 deletions neuralogic/nn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,21 @@ def __call__(self, sample):
raise NotImplementedError

def build_dataset(
self, dataset: BaseDataset, *, file_mode: bool = False, learnable_facts: bool = False, progress: bool = False
self,
dataset: BaseDataset,
*,
batch_size: int = 1,
file_mode: bool = False,
learnable_facts: bool = False,
progress: bool = False,
):
return self.dataset_builder.build_dataset(
dataset, self.settings, file_mode=file_mode, learnable_facts=learnable_facts, progress=progress
dataset,
self.settings,
batch_size=batch_size,
file_mode=file_mode,
learnable_facts=learnable_facts,
progress=progress,
)

def set_hooks(self, hooks):
Expand Down Expand Up @@ -92,25 +103,26 @@ def draw(
class AbstractEvaluator:
def __init__(self, template: Template, settings: Settings):
self.settings = settings.create_proxy()
self.dataset: Optional[BuiltDataset] = None

self.neuralogic_model = template.build(settings)
self.neuralogic_model.set_hooks(template.hooks)

def set_dataset(self, dataset: Union[BaseDataset, BuiltDataset]):
self.dataset = self.build_dataset(dataset)

def build_dataset(
self,
dataset: Union[BaseDataset, BuiltDataset],
*,
batch_size: int = 1,
file_mode: bool = False,
learnable_facts: bool = False,
progress: bool = False,
):
if isinstance(dataset, BaseDataset):
return self.neuralogic_model.build_dataset(
dataset, file_mode=file_mode, learnable_facts=learnable_facts, progress=progress
dataset,
batch_size=batch_size,
file_mode=file_mode,
learnable_facts=learnable_facts,
progress=progress,
)
return dataset

Expand Down
42 changes: 6 additions & 36 deletions neuralogic/nn/evaluator/java.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from typing import Optional, Dict, Union

import jpype

from neuralogic.core import Template, BuiltDataset
from neuralogic.nn.base import AbstractEvaluator
from neuralogic.core.settings import Settings
Expand All @@ -17,70 +15,42 @@ def __init__(
):
super().__init__(template, settings)

def set_dataset(self, dataset: Union[BaseDataset, BuiltDataset]):
super().set_dataset(dataset)

if self.dataset is None:
raise Exception("Invalid state of dataset - dataset in the evaluator is None")

self.neuralogic_model.set_training_samples(
jpype.java.util.ArrayList([sample.java_sample for sample in self.dataset.samples])
)

def reset_dataset(self, dataset):
if dataset is None:
self.neuralogic_model.set_training_samples(jpype.java.util.ArrayList([]))
else:
self.neuralogic_model.set_training_samples(
jpype.java.util.ArrayList([sample.java_sample for sample in dataset.samples])
)
self.dataset = dataset

def train(
self,
dataset: Optional[Union[BaseDataset, BuiltDataset]] = None,
*,
generator: bool = True,
epochs: int = None,
batch_size: int = 1,
):
old_dataset = None

if dataset is not None:
old_dataset = self.dataset
self.set_dataset(dataset)
dataset = self.build_dataset(dataset)

if epochs is None:
epochs = self.settings.epochs

def _train():
for _ in range(epochs):
results, total_len = self.neuralogic_model(None, True, batch_size=batch_size)
results, total_len = self.neuralogic_model(dataset, True)
yield sum(result[2] for result in results), total_len
if dataset is not None:
self.reset_dataset(old_dataset)

if generator:
return _train()

results, total_len = self.neuralogic_model(None, True, epochs=epochs, batch_size=batch_size)
if dataset is not None:
self.reset_dataset(old_dataset)
results, total_len = self.neuralogic_model(dataset, True, epochs=epochs)

return sum(result[2] for result in results), total_len

def test(
self, dataset: Optional[Union[BaseDataset, BuiltDataset]] = None, *, generator: bool = True, batch_size: int = 1
):
dataset = self.dataset if dataset is None else self.build_dataset(dataset)
dataset = self.build_dataset(dataset)

def _test():
for sample in dataset.samples:
yield self.neuralogic_model(sample, False, batch_size=batch_size)
yield self.neuralogic_model(sample, False)

if generator:
return _test()
return self.neuralogic_model(dataset.samples, False, batch_size=batch_size)
return self.neuralogic_model(dataset, False)

def state_dict(self) -> Dict:
return self.neuralogic_model.state_dict()
Expand Down
10 changes: 9 additions & 1 deletion neuralogic/nn/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import jpype

from neuralogic import is_initialized, initialize
from neuralogic.core import BuiltDataset
from neuralogic.core.constructs.java_objects import ValueFactory
from neuralogic.nn.base import AbstractNeuraLogic
from neuralogic.core.settings import SettingsProxy
Expand Down Expand Up @@ -61,9 +62,16 @@ def set_training_samples(self, samples):
self.samples_len = len(samples)
self.strategy.setSamples(jpype.java.util.ArrayList(samples))

def __call__(self, samples=None, train: bool = None, epochs: int = 1, batch_size: int = 1):
def __call__(self, dataset=None, train: bool = None, epochs: int = 1):
self.hooks_set = len(self.hooks) != 0

if isinstance(dataset, BuiltDataset):
samples = dataset.samples
batch_size = dataset.batch_size
else:
samples = dataset
batch_size = 1

if self.hooks_set:
self.strategy.setHooks(set(self.hooks.keys()), self.hook_handler)

Expand Down

0 comments on commit e81bf55

Please sign in to comment.