# Data Usage

In [1]:
from neuralogic.dataset import Data, TensorDataset


# Per-node feature vectors: row i holds the features of node i.
# They are one-hot encoded here, but that is not required.
node_features = [
    [0, 0, 1],  # node 0
    [1, 0, 0],  # node 1
    [0, 1, 0],  # node 2
]

# Edges as two parallel lists: the i-th edge goes from edge_index[0][i] to edge_index[1][i].
# Here that gives two edges: 0 -> 1 and 2 -> 0.
edge_index = [
    [0, 2],  # edge sources
    [1, 0],  # edge destinations
]

# Target value of the sample; it does not have to be a scalar.
target = 5

In [2]:
# Bundle the node features, the edge list and the target into a single sample
data = Data(
    x=node_features,
    edge_index=edge_index,
    y=target,
)

## Translation into rules

In [3]:
# Translate the sample into its logic form: a query (built from the target)
# and the examples (built from the edges and node features)
query, examples = data.to_logic_form()

In [4]:
# Show the textual form of the query: the target value followed by the output predicate
str(query)

'5.0 predict.'

In [5]:
# Print the examples one per line; each is rendered as `<value> predicate(arguments).`
for example in examples:
    print(example)

<1> edge(0, 1).
<1> edge(2, 0).
<[0, 0, 1]> node_feature(0).
<[1, 0, 0]> node_feature(1).
<[0, 1, 0]> node_feature(2).


### Translation customization

#### Target label to one hot encoding

In [6]:
# Ask for the target to be one-hot encoded into a vector of `max_classes` elements
query, examples = data.to_logic_form(
    one_hot_encode_labels=True,
    max_classes=10,
)

In [7]:
# The target is 5 and indexing starts at 0, so the 6th of the 10 elements
# (10 comes from the max_classes argument) is set to one
str(query)

'[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0] predict.'

#### One hot features decoding

In [8]:
# Decode the one-hot node feature vectors into separate, index-suffixed predicates
query, examples = data.to_logic_form(one_hot_decode_features=True)

In [9]:
# Print the examples produced with one-hot feature decoding, one per line
for example in examples:
    print(example)

<1> edge(0, 1).
<1> edge(2, 0).
<1> node_feature_2(0).
<1> node_feature_0(1).
<1> node_feature_1(2).


In [10]:
# Each one-hot feature vector is decoded into node_feature_<argmax(features)>(node_id), e.g.
# <[0, 0, 1]> node_feature(0)  ->  <1> node_feature_2(0)

#### Changing predicates' names

In [11]:
# Override the default predicate names used for node features, edges and the output
query, examples = data.to_logic_form(
    feature_name="atom",
    edge_name="bond",
    output_name="output",
)

In [12]:
# The query is now rendered with the custom output predicate name
str(query)

'5.0 output.'

In [13]:
# Edges and node features are now printed under their custom predicate names
for example in examples:
    print(example)

<1> bond(0, 1).
<1> bond(2, 0).
<[0, 0, 1]> atom(0).
<[1, 0, 0]> atom(1).
<[0, 1, 0]> atom(2).


# Dataset usage with Data

In [14]:
# The dataset is built from a list of `Data` samples; this one holds a single sample
list_of_samples = [data]

In [15]:
# Wrap the list of samples in a tensor dataset
dataset = TensorDataset(data=list_of_samples)

You can dump the dataset into files (in the logic/Java form) with `dataset.dump_to_file` (takes `str` filenames) or `dataset.dump` (takes writable objects, i.e. objects with a `write` method).

Customizations supported by `Data` can also be set via the `TensorDataset` constructor.

In [16]:
# Translation options are passed as keyword arguments; the same works for the
# one-hot encoding/decoding options.
# NOTE(review): "bond" was the edge name in the earlier example ("atom" was the feature name) - confirm this is intended
dataset = TensorDataset(data=list_of_samples, feature_name="bond")

Datasets in this (tensor) representation can be used like any other dataset: you can pass them directly to the `build_dataset` method or to the forward propagation method without any extra steps.