# Distinguishing non-regular graphs

There are graphs that are neither *k*-regular nor isomorphic and yet
cannot be distinguished by message-passing GNNs when all of their nodes
have identical features. An example of such graphs is shown in the following image.

![Non Regular Graphs](https://raw.githubusercontent.com/LukasZahradnik/PyNeuraLogic/master/docs/_static/non_regular_graphs.png)


In PyNeuraLogic, we can distinguish those graphs,
for example, via the previously proposed model (see the Distinguishing K Regular Graphs example),
which captures the triangles of graph _a_ to tell the two graphs apart.

In [1]:
from neuralogic.nn import get_evaluator
from neuralogic.core import Backend
from neuralogic.core import Relation, Template, Var, Term
from neuralogic.core.settings import Settings, Optimizer
from neuralogic.utils.data import Dataset


In [2]:
# Example 1: a triangle-aware template plus the two training graphs (a and b).
train_dataset = Dataset()
template = Template()


def symmetric_edges(pairs):
    """Build `edge` atoms for each (u, v) pair, then the mirrored (v, u) atoms in the same order."""
    forward = [Relation.edge(u, v) for u, v in pairs]
    backward = [Relation.edge(v, u) for u, v in pairs]
    return forward + backward


def feature_atoms(node_count):
    """Build one `feature` atom for each node id 1..node_count."""
    return [Relation.feature(node) for node in range(1, node_count + 1)]


# Captures triangle: X -> Y -> Z -> X, using the feature of every node on it.
triangle_rule = Relation.triangle(Var.X)[1,] <= (
    Relation.edge(Var.X, Var.Y), Relation.feature(Var.Y)[1,],
    Relation.edge(Var.Y, Var.Z), Relation.feature(Var.Z)[1,],
    Relation.edge(Var.Z, Var.X), Relation.feature(Var.X)[1,],
)

# Captures general graph: features reached over a single edge ...
neighbour_rule = Relation.general(Var.X)[1,] <= (
    Relation.edge(Var.X, Var.Y), Relation.feature(Var.Y)[1,],
)
# ... and features of any node. The head variable X does not occur in the body;
# NOTE(review): presumably intentional (edge-independent term) - confirm.
any_node_rule = Relation.general(Var.X)[1,] <= Relation.feature(Var.Y)[1,]

# Both embeddings feed the single output predicate.
predict_from_general = Relation.predict <= Relation.general(Var.X)[1,]
predict_from_triangle = Relation.predict <= Relation.triangle(Var.X)[1,]

template.add_rules([
    triangle_rule,
    neighbour_rule,
    any_node_rule,
    predict_from_general,
    predict_from_triangle,
])

# Encoding of graph a): triangles 1-2-3 and 4-5-6 joined by the edge 2-4.
graph_a_pairs = [(1, 2), (2, 3), (3, 1), (2, 4), (4, 5), (5, 6), (6, 4)]
train_dataset.add_example(symmetric_edges(graph_a_pairs) + feature_atoms(6))

# Encoding of graph b): four-cycles 1-2-3-4 and 2-5-6-3 sharing the edge 2-3.
graph_b_pairs = [(1, 2), (2, 3), (3, 4), (4, 1), (2, 5), (5, 6), (6, 3)]
train_dataset.add_example(symmetric_edges(graph_b_pairs) + feature_atoms(6))

# One labelled query per example, in the order the examples were added
# (target 1 first, then target 0).
train_dataset.add_queries([Relation.predict[target] for target in (1, 0)])

In [3]:
# Train with SGD for 200 epochs on the Java backend.
settings = Settings(optimizer=Optimizer.SGD, epochs=200)
neuralogic_evaluator = get_evaluator(template, Backend.JAVA, settings)

# Drive the training iterator to the end; the values it yields are not needed here.
training_run = neuralogic_evaluator.train(train_dataset)
for _ in training_run:
    pass

graphs = ["a", "b"]

# Evaluate on the training examples and report the rounded class next to the raw output.
test_results = neuralogic_evaluator.test(train_dataset)
for graph_id, (label, predicted) in enumerate(test_results):
    graph_name = graphs[graph_id]
    predicted_class = int(round(predicted))
    print(f"Graph {graph_name} is predicted to be class: {predicted_class} | {predicted}")

Graph a is predicted to be class: 1 | 0.9294944095434514
Graph b is predicted to be class: 0 | 0.07101553696957114


Another interesting, slightly different extension
of vanilla GNNs is to capture graphs based on both their structure and the
cardinality of their nodes (i.e., the number of a node's neighbors). We can add information about the
cardinality of each node to the examples, for instance, as atoms with the
predicate name *cardinality* and two terms -
the node id and its cardinality. We can then choose which atoms will
be aggregated based on their cardinality to distinguish graph _a_ from graph *b*, as shown in Example 2, where we capture only sub-graphs of the graphs.

The `a_graph` rule captures a triangle (`Var.X`, `Var.Y`, `Var.Z`)
connected to one more node (`Var.T`) with a cardinality of three.
In contrast, the `b_graph` rule captures a cycle of length four
(`Var.X`, `Var.Y`, `Var.Z`, `Var.T`)
whose nodes have to satisfy the required cardinalities.


#### Example 2: Distinguishing between graphs based on their cardinality

In [4]:
# Example 2: tell graph a from graph b using node cardinalities instead of plain features.
settings = Settings(optimizer=Optimizer.SGD, epochs=200)
train_dataset = Dataset()

# NOTE(review): settings are handed to Template here, while the first example builds
# Template() and passes settings to get_evaluator - confirm which form the installed
# PyNeuraLogic version expects.
template = Template(settings)


def symmetric_edges(pairs):
    """Build `edge` atoms for each (u, v) pair, then the mirrored (v, u) atoms in the same order."""
    forward = [Relation.edge(u, v) for u, v in pairs]
    backward = [Relation.edge(v, u) for u, v in pairs]
    return forward + backward


def cardinality_atoms(cardinalities):
    """Build `cardinality(node, value)` atoms for node ids 1, 2, ... in order."""
    return [
        Relation.cardinality(node, value)
        for node, value in enumerate(cardinalities, start=1)
    ]


# Triangle X-Y-Z (Y, Z of cardinality 2; X of cardinality 3) with an extra
# neighbour T of X that has cardinality 3; all variables must be distinct.
a_graph_rule = Relation.a_graph(Var.X) <= (
    Relation.edge(Var.X, Var.Y), Relation.cardinality(Var.Y, 2)[1,],
    Relation.edge(Var.Y, Var.Z), Relation.cardinality(Var.Z, 2)[1,],
    Relation.edge(Var.Z, Var.X), Relation.cardinality(Var.X, 3)[1,],
    Relation.edge(Var.X, Var.T), Relation.cardinality(Var.T, 3)[1,],
    Relation.special.alldiff(...),
)

# Four-cycle X-Y-Z-T with cardinalities 3, 2, 2, 3; all variables must be distinct.
b_graph_rule = Relation.b_graph(Var.X) <= (
    Relation.edge(Var.X, Var.Y), Relation.cardinality(Var.Y, 2)[1,],
    Relation.edge(Var.Y, Var.Z), Relation.cardinality(Var.Z, 2)[1,],
    Relation.edge(Var.Z, Var.T), Relation.cardinality(Var.T, 3)[1,],
    Relation.edge(Var.T, Var.X), Relation.cardinality(Var.X, 3)[1,],
    Relation.special.alldiff(...),
)

predict_from_a = Relation.predict <= Relation.a_graph(Var.X)[1,]
predict_from_b = Relation.predict <= Relation.b_graph(Var.X)[1,]

template.add_rules([a_graph_rule, b_graph_rule, predict_from_a, predict_from_b])

# Encoding of graph a): triangles 1-2-3 and 4-5-6 joined by the edge 2-4.
graph_a_pairs = [(1, 2), (2, 3), (3, 1), (2, 4), (4, 5), (5, 6), (6, 4)]
graph_a_cardinalities = (2, 3, 2, 3, 2, 2)
train_dataset.add_example(
    symmetric_edges(graph_a_pairs) + cardinality_atoms(graph_a_cardinalities)
)

# Encoding of graph b): four-cycles 1-2-3-4 and 2-5-6-3 sharing the edge 2-3.
graph_b_pairs = [(1, 2), (2, 3), (3, 4), (4, 1), (2, 5), (5, 6), (6, 3)]
graph_b_cardinalities = (2, 3, 3, 2, 2, 2)
train_dataset.add_example(
    symmetric_edges(graph_b_pairs) + cardinality_atoms(graph_b_cardinalities)
)

# One labelled query per example, in the order the examples were added
# (target 1 first, then target 0).
train_dataset.add_queries([Relation.predict[target] for target in (1, 0)])

In [5]:
# NOTE(review): no settings argument is passed here, unlike the Backend.JAVA call in the
# first example - confirm the settings attached to the template are picked up.
neuralogic_evaluator = get_evaluator(template, Backend.DYNET)

# Drive the training iterator to the end; the values it yields are not needed here.
training_run = neuralogic_evaluator.train(train_dataset)
for _ in training_run:
    pass

graphs = ["a", "b"]

# Evaluate on the training examples and report the rounded class next to the raw output.
test_results = neuralogic_evaluator.test(train_dataset)
for graph_id, (label, predicted) in enumerate(test_results):
    graph_name = graphs[graph_id]
    predicted_class = int(round(predicted))
    print(f"Graph {graph_name} is predicted to be class: {predicted_class} | {predicted}")

[dynet] random seed: 1852711818
[dynet] allocating memory: 512MB
[dynet] memory allocation done.


Graph a is predicted to be class: 1 | 0.891069233417511
Graph b is predicted to be class: 0 | -9.999054311746409e-23


![Non Regular Molecules](https://raw.githubusercontent.com/LukasZahradnik/PyNeuraLogic/master/docs/_static/non_regular_molecules.png)

The image above shows two graphs, _a_ and _b_,
representing the real-world structures of two molecules, _Bicyclopentyl_
and *Decalin*, respectively. Once again, a message-passing GNN cannot distinguish between
these graphs when all nodes have identical features.
In PyNeuraLogic, we can embed, for example, the cycle of length five present in
graph _a_ and thus distinguish those instances, as shown in
Example 3.


#### Example 3: Capturing a cycle of length five

In [6]:
# Example 3: tell Bicyclopentyl from Decalin by embedding cycles of length five.
settings = Settings(optimizer=Optimizer.SGD, epochs=200)
train_dataset = Dataset()

# NOTE(review): settings are handed to Template here, while the first example builds
# Template() and passes settings to get_evaluator - confirm which form the installed
# PyNeuraLogic version expects.
template = Template(settings)


def symmetric_edges(pairs):
    """Build `edge` atoms for each (u, v) pair, then the mirrored (v, u) atoms in the same order."""
    forward = [Relation.edge(u, v) for u, v in pairs]
    backward = [Relation.edge(v, u) for u, v in pairs]
    return forward + backward


def feature_atoms(node_count):
    """Build one `feature` atom for each node id 1..node_count."""
    return [Relation.feature(node) for node in range(1, node_count + 1)]


# Captures cycle of the length of five (Bicyclopentyl): X -> Y -> Z -> R -> S -> X
# over pairwise distinct nodes.
five_cycle_rule = Relation.cycle_of_the_length_of_five(Var.X)[1,] <= (
    Relation.edge(Var.X, Var.Y), Relation.feature(Var.Y)[1,],
    Relation.edge(Var.Y, Var.Z), Relation.feature(Var.Z)[1,],
    Relation.edge(Var.Z, Var.R), Relation.feature(Var.R)[1,],
    Relation.edge(Var.R, Var.S), Relation.feature(Var.S)[1,],
    Relation.edge(Var.S, Var.X), Relation.feature(Var.X)[1,],
    Relation.special.alldiff(...),
)

# Captures general graph (such as Decalin): features reached over a single edge ...
neighbour_rule = Relation.general(Var.X)[1,] <= (
    Relation.edge(Var.X, Var.Y), Relation.feature(Var.Y)[1,],
)
# ... and features of any node. The head variable X does not occur in the body;
# NOTE(review): presumably intentional (edge-independent term) - confirm.
any_node_rule = Relation.general(Var.X)[1,] <= Relation.feature(Var.Y)[1,]

predict_from_general = Relation.predict <= Relation.general(Var.X)[1,]
predict_from_cycle = Relation.predict <= Relation.cycle_of_the_length_of_five(Var.X)[1,]

template.add_rules([
    five_cycle_rule,
    neighbour_rule,
    any_node_rule,
    predict_from_general,
    predict_from_cycle,
])

# Encoding of graph Bicyclopentyl: five-ring 1..5 with the bridge 1-6, then five-ring 6..10.
bicyclopentyl_ring_and_bridge = [(1, 2), (2, 3), (3, 4), (4, 5), (5, 1), (1, 6)]
bicyclopentyl_second_ring = [(6, 7), (7, 8), (8, 9), (9, 10), (10, 6)]
train_dataset.add_example(
    symmetric_edges(bicyclopentyl_ring_and_bridge)
    + symmetric_edges(bicyclopentyl_second_ring)
    + feature_atoms(10)
)

# Encoding of graph Decalin: six-ring 1..6, then the path 6-7-8-9-10-1 closing the
# second six-ring over the shared edge 1-6.
decalin_first_ring = [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (1, 6)]
decalin_second_ring_path = [(6, 7), (7, 8), (8, 9), (9, 10), (10, 1)]
train_dataset.add_example(
    symmetric_edges(decalin_first_ring)
    + symmetric_edges(decalin_second_ring_path)
    + feature_atoms(10)
)

# One labelled query per example, in the order the examples were added
# (target 1 first, then target 0).
train_dataset.add_queries([Relation.predict[target] for target in (1, 0)])

In [7]:
# NOTE(review): no settings argument is passed here, unlike the Backend.JAVA call in the
# first example - confirm the settings attached to the template are picked up.
neuralogic_evaluator = get_evaluator(template, Backend.DYNET)

# Drive the training iterator to the end; the values it yields are not needed here.
training_run = neuralogic_evaluator.train(train_dataset)
for _ in training_run:
    pass

graphs = ["Bicyclopentyl", "Decalin"]

# Evaluate on the training examples and report the rounded class next to the raw output.
test_results = neuralogic_evaluator.test(train_dataset)
for graph_id, (label, predicted) in enumerate(test_results):
    graph_name = graphs[graph_id]
    predicted_class = int(round(predicted))
    print(f"Graph {graph_name} is predicted to be class: {predicted_class} | {predicted}")

Graph Bicyclopentyl is predicted to be class: 1 | 0.9292657971382141
Graph Decalin is predicted to be class: 0 | 0.08129000663757324
