# Distinguishing non-regular graphs

There are graphs that are neither *k*-regular nor isomorphic and yet
cannot be distinguished by classic GNNs when their nodes
have identical features. An example of such graphs is shown in the following image.

![Non Regular Graphs](https://raw.githubusercontent.com/LukasZahradnik/PyNeuraLogic/master/docs/_static/non_regular_graphs.png)


In PyNeuraLogic, you can easily distinguish those graphs,
for example, once again with the model from the previous example ([Distinguishing K Regular Graphs](https://github.com/LukasZahradnik/PyNeuraLogic/blob/master/examples/DistinguishingKRegularGraphs.ipynb)),
which captures triangular subgraph patterns.

Install PyNeuraLogic from PyPI

In [None]:
! pip install neuralogic

In [1]:
from neuralogic.nn import get_evaluator
from neuralogic.core import Backend
from neuralogic.core import R, Template, V
from neuralogic.core.settings import Settings, Optimizer
from neuralogic.dataset import Dataset


In [2]:
# Fresh dataset and an empty rule template for this example.
train_dataset = Dataset()
template = Template()

# NOTE(review): the `[1,]` suffix presumably attaches a learnable weight of that
# dimension to the atom/rule head - confirm against the PyNeuraLogic docs.
template.add_rules([
    # Triangle pattern: triangle(X) is derived for a node X that lies on a
    # directed 3-cycle X -> Y -> Z -> X; the features of all three nodes are used.
    R.triangle(V.X)[1,] <= (
        R.edge(V.X, V.Y), R.feature(V.Y)[1,],
        R.edge(V.Y, V.Z), R.feature(V.Z)[1,],
        R.edge(V.Z, V.X), R.feature(V.X)[1,],
    ),

    # Generic neighborhood pattern: general(X) is derived from the feature of
    # every node Y with an edge Y -> X, and from X's own feature.
    R.general(V.X)[1,] <= (R.edge(V.Y, V.X), R.feature(V.Y)[1,]),
    R.general(V.X)[1,] <= R.feature(V.X)[1,],

    # Output predicate: predict (no arguments) is derived from both node-level
    # predicates above.
    R.predict <= R.general(V.X)[1,],
    R.predict <= R.triangle(V.X)[1,],
])

# Encoding of graph a)
# Every undirected edge is written once here and emitted in both directions:
# first all pairs as listed, then all pairs reversed, then one feature per node.
graph_a_edges = [(1, 2), (2, 3), (3, 1), (2, 4), (4, 5), (5, 6), (6, 4)]
train_dataset.add_example(
    [R.edge(src, dst) for src, dst in graph_a_edges]
    + [R.edge(dst, src) for src, dst in graph_a_edges]
    + [R.feature(node) for node in range(1, 7)]
)

# Encoding of graph b)
# Same layout as graph a): forward edges, reversed edges, node features.
graph_b_edges = [(1, 2), (2, 3), (3, 4), (4, 1), (2, 5), (5, 6), (6, 3)]
train_dataset.add_example(
    [R.edge(src, dst) for src, dst in graph_b_edges]
    + [R.edge(dst, src) for src, dst in graph_b_edges]
    + [R.feature(node) for node in range(1, 7)]
)

# Queries are added in the same order as the examples; the bracketed value is
# presumably the target label (graph a -> 1, graph b -> 0) - confirm in the docs.
train_dataset.add_queries([R.predict[label] for label in (1, 0)])

In [3]:
# Display names of the two examples, in the order they were added to the dataset.
graphs = ["a", "b"]

settings = Settings(epochs=200, optimizer=Optimizer.SGD)
neuralogic_evaluator = get_evaluator(template, settings, Backend.JAVA)

# Iterate the training run to completion; the values it yields are not used here.
training_run = neuralogic_evaluator.train(train_dataset)
for _ in training_run:
    pass

# Evaluate on the same dataset and print the rounded class next to the raw output.
predictions = neuralogic_evaluator.test(train_dataset)
for graph_id, predicted in enumerate(predictions):
    print(f"Graph {graphs[graph_id]} is predicted to be class: {int(round(predicted))} | {predicted}")

Graph a is predicted to be class: 1 | 0.7405298885946587
Graph b is predicted to be class: 0 | 0.1375019328232371



Another interesting approach, with a slightly different extension
of the vanilla GNNs, might be capturing graphs based on their structure and the
cardinality of their nodes (in the encodings below, the number of neighbors of a node). We can add additional information about the
cardinality of each node into examples, for instance, as atoms with
the predicate name *cardinality* and two terms -
the node id and its cardinality. We can then choose which atom will
be aggregated based on its cardinality to distinguish graph *a* and graph *b*, as shown in Example 2, where we capture only the sub-graphs described below.

The `a_graph` captures a triangle (`V.X`, `V.Y`, `V.Z`)
connected to one node (`V.T`) with a cardinality of three.
In contrast, the `b_graph` captures a cycle of length four
(`V.X`, `V.Y`, `V.Z`, `V.T`)
whose nodes have to satisfy the required cardinalities.


#### Example 2: Distinguishing between graphs based on their cardinality

In [4]:
# Fresh dataset and an empty rule template for this example.
train_dataset = Dataset()

template = Template()

# NOTE(review): `R.special.alldiff(...)` presumably requires all variables of the
# rule to be bound to different nodes, and `[1,]` presumably attaches a learnable
# weight - confirm both against the PyNeuraLogic docs.
template.add_rules([
    # a_graph(X): X has cardinality 3 and lies on a triangle X -> Y -> Z -> X
    # whose other two nodes Y and Z have cardinality 2; in addition X has an
    # edge to a node T of cardinality 3.
    R.a_graph(V.X) <= (
        R.edge(V.X, V.Y), R.cardinality(V.Y, 2)[1,],
        R.edge(V.Y, V.Z), R.cardinality(V.Z, 2)[1,],
        R.edge(V.Z, V.X), R.cardinality(V.X, 3)[1,],
        R.edge(V.X, V.T), R.cardinality(V.T, 3)[1,],
        R.special.alldiff(...),
    ),
    # b_graph(X): X lies on a 4-cycle X -> Y -> Z -> T -> X in which Y and Z
    # have cardinality 2 while T and X have cardinality 3.
    R.b_graph(V.X) <= (
        R.edge(V.X, V.Y), R.cardinality(V.Y, 2)[1,],
        R.edge(V.Y, V.Z), R.cardinality(V.Z, 2)[1,],
        R.edge(V.Z, V.T), R.cardinality(V.T, 3)[1,],
        R.edge(V.T, V.X), R.cardinality(V.X, 3)[1,],
        R.special.alldiff(...),
    ),
    # Output predicate: predict is derived from either sub-graph pattern.
    R.predict <= R.a_graph(V.X)[1,],
    R.predict <= R.b_graph(V.X)[1,],
])

# Encoding of graph a)
# Every undirected edge is written once and emitted in both directions (all
# pairs as listed, then all pairs reversed), followed by one
# cardinality(node, value) fact per node.
graph_a_edges = [(1, 2), (2, 3), (3, 1), (2, 4), (4, 5), (5, 6), (6, 4)]
graph_a_cardinalities = [(1, 2), (2, 3), (3, 2), (4, 3), (5, 2), (6, 2)]
train_dataset.add_example(
    [R.edge(src, dst) for src, dst in graph_a_edges]
    + [R.edge(dst, src) for src, dst in graph_a_edges]
    + [R.cardinality(node, value) for node, value in graph_a_cardinalities]
)

# Encoding of graph b)
# Same layout as graph a): forward edges, reversed edges, node cardinalities.
graph_b_edges = [(1, 2), (2, 3), (3, 4), (4, 1), (2, 5), (5, 6), (6, 3)]
graph_b_cardinalities = [(1, 2), (2, 3), (3, 3), (4, 2), (5, 2), (6, 2)]
train_dataset.add_example(
    [R.edge(src, dst) for src, dst in graph_b_edges]
    + [R.edge(dst, src) for src, dst in graph_b_edges]
    + [R.cardinality(node, value) for node, value in graph_b_cardinalities]
)

# Queries are added in the same order as the examples; the bracketed value is
# presumably the target label (graph a -> 1, graph b -> 0) - confirm in the docs.
train_dataset.add_queries([R.predict[label] for label in (1, 0)])

In [5]:
# Display names of the two examples, in the order they were added to the dataset.
graphs = ["a", "b"]

settings = Settings(epochs=200, optimizer=Optimizer.SGD)
neuralogic_evaluator = get_evaluator(template, settings)

# Iterate the training run to completion; the values it yields are not used here.
training_run = neuralogic_evaluator.train(train_dataset)
for _ in training_run:
    pass

# Evaluate on the same dataset and print the rounded class next to the raw output.
predictions = neuralogic_evaluator.test(train_dataset)
for graph_id, predicted in enumerate(predictions):
    print(f"Graph {graphs[graph_id]} is predicted to be class: {int(round(predicted))} | {predicted}")

Graph a is predicted to be class: 1 | 0.725646713848056
Graph b is predicted to be class: 0 | 4.85387435e-08


![Non Regular Molecules](https://raw.githubusercontent.com/LukasZahradnik/PyNeuraLogic/master/docs/_static/non_regular_molecules.png)

The image above shows two graphs, *a*
and *b*,
representing real-world structures of two molecules, *Bicyclopentyl*
and *Decalin*, respectively. Yet again, standard GNNs cannot distinguish between these
graph structures.
Let us use PyNeuraLogic to capture, for example, the cycle of length five present in
graph *a* and thus distinguish those instances, as shown in
Example 3.


#### Example 3: Capturing a cycle of length five

In [6]:
# Fresh dataset and an empty rule template for this example.
train_dataset = Dataset()
template = Template()

# NOTE(review): `R.special.alldiff(...)` presumably requires all variables of the
# rule to be bound to different nodes, and `[1,]` presumably attaches a learnable
# weight - confirm both against the PyNeuraLogic docs.
template.add_rules([
    # Cycle of length five (present in Bicyclopentyl): derived for a node X that
    # lies on a directed 5-cycle X -> Y -> Z -> R -> S -> X; the features of all
    # five nodes are used.
    R.cycle_of_the_length_of_five(V.X)[1,] <= (
        R.edge(V.X, V.Y), R.feature(V.Y)[1,],
        R.edge(V.Y, V.Z), R.feature(V.Z)[1,],
        R.edge(V.Z, V.R), R.feature(V.R)[1,],
        R.edge(V.R, V.S), R.feature(V.S)[1,],
        R.edge(V.S, V.X), R.feature(V.X)[1,],
        R.special.alldiff(...),
    ),

    # Generic neighborhood pattern (matches any graph, such as Decalin):
    # general(X) is derived from the feature of every node Y with an edge
    # Y -> X, and from X's own feature.
    R.general(V.X)[1,] <= (R.edge(V.Y, V.X), R.feature(V.Y)[1,]),
    R.general(V.X)[1,] <= R.feature(V.X)[1,],

    # Output predicate: predict is derived from both node-level predicates.
    R.predict <= R.general(V.X)[1,],
    R.predict <= R.cycle_of_the_length_of_five(V.X)[1,],
])

# Encoding of graph Bicyclopentyl
# Bonds are written once per group and emitted in both directions, group by
# group (as listed, then reversed), followed by one feature per node.
bicyclopentyl_bonds = (
    # Ring 1-2-3-4-5 plus the bond 1-6 that joins the two rings.
    [(1, 2), (2, 3), (3, 4), (4, 5), (5, 1), (1, 6)],
    # Ring 6-7-8-9-10.
    [(6, 7), (7, 8), (8, 9), (9, 10), (10, 6)],
)
bicyclopentyl_facts = []
for bond_group in bicyclopentyl_bonds:
    bicyclopentyl_facts.extend(R.edge(src, dst) for src, dst in bond_group)
    bicyclopentyl_facts.extend(R.edge(dst, src) for src, dst in bond_group)
bicyclopentyl_facts.extend(R.feature(node) for node in range(1, 11))
train_dataset.add_example(bicyclopentyl_facts)

# Encoding of graph Decalin
# Same layout as above: per group forward bonds, reversed bonds; then features.
decalin_bonds = (
    # Ring 1-2-3-4-5-6, closed by the bond 1-6.
    [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (1, 6)],
    # Path 6-7-8-9-10-1, which closes a second ring sharing the bond 1-6.
    [(6, 7), (7, 8), (8, 9), (9, 10), (10, 1)],
)
decalin_facts = []
for bond_group in decalin_bonds:
    decalin_facts.extend(R.edge(src, dst) for src, dst in bond_group)
    decalin_facts.extend(R.edge(dst, src) for src, dst in bond_group)
decalin_facts.extend(R.feature(node) for node in range(1, 11))
train_dataset.add_example(decalin_facts)

# Queries are added in the same order as the examples; the bracketed value is
# presumably the target label (Bicyclopentyl -> 1, Decalin -> 0) - confirm in the docs.
train_dataset.add_queries([R.predict[label] for label in (1, 0)])

In [7]:
# Display names of the two examples, in the order they were added to the dataset.
graphs = ["Bicyclopentyl", "Decalin"]

settings = Settings(epochs=200, optimizer=Optimizer.SGD)
neuralogic_evaluator = get_evaluator(template, settings)

# Iterate the training run to completion; the values it yields are not used here.
training_run = neuralogic_evaluator.train(train_dataset)
for _ in training_run:
    pass

# Evaluate on the same dataset and print the rounded class next to the raw output.
predictions = neuralogic_evaluator.test(train_dataset)
for graph_id, predicted in enumerate(predictions):
    print(f"Graph {graphs[graph_id]} is predicted to be class: {int(round(predicted))} | {predicted}")

Graph Bicyclopentyl is predicted to be class: 1 | 0.7530744743437316
Graph Decalin is predicted to be class: 0 | 0.1192371052030012
