# Pattern Matching

One of the substantial extensions of vanilla Graph Neural Networks
introduced by the PyNeuraLogic library is the ability to capture underlying
graph patterns. We can, for instance, recognize nodes that are part of
cycles, such as cycles of length three (triangles), as presented in
Example [1](#Example-1:-Capturing-the-pattern-of-triangles).

In [7]:
from neuralogic.nn import get_evaluator
from neuralogic.core import Backend
from neuralogic.core import Atom, Template, Var, Term
from neuralogic.core.settings import Settings, Optimizer
from neuralogic.utils.data import Dataset

#### Example 1: Capturing the pattern of triangles


In [8]:
# Build the triangle-detection template and its single training graph.
# Training is configured to use the SGD optimizer for 200 epochs.
settings = Settings(optimizer=Optimizer.SGD, epochs=200)
train_dataset = Dataset()

with Template(settings).context() as template:
    # NOTE(review): the `[1,]` subscript presumably attaches a learnable
    # weight of shape (1,) to the atom - confirm against the PyNeuraLogic docs.
    template.add_rules([
        # Captures triangle: X -> Y -> Z -> X must form a closed walk of three
        # edges; the feature of every node on the walk contributes to the body.
        Atom.triangle(Var.X)[1,] <= (
            Atom.edge(Var.X, Var.Y), Atom.feature(Var.Y)[1,],
            Atom.edge(Var.Y, Var.Z), Atom.feature(Var.Z)[1,],
            Atom.edge(Var.Z, Var.X), Atom.feature(Var.X)[1,],
        ),

        # Captures general graph: the features of X's direct neighbours ...
        Atom.general(Var.X)[1,] <= (Atom.edge(Var.X, Var.Y), Atom.feature(Var.Y)[1,]),
        # ... and a rule whose body does not mention X at all.
        # NOTE(review): X is unbound in this body, so presumably every node's
        # feature contributes to every `general(X)` - confirm this is intended.
        Atom.general(Var.X)[1,] <= Atom.feature(Var.Y)[1,],

        # The queried predicate `predict` is derived from both sub-patterns.
        Atom.predict(Var.X) <= Atom.general(Var.X)[1,],
        Atom.predict(Var.X) <= Atom.triangle(Var.X)[1,],
    ])

    train_dataset.add_example(
        [
            # Encoded graph:      6--4--1---2
            #                        |   \ /
            #                        5    3

            # Every undirected edge is listed in both directions: each row of
            # three edges is followed by a row with the same edges reversed.
            Atom.edge(1, 2), Atom.edge(2, 3), Atom.edge(3, 1),
            Atom.edge(2, 1), Atom.edge(3, 2), Atom.edge(1, 3),
            Atom.edge(1, 4), Atom.edge(4, 5), Atom.edge(4, 6),
            Atom.edge(4, 1), Atom.edge(5, 4), Atom.edge(6, 4),

            # One `feature` fact per node.
            Atom.feature(1), Atom.feature(2), Atom.feature(3),
            Atom.feature(4), Atom.feature(5), Atom.feature(6),
        ],
    )

    # One query per node. The bracketed value is presumably the target label:
    # 1 for nodes 1-3 (the triangle 1-2-3) and 0 for nodes 4-6.
    train_dataset.add_queries([
        Atom.predict(1)[1],
        Atom.predict(2)[1],
        Atom.predict(3)[1],
        Atom.predict(4)[0],
        Atom.predict(5)[0],
        Atom.predict(6)[0],
    ])

In [16]:
# Create an evaluator for the triangle template on the DyNet backend.
neuralogic_evaluator = get_evaluator(Backend.DYNET, template)

# `train` is consumed as an iterable; the values it yields are not needed
# here, so the loop only drives the training to completion.
for _ in neuralogic_evaluator.train(train_dataset):
    pass

# Re-score the training graph. Results are unpacked as (label, predicted)
# pairs, in the order the queries were added to the dataset.
train_results = neuralogic_evaluator.test(train_dataset)

for node, (label, predicted) in enumerate(train_results):
    # `enumerate` is zero-based while the nodes are numbered from 1, and the
    # raw prediction is rounded to obtain the True/False verdict.
    print(f"Node {node + 1} is predicted to be in a triangle: {bool(int(round(predicted)))}  | {predicted}")

Node 1 is predicted to be in a triangle: True  | 0.9750847220420837
Node 2 is predicted to be in a triangle: True  | 0.9750847220420837
Node 3 is predicted to be in a triangle: True  | 0.9750847220420837
Node 4 is predicted to be in a triangle: False  | 0.027181323617696762
Node 5 is predicted to be in a triangle: False  | 0.027181323617696762
Node 6 is predicted to be in a triangle: False  | 0.027181323617696762


In [13]:
# Build a larger, unseen graph for evaluating the trained triangle model.
test_dataset = Dataset()

with template.context():
    test_dataset.add_example(
        [
            # Encoded graph:      6--4--1---2--8
            #                      \ |   \ /
            #                       5     3--7

            # Every undirected edge is listed in both directions: each row of
            # three edges is followed by a row with the same edges reversed.
            Atom.edge(1, 2), Atom.edge(2, 3), Atom.edge(3, 1),
            Atom.edge(2, 1), Atom.edge(3, 2), Atom.edge(1, 3),
            Atom.edge(1, 4), Atom.edge(4, 5), Atom.edge(4, 6),
            Atom.edge(4, 1), Atom.edge(5, 4), Atom.edge(6, 4),
            Atom.edge(2, 8), Atom.edge(3, 7), Atom.edge(6, 5),
            Atom.edge(8, 2), Atom.edge(7, 3), Atom.edge(5, 6),

            # One `feature` fact per node. The predicate name must be spelled
            # exactly `feature`: the rules only match that predicate, and a
            # misspelled attribute on `Atom` is accepted as a different one.
            Atom.feature(1), Atom.feature(2), Atom.feature(3),
            Atom.feature(4), Atom.feature(5), Atom.feature(6),
            Atom.feature(7), Atom.feature(8),
        ],
    )

    # One query per node; all are given the value 0 here (presumably a
    # placeholder label, since only the predictions are printed afterwards).
    test_dataset.add_queries([
        Atom.predict(1)[0],
        Atom.predict(2)[0],
        Atom.predict(3)[0],
        Atom.predict(4)[0],
        Atom.predict(5)[0],
        Atom.predict(6)[0],
        Atom.predict(7)[0],
        Atom.predict(8)[0],
    ])

In [15]:
# Score the unseen graph with the evaluator. Results are unpacked as
# (label, predicted) pairs, in the order the queries were added.
test_results = neuralogic_evaluator.test(test_dataset)

for node, (label, predicted) in enumerate(test_results):
    # `enumerate` is zero-based while the nodes are numbered from 1, and the
    # raw prediction is rounded to obtain the True/False verdict.
    print(f"Node {node + 1} is predicted to be in a triangle: {bool(int(round(predicted)))}  | {predicted}")


Node 1 is predicted to be in a triangle: True  | 0.9750847220420837
Node 2 is predicted to be in a triangle: True  | 0.9750847220420837
Node 3 is predicted to be in a triangle: True  | 0.9750847220420837
Node 4 is predicted to be in a triangle: True  | 0.9750847220420837
Node 5 is predicted to be in a triangle: True  | 0.9750847220420837
Node 6 is predicted to be in a triangle: True  | 0.9750847220420837
Node 7 is predicted to be in a triangle: False  | 0.027181323617696762
Node 8 is predicted to be in a triangle: False  | 0.027181323617696762



Another, slightly more complex, example is capturing cliques
in a graph, i.e., subgraphs that are complete. We present capturing
cliques of size four in Example [2](#Example-2:-Capturing-the-pattern-of-a-clique-of-the-size-of-four-nodes). In the
clique example, we utilize an atom with the special predicate `alldiff`,
which guarantees that the variables specified in its terms take unique
values (all different). Instead of listing the variables explicitly, as in
the example below, `...` can be used in place of the terms; PyNeuraLogic
later replaces it with all variables found in the current rule.

#### Example 2: Capturing the pattern of a clique of the size of four nodes

In [34]:
# Build the 4-clique detection template and its single training graph.
# Training is configured to use the SGD optimizer for 200 epochs.
settings = Settings(optimizer=Optimizer.SGD, epochs=200)
train_dataset = Dataset()

with Template(settings).context() as template:
    # NOTE(review): the `[1,]` subscript presumably attaches a learnable
    # weight of shape (1,) to the atom - confirm against the PyNeuraLogic docs.
    template.add_rules([
        # Captures clique: all six pairs among X, Y, Z, R must be connected
        # (X-Y, X-Z, X-R, Y-Z, Y-R, Z-R), and `alldiff` forces the four
        # variables to be bound to four different nodes.
        Atom.clique(Var.X)[1,] <= (
            Atom.feature(Var.X)[1,],
            Atom.edge(Var.X, Var.Y), Atom.feature(Var.Y)[1,],
            Atom.edge(Var.X, Var.Z), Atom.feature(Var.Z)[1,],
            Atom.edge(Var.X, Var.R), Atom.feature(Var.R)[1,],
            Atom.edge(Var.Y, Var.Z), Atom.edge(Var.Y, Var.R),
            Atom.edge(Var.Z, Var.R),
            Atom.special.alldiff(Var.X, Var.Y, Var.Z, Var.R),
        ),

        # Captures general graph: the features of X's direct neighbours ...
        Atom.general(Var.X)[1,] <= (Atom.edge(Var.X, Var.Y), Atom.feature(Var.Y)[1,]),
        # ... and a rule whose body does not mention X at all.
        # NOTE(review): X is unbound in this body, so presumably every node's
        # feature contributes to every `general(X)` - confirm this is intended.
        Atom.general(Var.X)[1,] <= Atom.feature(Var.Y)[1,],

        # The queried predicate `predict` is derived from both sub-patterns.
        Atom.predict(Var.X) <= Atom.general(Var.X)[1,],
        Atom.predict(Var.X) <= Atom.clique(Var.X)[1,],
    ])

    train_dataset.add_example(
        [
            # Encoded graph:      6--4--1---2
            #                        |  |\/|
            #                        5  |/\|
            #                           3--7
            #

            # Every undirected edge is listed in both directions: each row of
            # three edges is followed by a row with the same edges reversed.
            Atom.edge(1, 2), Atom.edge(2, 3), Atom.edge(3, 1),
            Atom.edge(2, 1), Atom.edge(3, 2), Atom.edge(1, 3),
            Atom.edge(1, 4), Atom.edge(4, 5), Atom.edge(4, 6),
            Atom.edge(4, 1), Atom.edge(5, 4), Atom.edge(6, 4),
            Atom.edge(1, 7), Atom.edge(3, 7), Atom.edge(7, 2),
            Atom.edge(7, 1), Atom.edge(7, 3), Atom.edge(2, 7),

            # One `feature` fact per node.
            Atom.feature(1), Atom.feature(2), Atom.feature(3),
            Atom.feature(4), Atom.feature(5), Atom.feature(6),
            Atom.feature(7),
        ],
    )

    # One query per node. The bracketed value is presumably the target label:
    # 1 for nodes 1, 2, 3 and 7 (the clique) and 0 for nodes 4-6.
    train_dataset.add_queries([
        Atom.predict(1)[1],
        Atom.predict(2)[1],
        Atom.predict(3)[1],
        Atom.predict(4)[0],
        Atom.predict(5)[0],
        Atom.predict(6)[0],
        Atom.predict(7)[1]
    ])

In [35]:
# Create an evaluator for the clique template on the DyNet backend.
neuralogic_evaluator = get_evaluator(Backend.DYNET, template)

# `train` is consumed as an iterable; the values it yields are not needed
# here, so the loop only drives the training to completion.
for _ in neuralogic_evaluator.train(train_dataset):
    pass

# Re-score the training graph. Results are unpacked as (label, predicted)
# pairs, in the order the queries were added to the dataset.
train_results = neuralogic_evaluator.test(train_dataset)

for node, (label, predicted) in enumerate(train_results):
    # `enumerate` is zero-based while the nodes are numbered from 1, and the
    # raw prediction is rounded to obtain the True/False verdict.
    print(f"Node {node + 1} is predicted to be in a clique: {bool(int(round(predicted)))}  | {predicted}")



Node 1 is predicted to be in a clique: True  | 0.9797205924987793
Node 2 is predicted to be in a clique: True  | 0.9797205924987793
Node 3 is predicted to be in a clique: True  | 0.9797205924987793
Node 4 is predicted to be in a clique: False  | 0.027427352964878082
Node 5 is predicted to be in a clique: False  | 0.027427352964878082
Node 6 is predicted to be in a clique: False  | 0.027427352964878082
Node 7 is predicted to be in a clique: True  | 0.9797205924987793


In [36]:
# Build a larger, unseen graph for evaluating the trained clique model.
test_dataset = Dataset()

with template.context():
    test_dataset.add_example(
        [
            # Encoded graph:      6--4--1---2--9
            #                     |\/|  |\/| \ |
            #                     |/\|  |/\|  10
            #                     8--5  3--7
            #
            # NOTE(review): the sketch draws a link between 9 and 10, but no
            # edge(9, 10) / edge(10, 9) facts are listed below - confirm
            # whether the sketch or the edge list reflects the intended graph.

            # Every undirected edge is listed in both directions: each row of
            # three edges is followed by a row with the same edges reversed.
            Atom.edge(1, 2), Atom.edge(2, 3), Atom.edge(3, 1),
            Atom.edge(2, 1), Atom.edge(3, 2), Atom.edge(1, 3),
            Atom.edge(1, 4), Atom.edge(4, 5), Atom.edge(4, 6),
            Atom.edge(4, 1), Atom.edge(5, 4), Atom.edge(6, 4),

            # Node 7 is linked to 1, 2 and 3.
            Atom.edge(1, 7), Atom.edge(3, 7), Atom.edge(7, 2),
            Atom.edge(7, 1), Atom.edge(7, 3), Atom.edge(2, 7),

            # Node 8 is linked to 4, 5 and 6; 5-6 is added as well, and
            # nodes 9 and 10 are each attached to node 2.
            Atom.edge(8, 5), Atom.edge(6, 8), Atom.edge(4, 8),
            Atom.edge(5, 8), Atom.edge(8, 6), Atom.edge(8, 4),
            Atom.edge(6, 5), Atom.edge(2, 10), Atom.edge(2, 9),
            Atom.edge(5, 6), Atom.edge(10, 2), Atom.edge(9, 2),

            # One `feature` fact per node.
            Atom.feature(1), Atom.feature(2), Atom.feature(3),
            Atom.feature(4), Atom.feature(5), Atom.feature(6),
            Atom.feature(7), Atom.feature(8), Atom.feature(9),
            Atom.feature(10),
        ],
    )

    # One query per node; all are given the value 0 here (presumably a
    # placeholder label, since only the predictions are printed afterwards).
    test_dataset.add_queries([
        Atom.predict(1)[0],
        Atom.predict(2)[0],
        Atom.predict(3)[0],
        Atom.predict(4)[0],
        Atom.predict(5)[0],
        Atom.predict(6)[0],
        Atom.predict(7)[0],
        Atom.predict(8)[0],
        Atom.predict(9)[0],
        Atom.predict(10)[0],
    ])

In [37]:
# Score the unseen graph with the evaluator. Results are unpacked as
# (label, predicted) pairs, in the order the queries were added.
test_results = neuralogic_evaluator.test(test_dataset)

for node, (label, predicted) in enumerate(test_results):
    # `enumerate` is zero-based while the nodes are numbered from 1, and the
    # raw prediction is rounded to obtain the True/False verdict.
    print(f"Node {node + 1} is predicted to be in a clique: {bool(int(round(predicted)))}  | {predicted}")

Node 1 is predicted to be in a clique: True  | 0.9797205924987793
Node 2 is predicted to be in a clique: True  | 0.9797205924987793
Node 3 is predicted to be in a clique: True  | 0.9797205924987793
Node 4 is predicted to be in a clique: True  | 0.9797205924987793
Node 5 is predicted to be in a clique: True  | 0.9797205924987793
Node 6 is predicted to be in a clique: True  | 0.9797205924987793
Node 7 is predicted to be in a clique: True  | 0.9797205924987793
Node 8 is predicted to be in a clique: True  | 0.9797205924987793
Node 9 is predicted to be in a clique: False  | 0.027427352964878082
Node 10 is predicted to be in a clique: False  | 0.027427352964878082
