# Pattern Matching

One of the substantial extensions over vanilla Graph Neural Networks
introduced by the PyNeuraLogic library is the ability to capture underlying
graph patterns. We can, for instance, recognize nodes that are part of cycles,
such as cycles of length three (triangles), as
presented in Example [1](#Example-1:-Capturing-the-pattern-of-triangles).

Install PyNeuraLogic and DyNet from PyPI

In [None]:
! pip install neuralogic

In [None]:
! pip install dynet

In [1]:
from neuralogic.nn import get_evaluator
from neuralogic.core import Backend
from neuralogic.core import Relation, Template, Var, Term
from neuralogic.core.settings import Settings, Optimizer
from neuralogic.utils.data import Dataset

#### Example 1: Capturing the pattern of triangles


In [2]:
train_dataset = Dataset()
template = Template()

# Triangle pattern: following edges X -> Y -> Z -> X returns to the start node.
triangle_rule = Relation.triangle(Var.X)[1,] <= (
    Relation.edge(Var.X, Var.Y), Relation.feature(Var.Y)[1,],
    Relation.edge(Var.Y, Var.Z), Relation.feature(Var.Z)[1,],
    Relation.edge(Var.Z, Var.X), Relation.feature(Var.X)[1,],
)

# General graph structure: features reached over a single edge, plus a rule
# whose body consists of a `feature` atom alone.
general_neighbor_rule = Relation.general(Var.X)[1,] <= (
    Relation.edge(Var.X, Var.Y), Relation.feature(Var.Y)[1,]
)
general_feature_rule = Relation.general(Var.X)[1,] <= Relation.feature(Var.Y)[1,]

# The queried `predict` relation is derived from both intermediate relations.
predict_from_general = Relation.predict(Var.X) <= Relation.general(Var.X)[1,]
predict_from_triangle = Relation.predict(Var.X) <= Relation.triangle(Var.X)[1,]

template.add_rules([
    triangle_rule,
    general_neighbor_rule,
    general_feature_rule,
    predict_from_general,
    predict_from_triangle,
])

# Encoded graph:      6--4--1---2
#                        |   \ /
#                        5    3
#
# Each undirected edge is stored as two directed `edge` facts.
directed_edges = [
    (1, 2), (2, 3), (3, 1),
    (2, 1), (3, 2), (1, 3),
    (1, 4), (4, 5), (4, 6),
    (4, 1), (5, 4), (6, 4),
]
graph_facts = [Relation.edge(src, dst) for src, dst in directed_edges]
graph_facts += [Relation.feature(node_id) for node_id in range(1, 7)]
train_dataset.add_example(graph_facts)

# (node, label) pairs: label 1 for the triangle nodes 1-3, label 0 otherwise.
node_labels = [(1, 1), (2, 1), (3, 1), (4, 0), (5, 0), (6, 0)]
train_dataset.add_queries(
    [Relation.predict(node_id)[target] for node_id, target in node_labels]
)

In [3]:
# SGD optimizer, 200 epochs, DyNet backend.
settings = Settings(optimizer=Optimizer.SGD, epochs=200)
neuralogic_evaluator = get_evaluator(template, Backend.DYNET, settings)

# Exhaust the training iterator; whatever it yields is not needed here.
for _step in neuralogic_evaluator.train(train_dataset):
    continue

# Results are numbered from 1, assuming they come back in query (node) order.
training_results = neuralogic_evaluator.test(train_dataset)
for node_id, (_target, output) in enumerate(training_results, start=1):
    in_triangle = bool(int(round(output)))
    print(f"Node {node_id} is predicted to be in a triangle: {in_triangle}  | {output}")

[dynet] random seed: 1804914989
[dynet] allocating memory: 512MB
[dynet] memory allocation done.


Node 1 is predicted to be in a triangle: True  | 0.9789628982543945
Node 2 is predicted to be in a triangle: True  | 0.9789628982543945
Node 3 is predicted to be in a triangle: True  | 0.9789628982543945
Node 4 is predicted to be in a triangle: False  | 0.023499129340052605
Node 5 is predicted to be in a triangle: False  | 0.023499129340052605
Node 6 is predicted to be in a triangle: False  | 0.023499129340052605


In [4]:
test_dataset = Dataset()


test_dataset.add_example(
    [
        # Encoded graph:      6--4--1---2--8
        #                      \ |   \ /
        #                       5     3--7
        #
        # Compared to the training graph, this adds the 5-6 edge (closing the
        # 4-5-6 triangle) and two pendant nodes, 7 and 8.

        Relation.edge(1, 2), Relation.edge(2, 3), Relation.edge(3, 1),
        Relation.edge(2, 1), Relation.edge(3, 2), Relation.edge(1, 3),
        Relation.edge(1, 4), Relation.edge(4, 5), Relation.edge(4, 6),
        Relation.edge(4, 1), Relation.edge(5, 4), Relation.edge(6, 4),
        Relation.edge(2, 8), Relation.edge(3, 7), Relation.edge(6, 5),
        Relation.edge(8, 2), Relation.edge(7, 3), Relation.edge(5, 6),

        # Every node needs a `feature` fact. A misspelled predicate name would
        # not raise an error, so the spelling has to match the template exactly.
        Relation.feature(1), Relation.feature(2), Relation.feature(3),
        Relation.feature(4), Relation.feature(5), Relation.feature(6),
        Relation.feature(7), Relation.feature(8),
    ],
)

# One query per node.
# NOTE(review): all labels are 0, even for nodes that lie on a triangle;
# presumably they are placeholders, since the evaluation cell only prints the
# predicted values - confirm.
test_dataset.add_queries([
    Relation.predict(1)[0],
    Relation.predict(2)[0],
    Relation.predict(3)[0],
    Relation.predict(4)[0],
    Relation.predict(5)[0],
    Relation.predict(6)[0],
    Relation.predict(7)[0],
    Relation.predict(8)[0],
])

In [5]:
# Print the prediction for every node of the unseen graph, numbered from 1
# (assumes results come back in query order).
test_results = neuralogic_evaluator.test(test_dataset)
for node_id, (_target, output) in enumerate(test_results, start=1):
    in_triangle = bool(int(round(output)))
    print(f"Node {node_id} is predicted to be in a triangle: {in_triangle}  | {output}")


Node 1 is predicted to be in a triangle: True  | 0.9789628982543945
Node 2 is predicted to be in a triangle: True  | 0.9789628982543945
Node 3 is predicted to be in a triangle: True  | 0.9789628982543945
Node 4 is predicted to be in a triangle: True  | 0.9789628982543945
Node 5 is predicted to be in a triangle: True  | 0.9789628982543945
Node 6 is predicted to be in a triangle: True  | 0.9789628982543945
Node 7 is predicted to be in a triangle: False  | 0.023499129340052605
Node 8 is predicted to be in a triangle: False  | 0.023499129340052605



Another, slightly more complex example is capturing cliques
in a graph, i.e., subgraphs that are complete. We present capturing
cliques of size four in Example [2](#Example-2:-Capturing-the-pattern-of-a-clique-of-the-size-of-four-nodes). In the
clique example, we utilize an atom with a special predicate `alldiff`,
which guarantees that the variables given as its terms take unique
values (all different). The example below lists those variables explicitly;
alternatively, `...` can be used in place of the terms, which PyNeuraLogic
later substitutes with all variables found in the current rule.

#### Example 2: Capturing the pattern of a clique of the size of four nodes

In [6]:
train_dataset = Dataset()
template = Template()

# Clique pattern: X, Y, Z and R are pairwise connected, and `alldiff` forces
# the four variables to take different values.
clique_rule = Relation.clique(Var.X)[1,] <= (
    Relation.feature(Var.X)[1,],
    Relation.edge(Var.X, Var.Y), Relation.feature(Var.Y)[1,],
    Relation.edge(Var.X, Var.Z), Relation.feature(Var.Z)[1,],
    Relation.edge(Var.X, Var.R), Relation.feature(Var.R)[1,],
    Relation.edge(Var.Y, Var.Z), Relation.edge(Var.Y, Var.R),
    Relation.edge(Var.Z, Var.R),
    Relation.special.alldiff(Var.X, Var.Y, Var.Z, Var.R),
)

# General graph structure: features reached over a single edge, plus a rule
# whose body consists of a `feature` atom alone.
general_neighbor_rule = Relation.general(Var.X)[1,] <= (
    Relation.edge(Var.X, Var.Y), Relation.feature(Var.Y)[1,]
)
general_feature_rule = Relation.general(Var.X)[1,] <= Relation.feature(Var.Y)[1,]

# The queried `predict` relation is derived from both intermediate relations.
predict_from_general = Relation.predict(Var.X) <= Relation.general(Var.X)[1,]
predict_from_clique = Relation.predict(Var.X) <= Relation.clique(Var.X)[1,]

template.add_rules([
    clique_rule,
    general_neighbor_rule,
    general_feature_rule,
    predict_from_general,
    predict_from_clique,
])

# Encoded graph:      6--4--1---2
#                        |  |\/|
#                        5  |/\|
#                           3--7
#
# Each undirected edge is stored as two directed `edge` facts.
directed_edges = [
    (1, 2), (2, 3), (3, 1),
    (2, 1), (3, 2), (1, 3),
    (1, 4), (4, 5), (4, 6),
    (4, 1), (5, 4), (6, 4),
    (1, 7), (3, 7), (7, 2),
    (7, 1), (7, 3), (2, 7),
]
graph_facts = [Relation.edge(src, dst) for src, dst in directed_edges]
graph_facts += [Relation.feature(node_id) for node_id in range(1, 8)]
train_dataset.add_example(graph_facts)

# (node, label) pairs: label 1 for the clique nodes 1, 2, 3 and 7.
node_labels = [(1, 1), (2, 1), (3, 1), (4, 0), (5, 0), (6, 0), (7, 1)]
train_dataset.add_queries(
    [Relation.predict(node_id)[target] for node_id, target in node_labels]
)

In [7]:
# SGD optimizer, 200 epochs, DyNet backend.
settings = Settings(optimizer=Optimizer.SGD, epochs=200)
neuralogic_evaluator = get_evaluator(template, Backend.DYNET, settings)

# Exhaust the training iterator; whatever it yields is not needed here.
for _step in neuralogic_evaluator.train(train_dataset):
    continue

# Results are numbered from 1, assuming they come back in query (node) order.
training_results = neuralogic_evaluator.test(train_dataset)
for node_id, (_target, output) in enumerate(training_results, start=1):
    in_clique = bool(int(round(output)))
    print(f"Node {node_id} is predicted to be in a clique: {in_clique}  | {output}")



Node 1 is predicted to be in a clique: True  | 0.9800930619239807
Node 2 is predicted to be in a clique: True  | 0.9800930619239807
Node 3 is predicted to be in a clique: True  | 0.9800930619239807
Node 4 is predicted to be in a clique: False  | 0.02667420543730259
Node 5 is predicted to be in a clique: False  | 0.02667420543730259
Node 6 is predicted to be in a clique: False  | 0.02667420543730259
Node 7 is predicted to be in a clique: True  | 0.9800930619239807


In [8]:
test_dataset = Dataset()

# Encoded graph:      6--4--1---2--9
#                     |\/|  |\/| \ |
#                     |/\|  |/\|  10
#                     8--5  3--7
#
# NOTE(review): the drawing shows an edge between 9 and 10, but the list below
# contains no edge(9, 10) / edge(10, 9) facts - confirm which one is intended.
#
# Each undirected edge is stored as two directed `edge` facts.
directed_edges = [
    (1, 2), (2, 3), (3, 1),
    (2, 1), (3, 2), (1, 3),
    (1, 4), (4, 5), (4, 6),
    (4, 1), (5, 4), (6, 4),

    (1, 7), (3, 7), (7, 2),
    (7, 1), (7, 3), (2, 7),

    (8, 5), (6, 8), (4, 8),
    (5, 8), (8, 6), (8, 4),
    (6, 5), (2, 10), (2, 9),
    (5, 6), (10, 2), (9, 2),
]
graph_facts = [Relation.edge(src, dst) for src, dst in directed_edges]
graph_facts += [Relation.feature(node_id) for node_id in range(1, 11)]
test_dataset.add_example(graph_facts)

# One query per node, each with label 0.
test_dataset.add_queries(
    [Relation.predict(node_id)[0] for node_id in range(1, 11)]
)

In [9]:
# Print the prediction for every node of the unseen graph, numbered from 1
# (assumes results come back in query order).
test_results = neuralogic_evaluator.test(test_dataset)
for node_id, (_target, output) in enumerate(test_results, start=1):
    in_clique = bool(int(round(output)))
    print(f"Node {node_id} is predicted to be in a clique: {in_clique}  | {output}")

Node 1 is predicted to be in a clique: True  | 0.9800930619239807
Node 2 is predicted to be in a clique: True  | 0.9800930619239807
Node 3 is predicted to be in a clique: True  | 0.9800930619239807
Node 4 is predicted to be in a clique: True  | 0.9800930619239807
Node 5 is predicted to be in a clique: True  | 0.9800930619239807
Node 6 is predicted to be in a clique: True  | 0.9800930619239807
Node 7 is predicted to be in a clique: True  | 0.9800930619239807
Node 8 is predicted to be in a clique: True  | 0.9800930619239807
Node 9 is predicted to be in a clique: False  | 0.02667420543730259
Node 10 is predicted to be in a clique: False  | 0.02667420543730259
