# Pattern Matching

One direct extension of vanilla Graph Neural Networks
that you can easily explore with PyNeuraLogic is the use of sub-graph
pattern expressions (for various subgraph/graphlet/motif GNNs). We can, for instance, recognize nodes that are part of cycles,
such as cycles of length three (triangles),
as presented in Example 1.

Install PyNeuraLogic from PyPI

In [None]:
! pip install neuralogic

In [1]:
from neuralogic.nn import get_evaluator
from neuralogic.core import Backend, R, Template, V, Settings, Optimizer, Activation
from neuralogic.dataset import Dataset

#### Example 1: Capturing the pattern of triangles


In [2]:
# Example 1 setup: a template holding a triangle rule next to a generic GNN
# rule, and a single training graph labelled by triangle membership.
train_dataset = Dataset()
template = Template()

# A triangle pattern rule: X is matched when edges X -> Y -> Z -> X exist.
triangle_rule = R.triangle(V.X)[1,] <= (
    R.edge(V.X, V.Y), R.feature(V.Y)[1,],
    R.edge(V.Y, V.Z), R.feature(V.Z)[1,],
    R.edge(V.Z, V.X), R.feature(V.X)[1,],
)

# A generic GNN rule: X collects the features of nodes Y with an edge Y -> X.
neighbour_rule = R.general(V.X)[1,] <= (R.edge(V.Y, V.X), R.feature(V.Y)[1,])

# Base case for unconnected graphs.
base_rule = R.general(V.X)[1,] <= R.feature(V.Y)[1,]

# Combine the previous: `predict` is derived from both `general` and
# `triangle`, each rule carrying Activation.SIGMOID.
predict_from_general = (R.predict(V.X) <= R.general(V.X)[1,]) | [Activation.SIGMOID]
predict_from_triangle = (R.predict(V.X) <= R.triangle(V.X)[1,]) | [Activation.SIGMOID]

template.add_rules([
    triangle_rule,
    neighbour_rule,
    base_rule,
    predict_from_general,
    predict_from_triangle,
])

training_graph = [
    # Encoded graph:      6--4--1---2
    #                        |   \ /
    #                        5    3

    # Each undirected edge is listed in both directions.
    R.edge(1, 2), R.edge(2, 3), R.edge(3, 1),
    R.edge(2, 1), R.edge(3, 2), R.edge(1, 3),
    R.edge(1, 4), R.edge(4, 5), R.edge(4, 6),
    R.edge(4, 1), R.edge(5, 4), R.edge(6, 4),

    R.feature(1), R.feature(2), R.feature(3),
    R.feature(4), R.feature(5), R.feature(6),
]
train_dataset.add_example(training_graph)

# One query per node, in node order: nodes 1-3 (the triangle) are labelled 1,
# nodes 4-6 are labelled 0.
train_dataset.add_queries([
    R.predict(node)[label]
    for node, label in enumerate((1, 1, 1, 0, 0, 0), start=1)
])

In [3]:
# Train the Example 1 template with SGD for 200 epochs, then print the
# prediction for every training query.
settings = Settings(optimizer=Optimizer.SGD, epochs=200)
neuralogic_evaluator = get_evaluator(template, settings)

built_dataset = neuralogic_evaluator.build_dataset(train_dataset)

# The training iterator is consumed to completion; the values it yields are
# not used here.
for _epoch_result in neuralogic_evaluator.train(built_dataset):
    continue

# The loop relies on results arriving in the order the queries were added,
# so position i is reported as node i + 1. The label is not displayed.
training_results = neuralogic_evaluator.test(built_dataset)
for node, (_label, predicted) in enumerate(training_results):
    print(f"Node {node + 1} is predicted to be in a triangle: {bool(int(round(predicted)))}  | {predicted}")

Node 1 is predicted to be in a triangle: True  | 0.7309413577187857
Node 2 is predicted to be in a triangle: True  | 0.7309413577187857
Node 3 is predicted to be in a triangle: True  | 0.7309413577187857
Node 4 is predicted to be in a triangle: False  | 0.195771188787338
Node 5 is predicted to be in a triangle: False  | 0.195771188787338
Node 6 is predicted to be in a triangle: False  | 0.195771188787338


In [4]:
# Test graph for Example 1: the training graph extended with nodes 7 and 8
# and with the 5--6 edge, which closes a second triangle (4, 5, 6).
test_dataset = Dataset()


test_dataset.add_example(
    [
        # Encoded graph:      6--4--1---2--8
        #                      \ |   \ /
        #                       5     3--7

        # Each undirected edge is listed in both directions.
        R.edge(1, 2), R.edge(2, 3), R.edge(3, 1),
        R.edge(2, 1), R.edge(3, 2), R.edge(1, 3),
        R.edge(1, 4), R.edge(4, 5), R.edge(4, 6),
        R.edge(4, 1), R.edge(5, 4), R.edge(6, 4),
        R.edge(2, 8), R.edge(3, 7), R.edge(6, 5),
        R.edge(8, 2), R.edge(7, 3), R.edge(5, 6),

        # Every node needs a `feature` fact, because the template rules match
        # on the `feature` predicate. The predicate name must be spelled
        # exactly: a misspelled attribute on `R` is a different predicate.
        R.feature(1), R.feature(2), R.feature(3),
        R.feature(4), R.feature(5), R.feature(6),
        R.feature(7), R.feature(8),
    ],
)

# One query per node, in node order. All labels are 0; the loop that prints
# the test results does not display the label.
test_dataset.add_queries([
    R.predict(1)[0],
    R.predict(2)[0],
    R.predict(3)[0],
    R.predict(4)[0],
    R.predict(5)[0],
    R.predict(6)[0],
    R.predict(7)[0],
    R.predict(8)[0],
])

# Rebind the name to the dataset built by the trained evaluator.
test_dataset = neuralogic_evaluator.build_dataset(test_dataset)

In [5]:
# Print a thresholded verdict and the raw score for each test query. The loop
# relies on results arriving in the order the queries were added, so
# position i is reported as node i + 1. The label is not displayed.
test_results = neuralogic_evaluator.test(test_dataset)
for node, (_label, predicted) in enumerate(test_results):
    print(f"Node {node + 1} is predicted to be in a triangle: {bool(int(round(predicted)))}  | {predicted}")


Node 1 is predicted to be in a triangle: True  | 0.7287371430194902
Node 2 is predicted to be in a triangle: True  | 0.7287371430194902
Node 3 is predicted to be in a triangle: True  | 0.7287371430194902
Node 4 is predicted to be in a triangle: True  | 0.7287371430194902
Node 5 is predicted to be in a triangle: True  | 0.7287371430194902
Node 6 is predicted to be in a triangle: True  | 0.7287371430194902
Node 7 is predicted to be in a triangle: False  | 0.1900394692120154
Node 8 is predicted to be in a triangle: False  | 0.1900394692120154




A similar extension might be capturing cliques
in a graph - i.e. subgraphs that are complete. We present capturing
cliques of size four.
Here we utilize a special predicate `alldiff`,
which guarantees that the logical variables given as its terms take unique
values (all different). Example 2 lists these variables explicitly; alternatively, `...` can be
used in place of the terms, which PyNeuraLogic substitutes for all variables found in the current rule.

#### Example 2: Capturing the pattern of a clique of the size of four nodes

In [6]:
# Example 2 setup: a template holding a four-node clique rule next to a
# generic GNN rule, and a single training graph labelled by clique membership.
train_dataset = Dataset()
template = Template()

# Captures clique: X, Y, Z and R are pairwise connected by `edge` facts, and
# `alldiff` requires the four variables to be bound to different values.
clique_rule = R.clique(V.X)[1,] <= (
    R.feature(V.X)[1,],
    R.edge(V.X, V.Y), R.feature(V.Y)[1,],
    R.edge(V.X, V.Z), R.feature(V.Z)[1,],
    R.edge(V.X, V.R), R.feature(V.R)[1,],
    R.edge(V.Y, V.Z), R.edge(V.Y, V.R),
    R.edge(V.Z, V.R),
    R.special.alldiff(V.X, V.Y, V.Z, V.R),
)

# Captures general graph: X collects the features of nodes Y with an edge
# Y -> X; the second rule has only a `feature` atom in its body.
neighbour_rule = R.general(V.X)[1,] <= (R.edge(V.Y, V.X), R.feature(V.Y)[1,])
base_rule = R.general(V.X)[1,] <= R.feature(V.Y)[1,]

# `predict` is derived from both `general` and `clique`, each rule carrying
# Activation.SIGMOID.
predict_from_general = (R.predict(V.X) <= R.general(V.X)[1,]) | [Activation.SIGMOID]
predict_from_clique = (R.predict(V.X) <= R.clique(V.X)[1,]) | [Activation.SIGMOID]

template.add_rules([
    clique_rule,
    neighbour_rule,
    base_rule,
    predict_from_general,
    predict_from_clique,
])

training_graph = [
    # Encoded graph:      6--4--1---2
    #                        |  |\/|
    #                        5  |/\|
    #                           3--7
    #

    # Each undirected edge is listed in both directions.
    R.edge(1, 2), R.edge(2, 3), R.edge(3, 1),
    R.edge(2, 1), R.edge(3, 2), R.edge(1, 3),
    R.edge(1, 4), R.edge(4, 5), R.edge(4, 6),
    R.edge(4, 1), R.edge(5, 4), R.edge(6, 4),
    R.edge(1, 7), R.edge(3, 7), R.edge(7, 2),
    R.edge(7, 1), R.edge(7, 3), R.edge(2, 7),

    R.feature(1), R.feature(2), R.feature(3),
    R.feature(4), R.feature(5), R.feature(6),
    R.feature(7),
]
train_dataset.add_example(training_graph)

# One query per node, in node order: the clique nodes 1, 2, 3 and 7 are
# labelled 1, nodes 4-6 are labelled 0.
train_dataset.add_queries([
    R.predict(node)[label]
    for node, label in enumerate((1, 1, 1, 0, 0, 0, 1), start=1)
])

In [7]:
# Train the Example 2 template with SGD for 200 epochs on the Java backend,
# then print the prediction for every training query.
settings = Settings(optimizer=Optimizer.SGD, epochs=200)
neuralogic_evaluator = get_evaluator(template, settings, Backend.JAVA)

# Rebind the name to the dataset built by the evaluator.
train_dataset = neuralogic_evaluator.build_dataset(train_dataset)

# The training iterator is consumed to completion; the values it yields are
# not used here.
for _epoch_result in neuralogic_evaluator.train(train_dataset):
    continue

# The loop relies on results arriving in the order the queries were added,
# so position i is reported as node i + 1. The label is not displayed.
training_results = neuralogic_evaluator.test(train_dataset)
for node, (_label, predicted) in enumerate(training_results):
    print(f"Node {node + 1} is predicted to be in a clique: {bool(int(round(predicted)))}  | {predicted}")



Node 1 is predicted to be in a clique: True  | 0.771131111165391
Node 2 is predicted to be in a clique: True  | 0.771131111165391
Node 3 is predicted to be in a clique: True  | 0.771131111165391
Node 4 is predicted to be in a clique: False  | 0.1988256262921168
Node 5 is predicted to be in a clique: False  | 0.1988256262921168
Node 6 is predicted to be in a clique: False  | 0.1988256262921168
Node 7 is predicted to be in a clique: True  | 0.771131111165391


In [8]:
# Test graph for Example 2: the training graph extended with node 8, which
# forms a second four-node clique (4, 5, 6, 8), and with nodes 9 and 10
# attached to node 2.
test_dataset = Dataset()

test_graph = [
    # Encoded graph:      6--4--1---2--9
    #                     |\/|  |\/| \ |
    #                     |/\|  |/\|  10
    #                     8--5  3--7
    #
    # NOTE(review): the drawing appears to show a 9--10 edge, but no
    # edge(9, 10) / edge(10, 9) fact is listed below - confirm which is
    # intended.

    # Each undirected edge is listed in both directions.
    R.edge(1, 2), R.edge(2, 3), R.edge(3, 1),
    R.edge(2, 1), R.edge(3, 2), R.edge(1, 3),
    R.edge(1, 4), R.edge(4, 5), R.edge(4, 6),
    R.edge(4, 1), R.edge(5, 4), R.edge(6, 4),

    R.edge(1, 7), R.edge(3, 7), R.edge(7, 2),
    R.edge(7, 1), R.edge(7, 3), R.edge(2, 7),

    R.edge(8, 5), R.edge(6, 8), R.edge(4, 8),
    R.edge(5, 8), R.edge(8, 6), R.edge(8, 4),
    R.edge(6, 5), R.edge(2, 10), R.edge(2, 9),
    R.edge(5, 6), R.edge(10, 2), R.edge(9, 2),

    R.feature(1), R.feature(2), R.feature(3),
    R.feature(4), R.feature(5), R.feature(6),
    R.feature(7), R.feature(8), R.feature(9),
    R.feature(10),
]
test_dataset.add_example(test_graph)

# One query per node (1 through 10), in node order. All labels are 0; the
# loop that prints the test results does not display the label.
test_dataset.add_queries([R.predict(node)[0] for node in range(1, 11)])

# Rebind the name to the dataset built by the trained evaluator.
test_dataset = neuralogic_evaluator.build_dataset(test_dataset)

In [9]:
# Print a thresholded verdict and the raw score for each test query. The loop
# relies on results arriving in the order the queries were added, so
# position i is reported as node i + 1. The label is not displayed.
test_results = neuralogic_evaluator.test(test_dataset)
for node, (_label, predicted) in enumerate(test_results):
    print(f"Node {node + 1} is predicted to be in a clique: {bool(int(round(predicted)))}  | {predicted}")

Node 1 is predicted to be in a clique: True  | 0.771131111165391
Node 2 is predicted to be in a clique: True  | 0.771131111165391
Node 3 is predicted to be in a clique: True  | 0.771131111165391
Node 4 is predicted to be in a clique: True  | 0.771131111165391
Node 5 is predicted to be in a clique: True  | 0.771131111165391
Node 6 is predicted to be in a clique: True  | 0.771131111165391
Node 7 is predicted to be in a clique: True  | 0.771131111165391
Node 8 is predicted to be in a clique: True  | 0.771131111165391
Node 9 is predicted to be in a clique: False  | 0.1988256262921168
Node 10 is predicted to be in a clique: False  | 0.1988256262921168
