In [None]:
# For use in tutorial and development only: prepend the parent directory to the
# module search path. Do not include this `sys.path` change in production.
import os
import sys
from os.path import dirname

sys.path.insert(0, "../")

# Evolutionary Shape Prediction

An experiment in evolutionary software using *reinforcement learning* to discover interesting data objects within a given set of graph data.

In [None]:
import kglab

# Prefix -> IRI table handed to the knowledge graph constructor below.
namespaces = dict(
    nom="http://example.org/#",
    wtm="http://purl.org/heals/food/",
    ind="http://purl.org/heals/ingredient/",
    skos="http://www.w3.org/2004/02/skos/core#",
)

kg = kglab.KnowledgeGraph(
    name="A recipe KG example based on Food.com",
    base_uri="https://www.food.com/recipe/",
    language="en",
    namespaces=namespaces,
)

# The RDF file path is built from the parent of the current working directory.
kg.load_rdf(dirname(os.getcwd()) + "/dat/recipes.ttl")

In [None]:
import sys
import inspect

# List the classes found on the `kglab` package. The module object is handed to
# `inspect` directly, so the notebook's own `__name__` global is left alone
# (overwriting it would make the kernel's namespace report itself as "kglab").
clsmembers = inspect.getmembers(kglab, inspect.isclass)
clsmembers

## Graph measures and topological analysis

Let's measure this graph, to develop some estimators that we'll use later...

In [None]:
import pandas as pd

# Show every row when a DataFrame is displayed. The fully qualified option name
# is required: the bare "max_rows" pattern matches more than one option in
# pandas >= 1.4 (e.g. `styler.render.max_rows`) and raises `OptionError`.
pd.set_option("display.max_rows", None)

# Create a Measure object and run it over the knowledge graph; the following
# cells read the counts and tallies held on `measure`.
measure = kglab.Measure()
measure.measure_graph(kg)

In [None]:
# Report the edge and node counts recorded on `measure`.
print("edges", measure.edge_count)
print("nodes", measure.node_count)

In [None]:
# Display the tally held by `s_gen` (presumably subjects -- confirm against kglab.Measure).
measure.s_gen.get_tally()

In [None]:
# Display the tally held by `p_gen` (presumably predicates -- confirm against kglab.Measure).
measure.p_gen.get_tally()

In [None]:
# Display the tally held by `o_gen` (presumably objects -- confirm against kglab.Measure).
measure.o_gen.get_tally()

In [None]:
# Display the tally held by `l_gen` (presumably literals -- confirm against kglab.Measure).
measure.l_gen.get_tally()

In [None]:
# `get_tally_map()` returns a pair, unpacked here as `df` and `link_map`;
# display `df` for `n_gen` (presumably node tallies -- confirm).
df, link_map = measure.n_gen.get_tally_map()
df

In [None]:
# Same call for `e_gen` (presumably edge tallies -- confirm).
# NOTE(review): this rebinds `df` and `link_map`, and `df` is not displayed in this cell.
df, link_map = measure.e_gen.get_tally_map()

In [None]:
# Print the most recently assigned `link_map`.
print(link_map)

## ShapeFactory and evolved shapes

In [None]:
# Create a shape factory from the graph and its measures, and keep a handle on
# the factory's subgraph, which later cells pass to serialize()/deserialize().
factory = kglab.ShapeFactory(kg, measure)
subgraph = factory.subgraph

In [None]:
# Ask the factory for a new shape, then show its serialized form followed by
# each item yielded by `get_rdf()`.
es0 = factory.new_shape()

print(es0.serialize(subgraph))

# A plain loop: nothing is collected, the items are only printed.
for r in es0.get_rdf():
    print(r)

Now we can use this `ShapeFactory` object to evolve a *shape* within the graph, then generate a SPARQL query to test its cardinality:

In [None]:
# `get_sparql()` returns the query text together with its bindings; print both.
sparql, bindings = es0.get_sparql()

print(sparql)
print(bindings)

In [None]:
# Run the generated query and print each result row. The bindings returned by
# `get_sparql()` are passed along with the query text, the same way the `es1`
# query is executed later in this notebook.
for row in kg.query(sparql, bindings=bindings):
    print(row)

We can also use this library to construct a specific shape programmatically, e.g., a recipe:

In [None]:
# Build a shape by hand: the root is typed as a Recipe and linked to four ingredients.
es1 = kglab.EvoShape(kg, measure)

type_uri = "http://purl.org/heals/food/Recipe"
type_node = kglab.EvoShapeNode(uri=type_uri, terminal=True)
es1.add_link(es1.root, kg.get_ns("rdf").type, type_node)

# Every ingredient link uses the same predicate, so the links are added in one
# loop, in the same order as before.
edge_uri = "http://purl.org/heals/food/hasIngredient"

for edge_node_uri in (
    "http://purl.org/heals/ingredient/VanillaExtract",
    "http://purl.org/heals/ingredient/AllPurposeFlour",
    "http://purl.org/heals/ingredient/Salt",
    "http://purl.org/heals/ingredient/ChickenEgg",
):
    edge_node = kglab.EvoShapeNode(uri=edge_node_uri)
    es1.add_link(es1.root, edge_uri, edge_node)

In [None]:
# Print each item yielded by `get_rdf()`, then display the serialized shape.
# A plain loop: nothing is collected, the items are only printed.
for r in es1.get_rdf():
    print(r)

es1.serialize(subgraph)

In [None]:
# Generate the query text and bindings for `es1` and print both.
# NOTE: this rebinds the `sparql` and `bindings` names used earlier for `es0`.
sparql, bindings = es1.get_sparql()

print(sparql)
print(bindings)

Query to find matching instances for this shape `es1` within the graph:

In [None]:
# Run the query with its bindings against the graph and print each result row.
for row in kg.query(sparql, bindings=bindings):
    print(row)

## Leaderboard which can be distributed across a cluster

We can calculate metrics to describe how these shapes `es0` and `es1` might rank on a *leaderboard*:

In [None]:
# Display the value returned by `get_cardinality()` for `es0`.
es0.get_cardinality()

In [None]:
# Display the value returned by `get_cardinality()` for `es1`.
es1.get_cardinality()

Then calculate a vector distance between `es1` and `es0` which we'd generated earlier:

In [None]:
# Display the result of `calc_distance()` called on `es0` with `es1` as the argument.
es0.calc_distance(es1)

Now we can generate a compact, ordinal representation for the `es1` shape, which can be serialized as a string, transferred across a network to an actor, then deserialized as the same shape -- *as long as we use a similarly structured subgraph*:

In [None]:
import json

# Serialize `es1` against the subgraph, then encode the result as a JSON string.
ser = es1.serialize(subgraph)
j_ser = json.dumps(ser)

print(j_ser)

In [None]:
# Decode the JSON string back into Python data (rebinding `ser`) and display it.
ser = json.loads(j_ser)
ser

Test the deserialization:

In [None]:
# Create a fresh shape and populate it from the decoded payload; the value
# returned by `deserialize()` is kept as `uri_map`.
es2 = kglab.EvoShape(kg, measure)
uri_map = es2.deserialize(ser, subgraph)

In [None]:
# Print the root node's URI, then every key/value pair in `uri_map`.
print(es2.root.uri)

for k, v in uri_map.items():
    print(k, v)

In [None]:
# For each edge on the root node, print the object node, then the predicate
# alongside the object's URI.
for e in es2.root.edges:
    print("obj", e.obj)
    print("edge", e.pred, e.obj.uri)

In [None]:
# Print every node in the deserialized shape, followed by its URI.
for n in es2.nodes:
    print(n)
    print(n.uri)

In [None]:
# Print each item yielded by `get_rdf()`, then display the serialized shape.
# A plain loop: nothing is collected, the items are only printed.
for r in es2.get_rdf():
    print(r)

es2.serialize(subgraph)

In [None]:
# Display the (query text, bindings) pair generated for the deserialized shape.
es2.get_sparql()

Prototype a leaderboard:

In [None]:
# Create a leaderboard and display its `df` attribute before any shape is added.
leaderboard = kglab.Leaderboard()
leaderboard.df

In [None]:
# Add the serialized `es0` shape; print the value `add_shape()` returns (kept as
# `dist`), then display the leaderboard's `df`.
dist = leaderboard.add_shape(es0.serialize(subgraph))
print(dist)
leaderboard.df

In [None]:
# Add the serialized `es1` shape; print the value `add_shape()` returns (kept as
# `dist`), then display the leaderboard's `df`.
dist = leaderboard.add_shape(es1.serialize(subgraph))
print(dist)
leaderboard.df

In [None]:
# Build a third shape by hand: the root is typed as a Recipe and linked to a
# single ingredient (Butter).
es3 = kglab.EvoShape(kg, measure)

# NOTE: `type_uri`, `type_node`, `edge_uri`, `edge_node_uri` and `edge_node`
# rebind the names used when `es1` was built.
type_uri = "http://purl.org/heals/food/Recipe"
type_node = kglab.EvoShapeNode(uri=type_uri, terminal=True)
es3.add_link(es3.root, kg.get_ns("rdf").type, type_node)

edge_uri = "http://purl.org/heals/food/hasIngredient"
edge_node_uri = "http://purl.org/heals/ingredient/Butter"
edge_node = kglab.EvoShapeNode(uri=edge_node_uri)
es3.add_link(es3.root, edge_uri, edge_node)

In [None]:
# Serialize `es3` against the subgraph and display the result.
shape = es3.serialize(subgraph)
shape

In [None]:
# Add the serialized `es3` shape; print the value `add_shape()` returns (kept as
# `dist`), then display the leaderboard's `df`.
# NOTE(review): `es3` is serialized again here rather than reusing `shape`.
dist = leaderboard.add_shape(es3.serialize(subgraph))
print(dist)

leaderboard.df

## Generating triads from co-occurrence