In [1]:
# tutorial set up only; do not include this `sys.path` change in production:
import sys

# put the parent directory first on the module search path
sys.path.insert(0, "../")

# Minimal Example

A minimal example of how to build a `NOCK` partition programmatically.
This generates the content of the `dat/tiny.*` files (written below as `foo.*`), based on the recipe for [_Anytime Crepes_](https://www.food.com/recipe/327593) on Food.com.

Import the dependencies

In [2]:
from icecream import ic
import cloudpathlib

from pynock import Edge, Node, Partition

Create the partition

In [3]:
# a single partition, with ID 0, holds every node and edge in this example
part: Partition = Partition(part_id=0)

Perform lookup/create for the `src` node for the `"Anytime Crepes"` recipe

NB: this node has properties, which are not accessible through RDF (they do not appear in the Turtle output below)

In [4]:
# look up (or create) the node keyed by the recipe's URL
src_name: str = "https://www.food.com/recipe/327593"
src_node: Node = part.find_or_create_node(src_name)

# set the `is_rdf` flag and the node's label set
src_node.is_rdf = True
src_node.label_set = {"Recipe"}

# properties stored directly on the node
src_node.prop_map = {"minutes": 8, "name": "anytime crepes"}

Perform lookup/create for the `dst` node for the `"Egg"` ingredient

In [5]:
# look up (or create) the node keyed by the egg ingredient's URI
dst_name: str = "http://purl.org/heals/ingredient/ChickenEgg"
dst_node: Node = part.find_or_create_node(dst_name)

# set the `is_rdf` flag and the node's label set
dst_node.is_rdf = True
dst_node.label_set = {"Ingredient"}

Define an edge connecting `src` => `dst` for this ingredient

In [6]:
# recipe --uses_ingredient--> egg; the returned edge is shown as the cell output
part.create_edge(src_node, "http://purl.org/heals/food/uses_ingredient", dst_node)

Edge(rel=1, node_id=1, truth=1.0, prop_map={})

Perform lookup/create for the `dst` node for the `"Milk"` ingredient

In [7]:
# rebind `dst_name` / `dst_node` to the milk ingredient
dst_name = "http://purl.org/heals/ingredient/CowMilk"
dst_node = part.find_or_create_node(dst_name)

# set the `is_rdf` flag and the node's label set
dst_node.is_rdf = True
dst_node.label_set = {"Ingredient"}

Define an edge connecting `src` => `dst` for this ingredient

In [8]:
# recipe --uses_ingredient--> milk; the returned edge is shown as the cell output
part.create_edge(src_node, "http://purl.org/heals/food/uses_ingredient", dst_node)

Edge(rel=1, node_id=2, truth=1.0, prop_map={})

Perform lookup/create for the `dst` node for the `"Flour"` ingredient

NB: this node has properties, which are not accessible through RDF (they do not appear in the Turtle output below)

In [9]:
# rebind `dst_name` / `dst_node` to the flour ingredient
dst_name = "http://purl.org/heals/ingredient/WholeWheatFlour"
dst_node = part.find_or_create_node(dst_name)

# set the `is_rdf` flag and the node's label set
dst_node.is_rdf = True
dst_node.label_set = {"Ingredient"}

# property stored directly on the node
dst_node.prop_map = {"vegan": True}

Define an edge connecting `src` => `dst` for this ingredient

In [10]:
# recipe --uses_ingredient--> flour; the returned edge is shown as the cell output
part.create_edge(src_node, "http://purl.org/heals/food/uses_ingredient", dst_node)

Edge(rel=1, node_id=3, truth=1.0, prop_map={})

Perform lookup/create for the `dst` node for the `"wtm:Recipe"` parent

In [11]:
# rebind `dst_name` / `dst_node` to the parent `Recipe` node
dst_name = "http://purl.org/heals/food/Recipe"
dst_node = part.find_or_create_node(dst_name)

# set the `is_rdf` flag and the node's label set
dst_node.is_rdf = True
dst_node.label_set = {"top_level"}

Define an edge connecting `src` => `dst` for this `rdf:type` relationship

In [12]:
# recipe --rdf:type--> Recipe; the returned edge is shown as the cell output
part.create_edge(src_node, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", dst_node)

Edge(rel=2, node_id=4, truth=1.0, prop_map={})

Serialize the partition to multiple formats

In [13]:
# write the same partition three times, once per supported format,
# into the current working directory

# Parquet
part.save_file_parquet(cloudpathlib.AnyPath("foo.parq"))

# CSV, with the `sort` option enabled
part.save_file_csv(cloudpathlib.AnyPath("foo.csv"), sort=True)

# RDF, using the "ttl" format
part.save_file_rdf(cloudpathlib.AnyPath("foo.ttl"), rdf_format="ttl")

Check the files "foo.*" to see what was constructed programmatically

In [14]:
# IPython shell escape: print the `foo.ttl` file written by `save_file_rdf`
!cat foo.ttl

@prefix ns1: <http://purl.org/heals/food/> .

<https://www.food.com/recipe/327593> a ns1:Recipe ;
    ns1:uses_ingredient <http://purl.org/heals/ingredient/ChickenEgg>,
        <http://purl.org/heals/ingredient/CowMilk>,
        <http://purl.org/heals/ingredient/WholeWheatFlour> .



In [15]:
# IPython shell escape: print the `foo.csv` file written by `save_file_csv`
!cat foo.csv

"src_name","edge_id","rel_name","dst_name","truth","shadow","is_rdf","labels","props"
"http://purl.org/heals/food/Recipe",-1,"","",1.0,-1,True,"top_level",""
"http://purl.org/heals/ingredient/ChickenEgg",-1,"","",1.0,-1,True,"Ingredient",""
"http://purl.org/heals/ingredient/CowMilk",-1,"","",1.0,-1,True,"Ingredient",""
"http://purl.org/heals/ingredient/WholeWheatFlour",-1,"","",1.0,-1,True,"Ingredient","{""vegan"":true}"
"https://www.food.com/recipe/327593",-1,"","",1.0,-1,True,"Recipe","{""minutes"":8,""name"":""anytime crepes""}"
"https://www.food.com/recipe/327593",0,"http://purl.org/heals/food/uses_ingredient","http://purl.org/heals/ingredient/ChickenEgg",1.0,-1,True,"",""
"https://www.food.com/recipe/327593",1,"http://purl.org/heals/food/uses_ingredient","http://purl.org/heals/ingredient/CowMilk",1.0,-1,True,"",""
"https://www.food.com/recipe/327593",2,"http://purl.org/heals/food/uses_ingredient","http://purl.org/heals/ingredient/WholeWheatFlour",1.0,-1,True,"",""
"https://www.foo

Show the dataframe representation

In [16]:
# Build the dataframe representation of the partition; `head()` as the cell's
# last expression displays its first rows.
df = part.to_df()
df.head()

Unnamed: 0,src_name,edge_id,rel_name,dst_name,truth,shadow,is_rdf,labels,props
0,https://www.food.com/recipe/327593,-1,,,1.0,-1,True,Recipe,"{""minutes"":8,""name"":""anytime crepes""}"
1,https://www.food.com/recipe/327593,0,http://purl.org/heals/food/uses_ingredient,http://purl.org/heals/ingredient/ChickenEgg,1.0,-1,True,,
2,https://www.food.com/recipe/327593,1,http://purl.org/heals/food/uses_ingredient,http://purl.org/heals/ingredient/CowMilk,1.0,-1,True,,
3,https://www.food.com/recipe/327593,2,http://purl.org/heals/food/uses_ingredient,http://purl.org/heals/ingredient/WholeWheatFlour,1.0,-1,True,,
4,https://www.food.com/recipe/327593,3,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://purl.org/heals/food/Recipe,1.0,-1,True,,
