In [1]:
### TODO DBG: Docstring or top-level comment
# IMPORTANT: Run this notebook from the top level of the Grid repo.
### TODO DBG: Remove this block; it exists only because my env isn't set up with Torch.
# Registers a Mock under the name "torch" in sys.modules, so any later
# `import torch` resolves to this stub rather than a real torch module.
# Note that the assignment is unconditional: it replaces the entry even if a
# real torch is installed or already imported.
# NOTE(review): presumably needed because the grid imports below pull in torch -- confirm.
import sys
from unittest.mock import Mock
sys.modules["torch"] = Mock()

In [2]:
# Imports and constants
from grid.ipfsapi.version_tree import VersionTreeNode
from grid.ipfsapi.client import Client as IpfsClient

You'll need an IPFS daemon running with its API server listening at localhost:5001.

In [3]:
# Create the IPFS client with its default arguments; this object is shared by every cell below.
# NOTE(review): presumably the defaults point at the daemon on localhost:5001 -- confirm.
ipfs_client = IpfsClient()

We'll build a new version tree and upload its nodes to IPFS, starting with the root.  The IPFS client object can either be provided when a node is constructed, or passed directly into the methods that interact with IPFS.

In [4]:
# The node delimiter, embedded in the grandchild's contents below to show that
# contents containing the delimiter still behave properly.
DELIM = VersionTreeNode.DELIMITER
contents = [
    b"root content",
    b"1st child of root",
    b"2nd child of root",
    b"grandchild, with " + DELIM + b"delimiter" + DELIM,
]

# Root node: it has no parent, so no parent hash is required.
root_node = VersionTreeNode(ipfs_client=ipfs_client, contents=contents[0])
# commit() returns the IPFS multihash addressing the serialized VersionTreeNode object.
root_hash = root_node.commit()

# Two sibling nodes, each naming the root as its parent.
first_child_node = VersionTreeNode(
    ipfs_client=ipfs_client,
    parent_hash=root_hash,
    contents=contents[1],
)
first_child_hash = first_child_node.commit()

second_child_node = VersionTreeNode(
    ipfs_client=ipfs_client,
    parent_hash=root_hash,
    contents=contents[2],
)
second_child_hash = second_child_node.commit()

# Grandchild node: constructed without a client, so the client is handed to
# commit() instead.
grandchild_node = VersionTreeNode(parent_hash=first_child_hash, contents=contents[3])
grandchild_hash = grandchild_node.commit(ipfs_client)

We've now created four nodes, each with different contents.  Every child node has an edge to its parent, stored as the parent's IPFS multihash.  The root of the tree we've created has two children, one of which has a child of its own.

Let's start by retrieving each node directly by hash, using the static get_node_by_hash method.

In [5]:
def print_nodes(nodes):
    """Print the str() of every item in ``nodes``, separated by a blank line.

    ``nodes`` may be any iterable, including a one-shot iterator; it is fully
    consumed before anything is printed.
    """
    rendered = [str(node) for node in nodes]
    print("\n\n".join(rendered))

# Fetch each committed node back from IPFS by its multihash, in creation order.
hashes = [root_hash, first_child_hash, second_child_hash, grandchild_hash]
retrieved_nodes = [
    VersionTreeNode.get_node_by_hash(node_hash, ipfs_client)
    for node_hash in hashes
]
print_nodes(retrieved_nodes)

# Every retrieved node must compare equal to the local node it was committed from.
assert retrieved_nodes == [
    root_node,
    first_child_node,
    second_child_node,
    grandchild_node,
]

VersionTreeNode with contents: b'root content'
parent_hash: None

VersionTreeNode with contents: b'1st child of root'
parent_hash: QmatVYrLdmpKurr5APtAq9Qqp4rFuUkTPEpDLuuhsTtPbY

VersionTreeNode with contents: b'2nd child of root'
parent_hash: QmatVYrLdmpKurr5APtAq9Qqp4rFuUkTPEpDLuuhsTtPbY

VersionTreeNode with contents: b'grandchild, with |delimiter|'
parent_hash: QmYHZ3BaivYbhzz86jCUtJLsj3BtjHTX8A1NX5gVLh6LHf


Now we'll try out the get_with_ancestors() method, which returns an iterator over the node itself followed by each of its ancestors in turn, ending at the root.  Note that since this returns an iterator, we can go as far back in the tree as we like without incurring the network or computational cost of retrieving the whole chain.

In [11]:
# Nodes that were not instantiated with an IPFS client need to pass one in.
# grandchild_node was constructed without a client, so ipfs_client is supplied
# here; the other three nodes were constructed with ipfs_client and are called
# with no arguments.
print("******* Grandchild node ancestor chain: ************")
print_nodes(grandchild_node.get_with_ancestors(ipfs_client))
print("\n******* First child node ancestor chain: ***********")
print_nodes(first_child_node.get_with_ancestors())
print("\n******* Second child node ancestor chain: ***********")
print_nodes(second_child_node.get_with_ancestors())
# The root has no parent, so its chain contains only the root itself.
print("\n******* Root's ancestor chain: *******")
print_nodes(root_node.get_with_ancestors())

******* Grandchild node ancestor chain: ************
VersionTreeNode with contents: b'grandchild, with |delimiter|'
parent_hash: QmYHZ3BaivYbhzz86jCUtJLsj3BtjHTX8A1NX5gVLh6LHf

VersionTreeNode with contents: b'1st child of root'
parent_hash: QmatVYrLdmpKurr5APtAq9Qqp4rFuUkTPEpDLuuhsTtPbY

VersionTreeNode with contents: b'root content'
parent_hash: None

******* First child node ancestor chain: ***********
VersionTreeNode with contents: b'1st child of root'
parent_hash: QmatVYrLdmpKurr5APtAq9Qqp4rFuUkTPEpDLuuhsTtPbY

VersionTreeNode with contents: b'root content'
parent_hash: None

******* Second child node ancestor chain: ***********
VersionTreeNode with contents: b'2nd child of root'
parent_hash: QmatVYrLdmpKurr5APtAq9Qqp4rFuUkTPEpDLuuhsTtPbY

VersionTreeNode with contents: b'root content'
parent_hash: None

******* Root's ancestor chain: *******
VersionTreeNode with contents: b'root content'
parent_hash: None


`get_node_with_ancestors_by_hash()` is a convenience method that combines `get_node_by_hash()` and `get_with_ancestors()`.

In [7]:
# Look up the grandchild by hash and print it followed by its ancestors.
# NOTE(review): this is invoked through an instance, but only a hash and a client are
# passed; if it is a static/class method like get_node_by_hash, calling it on
# VersionTreeNode would read more clearly -- confirm against the class definition.
print_nodes(grandchild_node.get_node_with_ancestors_by_hash(grandchild_hash, ipfs_client))

VersionTreeNode with contents: b'grandchild, with |delimiter|'
parent_hash: QmYHZ3BaivYbhzz86jCUtJLsj3BtjHTX8A1NX5gVLh6LHf

VersionTreeNode with contents: b'1st child of root'
parent_hash: QmatVYrLdmpKurr5APtAq9Qqp4rFuUkTPEpDLuuhsTtPbY

VersionTreeNode with contents: b'root content'
parent_hash: None
