In [1]:
from dvc.repo import Repo as DVCRepo
import pathlib
r = DVCRepo(r"D:\ig_pipeline")

In [2]:
# Boolean flag determining whether or not stages will be checked for status
CHECK_PROCESSED = True

In [3]:
stages = r.stages

In [4]:
dir(next(s for s in stages if s.is_data_source))
print(pathlib.Path(next(s for s in stages if s.is_data_source).outs[0].fs_path).name)

processed.max


In [5]:
s_to_deps = {s: [d.fs_path for d in s.deps] for s in stages}
out_to_s = {o.fs_path: s for s in stages for o in s.outs}

In [6]:
import collections
out_to_s_dup = collections.defaultdict(list)
for s in stages:
    for o in s.outs:
        out_to_s_dup[o.fs_path].append(s)

In [7]:
[x for x, v in out_to_s_dup.items() if len(v) > 1]

[]

In [8]:
import networkx as nx

In [9]:
G = nx.DiGraph()

In [10]:
def name(s):
    if s.is_data_source:
        return pathlib.Path(s.outs[0].fs_path).name
    return s.name

In [11]:
def canonicalize(s):
    if s.is_data_source:
        return pathlib.Path(s.outs[0].fs_path).name
        
    return s.name.split("@")[0] if not s.is_data_source else "data_source"

In [12]:
if CHECK_PROCESSED:
    r.lock.lock()

# Process nodes
for s in s_to_deps.keys():
    this_node = canonicalize(s)
    if this_node not in G.nodes:
        G.add_node(this_node, total=0, changed=0, total_set=set(), changed_set=set())
        
    if CHECK_PROCESSED:
        G.nodes[this_node]["total"] += 1
        G.nodes[this_node]["total_set"].add(name(s))
        if s.changed():
            G.nodes[this_node]["changed"] += 1
            G.nodes[this_node]["changed_set"].add(name(s))
    
# Add dependencies
for s in s_to_deps.keys():
    this_node = canonicalize(s)
    for dep in s_to_deps[s]:
        if dep in out_to_s:
            from_node = canonicalize(out_to_s[dep])
            G.add_edge(from_node, this_node)
            
if CHECK_PROCESSED:
    r.lock.unlock()

In [13]:
for x in nx.topological_sort(G):
    print(x)

aggregate_scenes
combined_room_object_list
processed.max
raw.max
textures
collision_meshes.zip
bathroom.max
living.max
proxy
object_list
room_object_list
sanitycheck
object_list_success
combined_room_object_list_future
sanitycheck_success
object_inventory
object_inventory_future
export_meshes
aggregate_metadata
object_inventory_success
export_meshes_success
aggregate_metadata_success
export_objs_global
export_objs_global_success
make_maps
usdify_objects
pack_dataset
generate_object_images
usdify_dataset


In [14]:
# In mermaid format for pasting into README
for f, t in G.edges:
    print(f"    {f} --> {t}")

    aggregate_metadata --> aggregate_metadata_success
    aggregate_metadata --> make_maps
    aggregate_metadata --> pack_dataset
    aggregate_metadata --> usdify_objects
    aggregate_metadata_success --> pack_dataset
    aggregate_metadata_success --> usdify_objects
    aggregate_scenes --> make_maps
    aggregate_scenes --> pack_dataset
    export_meshes --> export_meshes_success
    export_meshes --> export_objs_global
    export_meshes_success --> export_objs_global
    export_objs_global --> export_objs_global_success
    export_objs_global --> make_maps
    export_objs_global --> pack_dataset
    export_objs_global --> usdify_objects
    export_objs_global_success --> pack_dataset
    object_inventory --> aggregate_metadata
    object_inventory --> object_inventory_success
    object_list --> export_meshes
    object_list --> export_objs_global
    object_list --> object_inventory
    object_list --> object_inventory_future
    object_list --> object_list_success
    object_li

In [15]:
import matplotlib.pyplot as plt
from networkx.drawing.nx_pydot import graphviz_layout
pos = graphviz_layout(G, prog="dot")
nx.draw(G, pos)
plt.show()

"dot" with args ['-Tdot', 'C:\\Users\\Cem\\AppData\\Local\\Temp\\tmpo4ws8r18'] returned code: 1

stdout, stderr:
 b''



AssertionError: "dot" with args ['-Tdot', 'C:\\Users\\Cem\\AppData\\Local\\Temp\\tmpo4ws8r18'] returned code: 1

In [None]:
if CHECK_PROCESSED:
    print("Completion ratios:")
    for x in nx.topological_sort(G):
        completion_ratio = 1 - (G.nodes[x]["changed"] / G.nodes[x]["total"])
        completion_percentage = int(completion_ratio * 100)
        print(f"{x}: {completion_percentage}%")

In [None]:
completed_meshes = G.nodes["export_meshes"]["total_set"] - G.nodes["export_meshes"]["changed_set"]
for e in sorted(completed_meshes):
    obj = e.split("/")[-1]
    print(f'    "{obj}",')


In [22]:
# Clear out the stuff that failed
import ruamel.yaml
import glob, json

with open(r"D:\ig_pipeline\dvc.lock") as f:
    yaml = ruamel.yaml.YAML(typ="rt")
    lock = yaml.load(f)

stages = list(lock["stages"].keys())
for stage in stages:
    if "legacy_" in stage and "legacy_batch" not in stage:
        del lock["stages"][stage]
    
with open(r"D:\ig_pipeline\dvc.lock", "w") as f:
    yaml = ruamel.yaml.YAML()
    yaml.default_flow_style = False
    yaml.dump(lock, f)