In [None]:
import sys
import os
import platform

import pandas as pd
import numpy as np
from IPython.display import display, HTML

import hatchet as ht

# Load in a single data file and visualize the tree and dataframe

In [None]:
# Path to a Caliper JSON-split file (relative path, resolved against the
# notebook's working directory)
caliper_file = "../data/lulesh-1node/lulesh-annotation-profile-1core-nompi.json"

# Read file into a Hatchet GraphFrame using from_caliper() API.
# `gf` is the GraphFrame used by the cells that follow.
gf = ht.GraphFrame.from_caliper(caliper_file)

In [None]:
# Display numeric metric columns (such as inclusive and exclusive time), which can be visualized on the tree
gf.show_metric_columns()

In [None]:
# Print the terminal tree representation, annotated with the "time" metric
# column (exclusive time)
print(gf.tree(metric_column="time"))

In [None]:
# Show the built-in help for Hatchet's tree() method to see the parameters it accepts
help(gf.tree)

In [None]:
# Display the GraphFrame's dataframe of metrics using the notebook's rich rendering
display(gf.dataframe)

# Filter and squash a tree

In [None]:
# Filter the tree to keep nodes with a time value greater than 300,000.
# NOTE(review): the original note gave the unit as seconds; the unit is whatever
# the profile records for "time" -- TODO confirm.
def filter_func(row):
    """Return True when the row's "time" metric exceeds 300e3.

    `row` is one dataframe row as passed in by GraphFrame.filter(); only its
    "time" entry is read.
    """
    return row["time"] > 300e3


# squash=False: only filter here; squashing is done as a separate step later.
filter_gf = gf.filter(filter_func, squash=False)

In [None]:
# Render the dataframe of the filtered (not yet squashed) GraphFrame as an HTML table
display(HTML(filter_gf.dataframe.to_html()))

In [None]:
# Squash as a separate step, since the filter above was called with squash=False
squash_gf = filter_gf.squash()

In [None]:
# Report the number of nodes in the input graph and in the filtered + squashed graph
input_size = len(gf.graph)
print(f"Input Graph Size                       : {input_size}")
squashed_size = len(squash_gf.graph)
print(f"Result (after filter/squash) Graph Size: {squashed_size}")

In [None]:
# Print the resulting (filtered and squashed) tree, annotated with the "time" metric
print(squash_gf.tree(metric_column="time"))

In [None]:
# Second filter example: select nodes by function name. filter() is called
# without a squash argument here, so filtering and squashing happen in one step.
kept_names = ("main", "lulesh.cycle")
squash_gf = gf.filter(lambda row: row["name"] in kept_names)
print(squash_gf.tree(metric_column="time"))

# Filter using callpath query language

In [None]:
# Aggregate metrics across ranks (in this case, we only have data for rank 0, so this will
# drop the rank index column).
# NOTE: the return value is not assigned, so the cells below rely on
# drop_index_levels() having updated `gf` itself.
gf.drop_index_levels(function=np.max)

# Define call path query: a node named "LagrangeNodal" followed by the "*" wildcard
# (presumably "any nodes below it" -- see the Hatchet query language docs)
query = [{"name": "LagrangeNodal"}, "*"]

# Filter and squash the tree in one call, using the query instead of a function
query_gf = gf.filter(query, squash=True)

# Print the resulting tree
print(query_gf.tree(metric_column="time"))

# Aggregate data across ranks and/or threads

In [None]:
# Path to a Caliper file (this rebinds `caliper_file` from the first example)
caliper_file = "../data/lulesh-1node/lulesh-annotation-profile-27cores.json"

# Read file into a Hatchet GraphFrame (this rebinds `gf` as well)
gf = ht.GraphFrame.from_caliper(caliper_file)

# Render the dataframe as an HTML table, before any aggregation
display(HTML(gf.dataframe.to_html()))

In [None]:
# Drop the extra index levels, combining their rows with np.max, then show the
# dataframe. The result of drop_index_levels() is not assigned, so this relies
# on `gf` being updated by the call itself.
gf.drop_index_levels(function=np.max)
display(gf.dataframe)

In [None]:
# Print the resulting dataframe of metrics as an HTML table
# (the same dataframe the previous cell displayed)
display(HTML(gf.dataframe.to_html()))

# Calculate speedup between two trees

In [None]:
# Add cali-query to PATH.
# The update is guarded so that re-running this cell does not append the same
# directory again, and so that an unset PATH does not raise KeyError.
cali_query_path = "../Caliper-2.5.0/install/bin"
current_path = os.environ.get("PATH", "")
if cali_query_path not in current_path.split(os.pathsep):
    os.environ["PATH"] = (
        current_path + os.pathsep + cali_query_path if current_path else cali_query_path
    )

# Define caliper query for filtering profile: group by `grouping_attribute`,
# sum `default_metric`, and emit json-split output.
grouping_attribute = "prop:nested"
default_metric = "avg#inclusive#sum#time.duration"
query = (
    f"select {grouping_attribute},sum({default_metric}) "
    f"group by {grouping_attribute} format json-split"
)

In [None]:
# Path to Caliper files (raw .cali files, unlike the JSON files used earlier)
cali_file1 = "../data/cDPu64825TuLB5ujG_0.cali"
cali_file2 = "../data/cjDCIuaXAoayBi9Lr_2.cali"

# Read Caliper files into a Hatchet GraphFrame using from_caliper() API.
# The query string from the previous cell is passed as the second argument.
gf = ht.GraphFrame.from_caliper(cali_file1, query)   # 1 rank
gf2 = ht.GraphFrame.from_caliper(cali_file2, query)  # 64 ranks

In [None]:
# Print the tree representation of the 1-rank run using the inclusive time metric
print(gf.tree(metric_column="time (inc)"))

In [None]:
# Print the tree representation of the 64-rank run using the inclusive time metric
print(gf2.tree(metric_column="time (inc)"))

In [None]:
# Compute the speedup of 1 rank to 64 ranks by dividing the 1-rank GraphFrame (gf)
# by the 64-rank GraphFrame (gf2)
gf3 = gf / gf2

In [None]:
# Print the resulting tree
# Two things to note here:
# 1) The MPI nodes are annotated with a green arrow that points to the right. This indicates that those
#    nodes exist only in the right tree (i.e., 64 ranks). By right tree, we are referring to the position
#    in the equation gf3 = gf / gf2 as shown in the above cell.
# 2) Nodes with good speedup are highlighted in red, although it may be preferable to color these nodes
#    in green.
print(gf3.tree(metric_column="time (inc)"))

In [None]:
# Print the resulting tree again, but reverse the color scheme (invert_colormap=True),
# so red identifies nodes with poor scaling (low values)
print(gf3.tree(metric_column="time (inc)",
               invert_colormap=True))

In [None]:
# Display the dataframe of metrics for the speedup GraphFrame (gf3 = gf / gf2)
display(gf3.dataframe)