# HW5 Info Visualization - Using Graphviz



In [None]:
import os
import math

In [None]:
def get_label(longname):
    """
    Build the short display label for a long (path-like) name.

    Only the text after the last os.sep in longname is considered.  If that
    final component is at most 10 characters long it is returned unchanged;
    otherwise its first 7 characters are returned followed by '...'.
    """
    # rpartition() yields (head, sep, tail); the tail is the last component,
    # or the whole string when no separator is present.
    basename = longname.rpartition(os.sep)[2]

    return basename if len(basename) <= 10 else basename[:7] + '...'


def get_rel_paths(here_path, root_path):
    """
    Given the current full path and the root path, return the
    relative path to the directory above, and the relative path
    to this full path.

    Both results are strings expressed relative to root_path:

      rel_dir_path  the directory containing here_path; '.' when here_path
                    is the root itself or sits directly inside the root
      rel_path      here_path itself; '.' when here_path is the root

    Returns the tuple (rel_dir_path, rel_path).
    """
    # path to here from the root (parent) path
    rel_path = os.path.relpath(here_path, root_path)

    # dirname() is '' for a single-component path, which means the parent is
    # the root, spelled '.'.  The result must be a string (not the list of
    # path components) so that callers can use it as a dictionary key.
    rel_dir_path = os.path.dirname(rel_path) or '.'

    return rel_dir_path, rel_path


def remove_excluded_subdirs(dir_list):
    """
    Prune, in place, the directory names that should not be traversed.

    os.walk() lets the caller restrict which branches it descends into by
    editing the 'subdirs' list it yields.  That list is the very object the
    walk() generator keeps using, so it has to be modified in place rather
    than replaced, and it must not be edited while it is being looped over.
    The names to drop are therefore collected first and removed afterwards.

    Removes 'reveal.js', '.git', and anything starting with '_' from dir_list.
    Returns None.
    """
    # Snapshot the names to drop before touching dir_list.
    doomed = ['reveal.js', '.git']
    doomed.extend(entry for entry in dir_list if entry.startswith('_'))

    for entry in doomed:
        if entry in dir_list:
            dir_list.remove(entry)


def show_this_leaf(nm):
    """
    Decide whether the leaf node named nm should be drawn.

    Returns False for names that begin with '.' or '_', names that end with
    '~', and names that both begin and end with '#' (editor backup files).
    Returns True for everything else.
    """
    hidden_prefix = nm.startswith(('.', '_'))
    tilde_backup = nm.endswith('~')
    hash_wrapped = nm.startswith('#') and nm.endswith('#')

    return not (hidden_prefix or tilde_backup or hash_wrapped)


class Node:
    """
    One entry (file or directory) of the tree, able to write itself as DOT.

    Attributes:
        parent            the parent Node, or None for the root
        name              the long name given at construction; used as the DOT node ID
        label             short display text, get_label(name)
        kids              child Nodes, in the order they were created
        descendant_count  counter maintained by add_descendant_to_all_ancestors()
    """

    def __init__(self, parent_node, name):
        """
        parent_node should be the Node instance of the parent.

        name is a long name, like a full relative path.  It needs to be
        unique for all nodes in the tree.

        Note how we make a connection to the parent node when this node is created.
        """
        self.parent = parent_node
        if self.parent is not None:
            self.parent._add_kid(self)
        self.name = name
        self.label = get_label(self.name)
        self.kids = []
        self.descendant_count = 0

    @staticmethod
    def _dot_escape(text):
        """
        Make text safe to place between double quotes in DOT output.

        Backslashes are doubled first and then every double quote is
        backslash-escaped, so a name containing '"' or ending in a backslash
        cannot terminate the quoted string early.  Text without either
        character is returned unchanged.
        """
        return text.replace('\\', '\\\\').replace('"', '\\"')

    def _add_kid(self, kid_node):
        """Append kid_node to this node's list of children."""
        self.kids.append(kid_node)

    def add_descendant_to_all_ancestors(self):
        """
        Increment descendant_count on this node and on every ancestor up to
        the root.

        Calling this once on every node of a tree leaves each node's count
        equal to the size of its subtree, the node itself included.
        """
        # Increment for the current node
        self.descendant_count += 1

        # If node has a parent, continue up the chain
        if self.parent is not None:
            self.parent.add_descendant_to_all_ancestors()

    def write_node(self, indent=0):
        """
        Print the DOT statement that defines this node, preceded by 'indent'
        spaces.

        Names ending in .png, .jpg or .svg are filled blue, all others
        light gray.  Nodes without children are drawn as rectangles, nodes
        with children as ellipses.
        """
        fillcolor = 'lightgray'
        shape = 'ellipse'
        if self.name.endswith(('.png', '.jpg', '.svg')):
            fillcolor = 'blue'
        if not self.kids:
            shape = 'rectangle'
        # Escape both strings so unusual file names still yield valid DOT.
        node_id = self._dot_escape(self.name)
        label = self._dot_escape(self.label)
        print(f'{indent*" "}"{node_id}" [label="{label}", fillcolor="{fillcolor}", style="filled", shape="{shape}"];')

    def traverse_node_defs(self, indent=0):
        """
        Write the DOT code that defines this Node and all its descendants.

        Here and in traverse_edge_defs(), 'indent' just helps with formatting
        when writing out the DOT code.
        """
        self.write_node(indent=indent)
        for kid in self.kids:
            kid.traverse_node_defs(indent+4)

    def write_incoming_edge(self, this_parent, indent=0):
        """
        Print the DOT statement for the edge from this_parent to this node,
        preceded by 'indent' spaces.

        The pen width is the integer part of the square root of this node's
        descendant_count.
        """
        penwidth = int(math.sqrt(self.descendant_count))

        # IDs are escaped exactly as in write_node() so the edge endpoints
        # refer to the same nodes that were defined there.
        tail_id = self._dot_escape(this_parent.name)
        head_id = self._dot_escape(self.name)
        print(f'{indent*" "}"{tail_id}" -> "{head_id}" [penwidth={penwidth}];')

    def traverse_edge_defs(self, indent=0):
        """
        Write the DOT code that defines the incoming edges for this Node and
        all its descendants.

        Here and in traverse_node_defs(), 'indent' just helps with formatting
        when writing out the DOT code.
        """
        for kid in self.kids:
            kid.write_incoming_edge(self, indent=indent+4)
            kid.traverse_edge_defs(indent+4)


# Registry of every Node, keyed by its path relative to the root, so that a
# directory's Node can be looked up when its children are created.  The root
# itself is stored under '.'.
nodes = {}
root_path = "/Users/joonjung/CMU-MS-DAS-Vis-S24/docs"
root_node = Node(None, '.')
nodes['.'] = root_node

# Walk the tree top-down, creating a Node for each directory and visible file
for dirname, subdirs, files in os.walk(root_path):
    # Editing 'subdirs' in place stops os.walk() from entering excluded dirs
    remove_excluded_subdirs(subdirs)
    rel_dir_path, rel_path = get_rel_paths(dirname, root_path)

    # Only the root is registered ahead of time; every other directory gets
    # its Node here, attached to its already-registered parent.
    dir_node = nodes.get(rel_path)
    if dir_node is None:
        assert rel_dir_path in nodes
        dir_node = Node(nodes[rel_dir_path], rel_path)
        nodes[rel_path] = dir_node

    # Add a leaf Node under this directory for every file worth drawing
    for leaf_name in filter(show_this_leaf, files):
        leaf_full_path = os.path.join(dirname, leaf_name)
        _, leaf_rel_path = get_rel_paths(leaf_full_path, root_path)
        nodes[leaf_rel_path] = Node(dir_node, leaf_rel_path)

# Calculate number of descendants for all nodes
for tree_node in nodes.values():
    tree_node.add_descendant_to_all_ancestors()

# Write out the Dot code: graph header, all node definitions, then all edges
print("digraph {")
print('    graph [layout=fdp, overlap=false];')
root_node.traverse_node_defs()
root_node.traverse_edge_defs()
print("}")

digraph {
    graph [overlap=false];
"." [label=".", fillcolor="lightgray", style="filled", shape="rectangle"];
}


In [None]:
# Notes for running this from the macOS terminal.
# Make sure the script runs in the directory that you want to represent.
# You can change the layout setting near the end of the script to try other layouts; for this tree, neato, fdp, and sfdp worked well.

# To install Graphviz, first install MacPorts, and then run these commands:
# sudo port install graphviz
# sudo port install graphviz-gui

# In the terminal:

# python script_name.py  prints the raw DOT output, or
# python script_name.py > input.dot   saves the output to the file input.dot, from which you can create a PNG directly
# dot -Tpng input.dot -o output.png , where input.dot is the name of the DOT file; this command turns the DOT file into a PNG image
