# pyCustoms

Matlab's functions are straightforward: One file per function, send in inputs and get outputs.  Python likes namespaces, modules, classes, objects, and packages so a lot of capability is buried deep in package hierarchies.  Combine that with documentation that varies from good to nonexistent and it can be tough to figure out what to invoke and how to invoke it.

pyCustoms is experimental spaghetti code that inspects a package and figures out what links to what.  It uses `importlib` to load packages programmatically, `dir` to list their contents, and `inspect` to classify each entry as a builtin, class, function, module, or 'none of the above' (aka nofta).

pyCustoms returns a tree-like defaultdict of all results and a dependency list of tuples that show how modules, which seem to be the most important, link to one another.

A separate draw_graph function takes the pyCustoms results and uses graphviz to generate a visual representation with some choice of engines, layout options, and the like.  The network view is amusing initially, since the right choice of parameters can make it look like a fireworks display.  However, it takes a lot of zooming and panning to see the details, and ultimately the pretty-printed defaultdict is faster and more useful.

This code could be improved substantially, turned into a class, and so on but the results of this experiment don't justify that effort.

Ravi Narasimhan <br> 
3 June 2017

In [1]:
  
def pyCustoms(pkg_name, package_dictionary=None, dependency=None):
    """
    Inspect a package and, recursively, the submodules it exposes.

    pyCustoms imports ``pkg_name``, lists its contents with dir(), and sorts
    every name into one of five buckets using the inspect module: modules,
    functions, classes, builtins, or none-of-the-above (aka nofta).  It then
    walks down the '.' hierarchy into each module it found.

    Input:
    pkg_name            Dotted package/module name as a string.  If it can't
                        be imported, a message is printed and the function
                        exits via sys.exit(1) (i.e. raises SystemExit).
    package_dictionary  Used by the recursion: the dictionary being filled in.
                        Leave as None for a fresh defaultdict(dict).
    dependency          Used by the recursion: the list of module links being
                        filled in.  Leave as None for a fresh list.

    Returns a tuple (pkg_dict, dependency):

    pkg_dict
    - keys are dotted module names
    - values are dicts with the keys 'builtins', 'modules', 'classes',
      'functions', and 'nofta', each holding a list of names, OR the
      string 'Uninspectable' (see below)

    dependency
    - a list of tuples in the form (parent module, child module)
    - This shows the hierarchy of modules subject to limitations below

    Every name reported by dir() is recorded, but pyCustoms does not descend
    into modules whose names are prefixed with _ or __

    Some modules exist, can be inspected with dir(), but cannot be imported via
    importlib.import_module.  pyCustoms reports these as 'Uninspectable'
    and continues

    Usage: Enter at any level of a hierarchy

    ans, dep = pyCustoms('numpy.random')  # Explore a numpy submodule

    for key in sorted(ans):  # Print the dictionary
        print("{}:\\n {} \\n".format(key, ans[key]))

    --> Large output

    or

    import pprint
    pprint.pprint(sorted(ans.items()))

    Ravi Narasimhan  3 June 2017

    """

    import importlib
    import inspect
    import sys
    from collections import defaultdict

    try:
        pkg = importlib.import_module(pkg_name)
    except ImportError:
        # Single-argument print(...) behaves the same on Python 2 and 3
        print("Can't find  {}.  Stopping".format(pkg_name))
        # sys.exit raises SystemExit by itself; no extra 'raise' is needed
        sys.exit(1)

    # Compare against None so that an empty container supplied by the
    # caller is filled in rather than silently replaced
    if package_dictionary is None:
        pkg_dict = defaultdict(dict)
    else:
        pkg_dict = package_dictionary
    if dependency is None:
        dependency = []

    modules = []
    functions = []
    classes = []
    builtins = []
    noneoftheabove = []

    # Checked in this order; the first predicate that matches wins
    classifiers = (
        (inspect.ismodule, modules),
        (inspect.isfunction, functions),
        (inspect.isclass, classes),
        (inspect.isbuiltin, builtins),
    )

    for name in dir(pkg):
        try:
            member = getattr(pkg, name)
        except AttributeError:
            # dir() can list names that can't actually be fetched.
            # Record them rather than aborting the whole walk.
            noneoftheabove.append(name)
            continue
        for predicate, bucket in classifiers:
            if predicate(member):
                bucket.append(name)
                break
        else:
            noneoftheabove.append(name)

    pkg_dict[pkg_name] = {
        'builtins': builtins,
        'modules': modules,
        'classes': classes,
        'functions': functions,
        'nofta': noneoftheabove,
    }

    for child in modules:
        if child.startswith('_'):  # Let's skip the unders and dunders
            continue
        child_name = pkg_name + '.' + child
        # The link is recorded whether or not the child can be opened
        dependency.append((pkg_name, child_name))
        # importlib can't open everything: a module that is merely imported
        # by pkg_name (as opposed to being a real submodule of it) shows up
        # in dir() but fails here.  Trap the error, flag it, and move on.
        try:
            importlib.import_module(child_name)
        except ImportError:
            pkg_dict[child_name] = 'Uninspectable'
            continue
        pyCustoms(child_name, pkg_dict, dependency)

    return pkg_dict, dependency



In [2]:


# Let's try to visualize the network.  The engine and
# format options are the most important.  Some choices
# return a result worth looking at.  Others either don't or
# hang, especially on complicated packages like numpy.
# circo is a usable engine, although it doesn't support many
# of the attributes that graphviz allows, like fontsize.

def draw_graph(ans, dep, package=None, engine='circo', output_format='pdf'):
    """
    Draw the results of pyCustoms as a graphviz network.

    Input:
    ans            The dictionary returned by pyCustoms: dotted module name ->
                   dict of 'builtins'/'classes'/'functions'/'nofta' name lists,
                   or the string 'Uninspectable'.
    dep            The list of (parent module, child module) tuples returned
                   by pyCustoms.
    package        Name used for the graph and for the output file
                   (<package>.gv).  If omitted, it is taken from the first
                   tuple in dep or, when dep is empty, from the first key of
                   ans.  This assumes ans and dep came from a single pyCustoms
                   call, where the first link starts at the requested package.
    engine         graphviz layout engine.
    output_format  graphviz output format.

    Returns nothing.  Saves the graph source, renders it, and spawns a viewer.

    Raises ValueError if package is omitted and both ans and dep are empty.
    """
    from graphviz import Digraph

    if package is None:
        if dep:
            package = dep[0][0]
        elif ans:
            package = next(iter(ans))
        else:
            raise ValueError("draw_graph needs a package name or non-empty results")

    u = Digraph(package, filename=package + '.gv',
                engine=engine, format=output_format)

    # Setting an explicit size usually makes for a loadable PDF
    # The adjustable scale seems to help but I can't be sure that it does
    # For numpy and circo, u.attr(size='36,36'), u.graph_attr.update({'scale':'0.66'})
    # gives a pdf that loads in finite time and allows navigation.  Full redraws
    # can be slow, though.  Acrobat DC is surprisingly better than Skim
    # Taking out the shapes around nodes speeds things up.  Perhaps
    # the graphviz primitives have to be interpreted one-by-one by the
    # PDF reader.  I had very little success with svg output.  The
    # nodes and edges are so faint, they are almost impossible to read.
    # The size attributes for svg are not the same as pdf and regardless of
    # settings, Firefox, Safari, Brave, and other browsers had great
    # trouble rendering the files

    # Set the pdf page size in in x in
    u.attr(size='36,36')

    # This is how to change the scale after layout.  Let's leave it in in case
    # we need to do it
    u.graph_attr.update({'scale': '1'})

    # Jerry-rig the fontsize to make the plot readable
    # circo doesn't recognize this, though
    top_level_fontsize = 16
    fontsize = str(top_level_fontsize * 1)

    # Let's make a rudimentary style sheet so we can experiment
    # I originally had different shapes for the member nodes but I think this
    # makes for very slow PDF performance.
    module_node = {'color': 'blue', 'shape': 'box', 'fontsize': fontsize}
    module_edge = {'color': 'blue', 'style': 'dashed', 'penwidth': '1'}

    # (category in the pyCustoms dict, colour), in the order they are drawn
    member_colors = (
        ('builtins', 'red'),
        ('classes', 'gold'),
        ('functions', 'purple'),
        ('nofta', 'black'),
    )

    # First:
    # dep contains the module-to-module connections.  Let's
    # generate the module nodes and connect them using the style
    # information in module_node and module_edge

    for parent, child in dep:
        u.node(parent, **module_node)
        u.node(child, **module_node)
        u.edge(parent, child, **module_edge)

    # Second:
    # We need to hang the contents of each module onto its node
    # We only want to do this once so we need the unique module names.
    # Keeping them in first-seen order (rather than in a set) makes the
    # generated .gv file come out the same from one run to the next.

    unique_modules = []
    seen = set()
    for link in dep:
        for module_name in link:
            if module_name not in seen:
                seen.add(module_name)
                unique_modules.append(module_name)

    # Some packages like Python's math don't have any submodules. Or,
    # we may start down in a hierarchy where there are no submodules
    # to the requested package.  In this case, dep would be empty
    # and we need to ensure at minimum to hang the builtins, classes,
    # etc. of the requested package on its node

    if not unique_modules:
        unique_modules = [package]

    # Now, let's look at each unique module and hang the builtins,
    # classes, functions, and nofta onto the pertinent node

    for module_name in unique_modules:
        # Note that pyCustoms returns the string 'Uninspectable' if it
        # can't open something for whatever reason.  In this case, we
        # get a string as a dict's value as opposed to another dict.
        # The category hubs are still drawn for such a module but nothing
        # is hung on them.
        # .get() is used so that looking up a name never adds an entry
        # to a defaultdict.
        contents = ans.get(module_name)
        inspectable = isinstance(contents, dict)

        for category, color in member_colors:
            node_style = {'color': color, 'shape': 'none', 'fontsize': fontsize}
            edge_style = {'color': color, 'style': 'dashed', 'penwidth': '1'}

            # One hub node per category, e.g. 'numpy-classes'
            hub = module_name + '-' + category
            u.node(hub, **node_style)
            u.edge(module_name, hub, **edge_style)

            if not inspectable:
                continue
            for member in contents.get(category, ()):
                leaf = module_name + '.' + member
                u.node(leaf, **node_style)
                u.edge(hub, leaf, **edge_style)

    # Save a text file with the graph data for graphviz, Gephi, or some other
    # code to process
    u.save()

    # Spawn a viewer if one exists for the output filetype (pdf, svg, ...)
    u.view()

    # Display the graph as an image in an Output cell.  Not very useful, especially
    # for large graphs.  No zoom, pan, or other features
    # u

In [3]:
# Choose the package to explore.  This name is also what
# draw_graph uses to label the graph and its output file.
package = 'matplotlib'

# Run pyCustoms on it.  We get back two things: the dictionary
# describing what each module contains, and the list of
# module-to-module links.
ans, dep = pyCustoms(pkg_name=package)

# Hand both results to the drawing routine
draw_graph(ans=ans, dep=dep)

In [None]:
# Pretty-print the defaultdict returned by pyCustoms as a list of
# (module name, contents) pairs, ordered by module name
import pprint
entries = sorted(ans.items())
pprint.pprint(entries)



In [None]:
# And let's look at the module linkages: dep is the list of
# (parent module, child module) tuples returned by pyCustoms.
# pprint was imported in the previous cell.
pprint.pprint(dep)