# Identifying the level of each housing characteristic

This notebook uses the UQ conda env. It visualizes the ResStock project as a hierarchical graph, with the housing characteristics as nodes and the dependencies as directed edges. The housing characteristics at the top of the graph are high-level characteristics that have few dependencies, whereas housing characteristics at the bottom of the graph have many dependencies or depend on characteristics that have many dependencies.

## Python Version

In [None]:
import sys

# Record the interpreter version this notebook was run with
python_version = sys.version
print(python_version)

## Import Modules

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout

import eulpuq.bayesian_update.children as ch

## Functions

In [None]:
def load_adj_graph(path_adj):
    """
    Read the adjacency matrix CSV at ``path_adj`` and build the directed
    graph it describes.

    Returns the adjacency data frame (with its index replaced by a default
    integer index) together with the ``nx.DiGraph`` built from it.
    """
    # The first CSV column holds the row labels of the adjacency matrix
    adjacency = pd.read_csv(path_adj, index_col=0)

    # Build the graph while the row labels are still in place
    digraph = nx.from_pandas_adjacency(adjacency, create_using=nx.DiGraph())

    # Drop the label index for convenience
    adjacency = adjacency.reset_index(drop=True)

    return adjacency, digraph


def get_level_df(graph):
    """
    Assign every node of the dependency graph a level in the hierarchy.

    A node without incoming edges gets level 0. Every other node gets one
    more than the highest level among its direct predecessors, which is the
    length of the longest path leading to it.

    Parameters
    ----------
    graph : directed networkx graph
        Dependency graph; it must be acyclic, otherwise
        ``nx.topological_sort`` raises.

    Returns
    -------
    pandas.DataFrame
        One row per node, in topological order, with the columns
        ``housing_characteristic`` and ``level``.
    """
    hc_levels = {}

    # Topological order guarantees that all predecessors of a node already
    # have their level, and the deepest ancestor is always reached through a
    # direct predecessor, so there is no need to collect all ancestors.
    for node in nx.topological_sort(graph):
        hc_levels[node] = max(
            (hc_levels[pred] + 1 for pred in graph.predecessors(node)),
            default=0,
        )

    hc_level_df = pd.DataFrame(
        list(hc_levels.items()), columns=["housing_characteristic", "level"]
    )

    return hc_level_df


def isolate_levels(G, hc_level_df):
    """
    Return the edges of ``G`` that connect nodes on neighbouring levels.

    ``hc_level_df`` supplies the level of each node through its
    ``housing_characteristic`` and ``level`` columns. An edge is kept only
    when the levels of its two end points differ by exactly one.
    """
    node_level = hc_level_df.set_index("housing_characteristic")["level"]

    return [
        edge
        for edge in G.edges
        if np.abs(node_level[edge[0]] - node_level[edge[1]]) == 1
    ]


## Calculate adjacency_matrix

In [None]:
override_existing_adj_df = True # <-----

# Get all project directories. Sorted so every run processes them in the same
# order; stray files that merely match the "project_*" pattern are ignored.
project_dirs = sorted(
    path
    for path in Path(".").resolve().parent.parent.parent.glob("project_*")
    if path.is_dir()
)

for project_dir in project_dirs:
    outdir = project_dir / 'util' / 'dependency_wheel'
    outfile = outdir / "adjacency_matrix.csv"

    # Keep an existing matrix unless an override was requested, but still
    # create one when it is missing so the plotting step has a file to load.
    # Skipping here also avoids running level_calc only to discard its result.
    if outfile.exists() and not override_existing_adj_df:
        print(f"Kept existing adj_df for {project_dir}:\n  {outfile}")
        continue

    path_HCs = str(project_dir / "housing_characteristics")
    level_dct, adj_df = ch.level_calc(path_HCs)

    outdir.mkdir(parents=True, exist_ok=True)
    adj_df.to_csv(outfile, index=True)

    print(f"Saved adj_df for {project_dir} to:\n  {outfile}")

## Load data and plot

In [None]:
# for colors: https://graphviz.org/doc/info/colors.html
color_cycle = ["gainsboro", "darkslategray1", "yellow", "lightpink", "yellowgreen", "tan1", "plum",
               "antiquewhite1", "lightskyblue2", "gold", "thistle1", "palegreen1", "darksalmon", "mediumpurple1"]


def _draw_level_colored_graph(graph, level_by_node, color_by_level, out_file):
    """
    Draw ``graph`` to ``out_file`` with the Graphviz ``dot`` layout.

    Nodes are drawn as filled boxes whose fill colour is looked up from the
    node's level (``level_by_node[node]`` -> ``color_by_level[level]``).
    Returns the AGraph that was drawn.
    """
    agraph = nx.nx_agraph.to_agraph(graph)
    agraph.node_attr['style'] = 'filled'
    agraph.node_attr['shape'] = 'box'
    agraph.node_attr['fontsize'] = 8
    agraph.node_attr['height'] = 0.5
    agraph.node_attr['margin'] = 0.1
    agraph.edge_attr['color'] = 'darkgray'

    # color labels by level
    for node in agraph.nodes():
        level = level_by_node[node]
        agraph.get_node(node).attr['fillcolor'] = color_by_level[level]

    agraph.draw(out_file, prog="dot")
    return agraph


for project_dir in project_dirs:

    print(f'Creating dependency graphs for {project_dir.name}:')
    print('----------------------------------------')
    print(project_dir)

    # Create path for adjacency matrix
    path_adj = project_dir / 'util/dependency_wheel/adjacency_matrix.csv'

    try:
        # Load adjacency matrix and graph
        adj_df, G = load_adj_graph(path_adj)
        hc_level_df = get_level_df(G)

        # Get information about the graph
        longest_path_len = nx.dag_longest_path_length(G) # max level != num of levels
        print(f'DiGraph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges')
        print('Max level: %d' % longest_path_len)

        # Sanity check: levels run from 0 to the longest path length
        nlevels = int(longest_path_len+1)
        assert nlevels == hc_level_df["level"].nunique(), (
            "number of distinct levels does not match the longest path length"
        )

        # Save the levels data frame (make the directory if it doesn't exist)
        outdir = project_dir / 'util'/ 'dependency_graphs'
        outdir.mkdir(parents=True, exist_ok=True)
        hc_level_df.to_csv(outdir / 'hc_levels.csv',index=False)

        # Map every level to a colour, reusing the cycle when there are more
        # levels than colours
        level_dct = hc_level_df.set_index("housing_characteristic")["level"]
        colors = [color_cycle[level % len(color_cycle)] for level in range(nlevels)]
        level_color_dct = dict(zip(range(nlevels), colors))

        ### [1] dependency graph full
        print("1. Full dependency graph...")
        out_file = outdir / 'Dependency_Graph_Full.pdf'
        A = _draw_level_colored_graph(G, level_dct, level_color_dct, out_file)

        ### [2] dependency graph levels
        print("2. Level dependency graph...")

        # Same nodes, but only the edges between neighbouring levels
        edges_new = isolate_levels(G, hc_level_df)
        G_new = nx.DiGraph()
        G_new.add_nodes_from(list(G.nodes))
        G_new.add_edges_from(edges_new)

        out_file = outdir / 'Dependency_Graph_Levels.pdf'
        A = _draw_level_colored_graph(G_new, level_dct, level_color_dct, out_file)

        print(f">> Dependency graphs created {project_dir.name}, check graphs at: {outdir}\n")
    except OSError as err:
        # Best effort: a project whose files cannot be read or written is
        # skipped, but the reason is reported instead of silently ignored.
        print(f">> Skipped {project_dir.name}: {err}\n")
        