# Identifying the level of each housing characteristic

This notebook visualizes the ResStock project as a hierarchical graph, with the housing characteristics as nodes and the dependencies as directed edges.  The housing characteristics at the top of the graph are high-level characteristics that have few dependencies, whereas housing characteristics at the bottom of the graph have many dependencies or depend on characteristics that have many dependencies.

## Python Version

In [None]:
import sys
print(sys.version)

## Import Modules

In [None]:
import os
import numpy as np
import pandas as pd
from glob import glob
import networkx as nx
from nxpd import draw
from os.path import isfile, join
from IPython.display import display
from networkx.drawing.nx_pydot import graphviz_layout

## Functions

In [None]:
def load_adj_graph(path_adj):
    """
    Read an adjacency matrix from a CSV file and build a directed graph from it.

    The first column of the CSV is used as the row index while reading, so
    the graph's nodes carry the names found in the file.

    Parameters
    ----------
    path_adj : str
        Path to the adjacency matrix CSV file.

    Returns
    -------
    adj_df : pandas.DataFrame
        The adjacency matrix, with its row index replaced by 0..n-1.
    G : networkx.DiGraph
        Directed graph built from the adjacency matrix.
    """
    adjacency = pd.read_csv(path_adj, index_col=0)

    # The graph has to be built while the named row index is still in place
    graph = nx.from_pandas_adjacency(adjacency, create_using=nx.DiGraph())

    # Swap the named index for a positional one so rows can be addressed
    # by integer position
    adjacency.reset_index(drop=True, inplace=True)

    return adjacency, graph

def get_level_df(adj_df,longest_path_len,n_nodes):
    """
    Return the level of each housing characteristic (0 is no dependencies).

    A characteristic's level is one more than the highest level among its
    dependencies.

    Parameters
    ----------
    adj_df : pandas.DataFrame
        Adjacency matrix with one column per housing characteristic.  A value
        of 1 at row position r of column c marks characteristic r as a
        dependency of characteristic c.  Rows are expected to be in the same
        order as the columns, because levels are indexed by both.
    longest_path_len : int
        Length (in edges) of the longest dependency chain.  Used as the
        number of passes over the nodes.
    n_nodes : int
        Number of housing characteristics (columns) to process.

    Returns
    -------
    hc_level_df : pandas.DataFrame
        Columns 'housing_characteristic' and 'level', ordered by ascending
        level and, within a level, by column order.  Every characteristic
        appears exactly once.
    level : numpy.ndarray
        Integer level of each characteristic, in column order.
    """
    # Names come from the matrix itself rather than from a notebook global
    node_names = adj_df.columns.values

    # Initialize node level values
    level = np.zeros(len(adj_df),dtype=int)

    # Each pass settles the nodes one level deeper, so longest_path_len
    # passes are enough regardless of the column order
    for _ in range(int(longest_path_len)):
        # For each housing characteristic node
        for i in range(n_nodes):
            # Row positions of this node's dependencies
            dependencies = np.where(adj_df.iloc[:,i].values == 1)[0]

            # This node's level must be one greater than the max dependency level
            if len(dependencies) > 0:
                level[i] = np.max(level[dependencies]) + 1

    # Group housing characteristics by level.  A stable sort keeps column
    # order within a level and includes the deepest level
    # (level == longest_path_len), which a range(longest_path_len) scan misses.
    order = np.argsort(level,kind='mergesort')

    # Create Data Frame
    hc_level_df = pd.DataFrame(
        {
            'housing_characteristic': node_names[order],
            'level': level[order],
        },
        columns=['housing_characteristic','level'],
    )

    return hc_level_df,level

def isolate_levels(G,level,node_names):
    """
    Return the edges of G whose endpoints are at most one level apart.

    Parameters
    ----------
    G : graph
        Object whose ``edges`` attribute iterates over edges; the first two
        items of each edge are the node names of its endpoints.
    level : sequence of numbers
        Level of each node, in the same order as ``node_names``.
    node_names : sequence of str
        Node names (a NumPy array or any other sequence).

    Returns
    -------
    list
        Edges of G, in iteration order, whose endpoint levels differ by no
        more than one.

    Raises
    ------
    KeyError
        If an edge refers to a name that is not in ``node_names``.
    """
    # Build the name -> position lookup once instead of scanning the names
    # for every edge.  setdefault keeps the first position of a repeated name.
    index_of = {}
    for idx, name in enumerate(node_names):
        index_of.setdefault(name, idx)

    edges_new = list()
    for edge in G.edges:
        idx_in = index_of[edge[0]]
        idx_out = index_of[edge[1]]

        # Keep only the edges that do not skip over a level
        jump = int(abs(level[idx_in] - level[idx_out]))
        if jump <= 1:
            edges_new.append(edge)

    return edges_new


## Load Data

In [None]:
# Get all project directories
project_dirs = glob(join('..','..','..','project_*'))

print('Creating a dependency graphs for project:')
print('----------------------------------------')
for project_dir in project_dirs:

    # Create path for adjacency matrix
    path_adj = join(project_dir,'util','dependency_wheel','adjacency_matrix.csv')

    # Load adjacency matrix and construct graph.  A project whose files
    # cannot be read or written is reported and skipped so the remaining
    # projects are still processed.
    try:
        # Load adjacency matrix and graph
        adj_df,G = load_adj_graph(path_adj)

        # Get information about the graph
        print(project_dir)
        n_nodes = len(G.nodes)
        n_edges = len(G.edges)
        node_names = adj_df.columns.values
        longest_path_len = nx.dag_longest_path_length(G)
        edges_org = G.edges
        # nx.info is not available in networkx 3.0 and later; fall back to
        # printing the graph itself there
        graph_info = getattr(nx,'info',None)
        print(graph_info(G) if graph_info is not None else G)
        print('Longest path length:\t %d' % longest_path_len)

        # Get levels
        hc_level_df,level = get_level_df(adj_df,longest_path_len,n_nodes)
        print('Maximum levels: \t %d' % hc_level_df['level'].max() )
        # Blank separator line (a bare `print` does nothing on Python 3)
        print('')

        # Save the levels data frame
        ## Make directory (and any missing parents) if it doesn't exist
        outdir = join(project_dir,'util','dependency_graphs')
        if not os.path.exists(outdir):
            os.makedirs(outdir)

        ## Save data frame
        hc_level_df.to_csv(join(outdir,'hc_levels.csv'),index=False)

        edges_new = isolate_levels(G,level,node_names)

        # Create Plots
        G_new = nx.DiGraph()
        G_new.add_nodes_from(node_names)
        G_new.add_edges_from(edges_new)

        # Node coordinates: x is the node's level, y spreads the nodes of a
        # level around zero.  NOTE: `pos` is not passed to the draw calls below.
        scale_x = 1.0
        scale_y = 1.0
        pos = {}
        for i in range(int(longest_path_len)+1):
            idx = np.where(level == i)[0][::-1]
            n_level = len(idx)

            j = 1
            for idx_i in idx:
                y = n_level*scale_y*(float(j)/float(n_level)) - n_level/2.0
                x = i*scale_x
                pos[node_names[idx_i]] = (x,y)
                j += 1

        out_file = join(outdir,'Dependency_Graph_Full.pdf')
        draw(G,filename=out_file,show=False) # Full Graph
        out_file = join(outdir,'Dependency_Graph_Levels.pdf')
        draw(G_new,filename=out_file,show=False) # Levels Graph
    except IOError as err:
        # Report why the project was skipped instead of failing silently
        print('Skipping %s: %s' % (project_dir,err))
        