# Identifying the sampling level of each housing characteristic

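This notebook builds a hierarchical graph visualization of the housing characteristic dependencies. Each characteristic is assigned a level, one greater than the highest level among its dependencies, which shows the order in which the characteristics can be sampled.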

## Import Modules

In [1]:
import numpy as np
import pandas as pd
from IPython.display import display
import networkx as nx
from networkx.drawing.nx_pydot import graphviz_layout
from nxpd import draw

import matplotlib.pyplot as plt
%matplotlib inline

## Load Data

In [2]:
# Load the adjacency matrix. Rows and columns are both housing
# characteristics; the level computation further down treats the rows that
# hold a 1 in a column as the dependencies of that column's characteristic.
adj_df = pd.read_csv('../dependencyWheels/adjacency matrix.csv',index_col=0)

# The later cells address rows by position and columns by name, so both axes
# must list the characteristics in the same order or the levels come out wrong.
assert list(adj_df.index) == list(adj_df.columns), \
    'Adjacency matrix rows and columns must list the nodes in the same order'

# Create Graph
G = nx.from_pandas_adjacency(adj_df,nx.DiGraph())
n_nodes = len(G.nodes)
n_edges = len(G.edges)
node_names = adj_df.columns.values
longest_path_len = nx.dag_longest_path_length(G)
edges_org = G.edges

# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0; printing the
# graph itself is the supported summary there. Keep nx.info() where it exists
# so the output is unchanged on older installations.
print(nx.info(G) if hasattr(nx, 'info') else G)
print('Longest path length:\t %d' % longest_path_len)

# Reset index for convenience: rows are addressed by integer position from here on.
# This must happen after the graph is built, which needs the labelled index.
adj_df.reset_index(inplace=True,drop=True)

Name: 
Type: DiGraph
Number of nodes: 83
Number of edges: 145
Average in degree:   1.7470
Average out degree:   1.7470
Longest path length:	 8


## Identify level of housing characteristics

In [3]:
# Initialize node level values (nodes without dependencies stay at level 0)
level = np.zeros_like(adj_df[adj_df.columns[0]])

# Repeatedly relax the levels. Each pass pushes correct levels at least one
# step further along every dependency chain, so as many passes as the longest
# path has edges are enough for the values to settle.
for _pass in range(int(longest_path_len)):
    # For each housing characteristic node
    for i in range(n_nodes):
        # Row positions holding a 1 in this node's column are its dependencies
        column_name = adj_df.columns[i]
        dependencies = np.where(adj_df[column_name] == 1)[0]

        # Nodes without dependencies keep their current level
        if len(dependencies) > 0:
            # This node's level must be one greater than the highest level
            # among its dependencies
            level[i] = np.max(level[dependencies]) + 1

# Print housing characteristics grouped by levels.
# A longest path of N edges spans N+1 levels (0..N), so the range has to
# include longest_path_len itself; otherwise the deepest level is never shown.
for i in range(int(longest_path_len) + 1):
    idx = np.where(level == i)[0]
    print("Level: %d" % i)
    for node_idx in idx:
        print("  %s" % node_names[node_idx])
    print()

Level: 0
  Bathroom Spot Vent Hour
  Ceiling Fan
  Days Shifted
  Dehumidifier
  Door Area
  Doors
  Eaves
  Geometry Building Type FPL
  Insulation Finished Roof
  Lighting
  Mechanical Ventilation
  Misc Extra Refrigerator
  Misc Freezer
  Misc Gas Fireplace
  Misc Gas Grill
  Misc Gas Lighting
  Misc Hot Tub Spa
  Misc Pool
  Misc Well Pump
  Natural Ventilation
  Occupants
  Overhangs
  PV
  Range Spot Vent Hour
  Roof Material
  Solar Hot Water
  Usage Level
  Window Areas

Level: 1
  Clothes Washer
  Dishwasher
  Geometry Building Type
  Hot Water Fixtures
  Location EPW
  Plug Loads
  Refrigerator

Level: 2
  Cooling Setpoint
  Geometry Foundation Type
  Geometry Is Multifamily Low Rise
  Geometry Perimeter Footprint Ratio
  Heating Setpoint
  Location Region
  Vintage FPL

Level: 3
  Geometry House Size
  Geometry Nearest Neighbor LeftRight
  Geometry Shared Walls MF
  Geometry Shared Walls SFA
  Heating Fuel
  Orientation
  Vintage

Level: 4
  Clothes Dryer
  Cooking Range
  D

## Identify edges that jump more than 1 level

In [4]:
# Split the edges into two groups: edges whose endpoints are at most one level
# apart go to edges_new, edges spanning more than one level go to jump_edges.
# Every simple path between a jump edge's endpoints is printed for inspection.
jump_edges = []
edges_new = []
for edge in G.edges:
    source, target = edge

    # Position of each endpoint in node_names, which is also its index in level
    src_idx = np.where(source == node_names)[0][0]
    dst_idx = np.where(target == node_names)[0][0]
    src_level = level[src_idx]
    dst_level = level[dst_idx]

    jump = int(abs(src_level - dst_level))
    if jump <= 1:
        edges_new.append(edge)
        continue

    jump_edges.append(edge)
    print(edge)
    print('  Total Jump: %d' % jump)
    print('  Start,End: %d,%d' % (src_level, dst_level))
    print('  Paths:')
    for path in nx.all_simple_paths(G, source, target):
        for node in path:
            print("   ", node)
        print()
    print()

print(len(jump_edges))

('Geometry Building Type FPL', 'Geometry Building  Number Units HL')
  Total Jump: 4
  Start,End: 0,4
  Paths:
    Geometry Building Type FPL
    Geometry Building  Number Units HL

    Geometry Building Type FPL
    Location EPW
    Vintage FPL
    Vintage
    Geometry Building  Number Units HL

    Geometry Building Type FPL
    Location EPW
    Vintage
    Geometry Building  Number Units HL

    Geometry Building Type FPL
    Vintage FPL
    Vintage
    Geometry Building  Number Units HL


('Geometry Building Type FPL', 'Geometry Building Number Units MF')
  Total Jump: 5
  Start,End: 0,5
  Paths:
    Geometry Building Type FPL
    Geometry Building Number Units MF

    Geometry Building Type FPL
    Geometry Building Type
    Geometry Building Floors
    Geometry Building Number Units MF

    Geometry Building Type FPL
    Geometry House Size
    Geometry Building Floors
    Geometry Building Number Units MF

    Geometry Building Type FPL
    Location EPW
    Geometry House Size
 

    HVAC System Cooling Type
    HVAC System Shared

    Heating Fuel
    HVAC System Is Shared
    HVAC System Shared

    Heating Fuel
    HVAC System Shared


('HVAC System Is Shared', 'HVAC System Cooling Type')
  Total Jump: 3
  Start,End: 4,7
  Paths:
    HVAC System Is Shared
    HVAC System Cooling Type

    HVAC System Is Shared
    HVAC System Cooling
    HVAC System Cooling Type

    HVAC System Is Shared
    HVAC System Is Heatpump
    HVAC System Cooling
    HVAC System Cooling Type


('HVAC System Is Shared', 'HVAC System Cooling')
  Total Jump: 2
  Start,End: 4,6
  Paths:
    HVAC System Is Shared
    HVAC System Cooling

    HVAC System Is Shared
    HVAC System Is Heatpump
    HVAC System Cooling


('HVAC System Is Shared', 'HVAC System Heating Electricity')
  Total Jump: 2
  Start,End: 4,6
  Paths:
    HVAC System Is Shared
    HVAC System Heating Electricity

    HVAC System Is Shared
    HVAC System Is Heatpump
    HVAC System Heating Electricity


('HVAC System Is 

## Plot the Graph

In [9]:
# Graph with every node but only the edges collected in edges_new
G_new = nx.DiGraph()
G_new.add_nodes_from(node_names)
G_new.add_edges_from(edges_new)

# Per-level coordinates: x is the level number, y steps through the nodes of
# that level (taken in reverse index order) shifted down by half the level size.
# NOTE(review): pos is filled in here but is not passed to the draw() calls below.
scale_x = 1.0
scale_y = 1.0
pos = {}
for lvl in range(int(longest_path_len)+1):
    members = np.where(level == lvl)[0][::-1]
    n_level = len(members)

    for rank, node_idx in enumerate(members, start=1):
        y = n_level*scale_y*(float(rank)/float(n_level)) - n_level/2.0
        x = lvl*scale_x
        pos[node_names[node_idx]] = (x,y)

# Write both graphs to PDF without opening a viewer
draw(G,filename='Dependency_Graph_Full.pdf',show=False) # Full Graph
draw(G_new,filename='Dependency_Graph_Levels.pdf',show=False) # Levels Graph

'Dependency_Graph_Levels.pdf'