# Create Dependency Wheel Data

This notebook creates the data files in each `<project_folder>/util/dependency_wheel` folder that are necessary for the dependency wheels in `dep_wheel.html`.  The script reads the header of each .tsv file in the housing characteristics folder to identify the dependencies.  The dependencies are then saved in a data frame in the form of an adjacency matrix.  In the adjacency matrix, each row is a housing characteristic acting as a dependency and each column is a housing characteristic acting as a dependent: a 1 in row *r*, column *c* means that housing characteristic *c* depends on housing characteristic *r*.  The data files for the dependency wheels are then created from the adjacency matrix.  This notebook also saves the adjacency matrix as `adjacency_matrix.csv` in the same folder.

## Python Version

In [None]:
# Print the full interpreter version string so the notebook output records
# which Python was used for this run
import sys
print(sys.version)

## Import Modules

In [None]:
import os
import json
import numpy as np
import pandas as pd
from glob import glob
from os import listdir
from shutil import copyfile
from os.path import isfile, join
from IPython.display import display

## Functions

In [None]:
def get_hc_files_names(path_HCs):
    """
    Return the housing characteristic files and names found in a directory.

    Parameters
    ----------
    path_HCs : str
        Path to the housing characteristics directory in a ResStock project.

    Returns
    -------
    HC_files : list of str
        File names (including extension) of every non-hidden regular file
        directly inside ``path_HCs``, ordered alphabetically by housing
        characteristic name.
    HC_names : list of str
        Housing characteristic names (the file name without its final
        extension), in the same order as ``HC_files``.

    Raises
    ------
    OSError
        If ``path_HCs`` cannot be listed (propagated from ``listdir``).
    """
    name_file_pairs = []
    for file_name in listdir(path_HCs):
        # Ignore any hidden files (beginning with ".") and anything that is
        # not a regular file (e.g. sub-directories)
        if file_name.startswith('.') or not isfile(join(path_HCs, file_name)):
            continue

        # Remove only the final extension so that a housing characteristic
        # whose name itself contains a "." is not truncated
        name_file_pairs.append((os.path.splitext(file_name)[0], file_name))

    # Sort alphabetically by name; the file name breaks ties deterministically
    name_file_pairs.sort()

    HC_files = [file_name for _, file_name in name_file_pairs]
    HC_names = [name for name, _ in name_file_pairs]

    return HC_files,HC_names

def create_adjacency_matrix(HC_files,HC_names,hc_dir=None):
    """
    Create the dependency/dependent adjacency matrix as a data frame.

    The header (first line) of every housing characteristic file is read and
    each tab-separated column of the form ``Dependency=<name>`` is recorded
    as a dependency of that file's housing characteristic.

    Parameters
    ----------
    HC_files : list of str
        Housing characteristic file names, as returned by get_hc_files_names.
    HC_names : list of str
        Housing characteristic names, in the same order as ``HC_files``.
    hc_dir : str, optional
        Directory that contains ``HC_files``.  When omitted, the
        notebook-level variable ``path_HCs`` is used, which keeps existing
        two-argument calls working.

    Returns
    -------
    pandas.DataFrame
        Square data frame indexed and labelled by ``HC_names``.  Because the
        matrix is transposed before it is returned, a 1 in row ``r`` and
        column ``c`` means that housing characteristic ``c`` lists ``r`` as a
        dependency; all other entries are 0.

    Raises
    ------
    ValueError
        If a header names a dependency that is not in ``HC_names``.
    """
    # Fall back to the notebook-level path for backward compatibility
    if hc_dir is None:
        hc_dir = path_HCs

    prefix = 'Dependency='

    # Map each name to its (first) position once, instead of searching the
    # list again for every dependency
    name_to_idx = {}
    for k, name in enumerate(HC_names):
        name_to_idx.setdefault(name, k)

    # Initialize the adjacency matrix
    adj_mat = np.zeros((len(HC_names),len(HC_names)))

    # For each housing characteristic
    for i, hc_file in enumerate(HC_files):

        ## Read the first line of the housing characteristic file, without
        ## the line terminator so the last column name is not polluted
        with open(join(hc_dir, hc_file)) as f:
            header_str = f.readline().rstrip('\r\n')

        ## For each column in the tsv file
        for column_name in header_str.split('\t'):

            ### Only columns that start with "Dependency=" are dependencies;
            ### other columns (even ones starting with "D") are skipped
            if not column_name.startswith(prefix):
                continue

            #### Get the dependency name (everything after the prefix, so a
            #### name containing "=" is kept whole)
            dependency_str = column_name[len(prefix):]

            #### Find in the housing characteristics names
            try:
                j = name_to_idx[dependency_str]
            except KeyError:
                raise ValueError(
                    "Dependency '%s' in %s is not a known housing characteristic"
                    % (dependency_str, hc_file)
                )

            #### Include the dependency in the adjacency matrix
            adj_mat[i,j] = 1

    # Convert to Pandas (transposed: rows are dependencies, columns dependents)
    adj_df = pd.DataFrame(adj_mat,index=HC_names,columns=HC_names).T

    return adj_df

def convert_adjacency_matrix_to_json(project_dir,HC_names,adj_df,outdir):
    """
    Write the dependency wheel data files derived from an adjacency matrix.

    For every housing characteristic a record ``{'name': <name>}`` is built.
    When the characteristic's row in ``adj_df`` contains any entries equal to
    1, the record also gets a ``'require'`` dict whose keys are the labels of
    those columns (each mapped to 1).

    The record of the first housing characteristic is written to
    ``composer_backward.json``; the records of all remaining characteristics
    are written, under the key ``'packages'``, to ``composer_backward.lock``.

    Parameters
    ----------
    project_dir : str
        Not used by this function; kept in the signature for its callers.
    HC_names : list of str
        Housing characteristic names; each must be a row label of ``adj_df``.
    adj_df : pandas.DataFrame
        Adjacency matrix as produced by create_adjacency_matrix.
    outdir : str
        Existing directory in which both output files are written.
    """
    # Records for every housing characteristic except the first one
    lock_packages = []

    for position, hc_name in enumerate(HC_names):
        record = {'name': hc_name}

        # Positions of the columns flagged with a 1 in this row
        flagged_cols = np.where(adj_df.loc[hc_name] == 1)[0]
        if len(flagged_cols) > 0:
            record['require'] = {adj_df.columns[col]: 1 for col in flagged_cols}

        if position > 0:
            # Every later housing characteristic goes into the lock file
            lock_packages.append(record)
            continue

        # The first housing characteristic is the content of the main file
        main_path = join(outdir,'composer_backward.json')
        with open(main_path, 'w') as main_file:
            json.dump(record, main_file)

    # Write the lock file
    lock_path = join(outdir,'composer_backward.lock')
    with open(lock_path, 'w') as lock_file:
        json.dump({'packages': lock_packages}, lock_file, sort_keys=True)

## Load Housing Characteristic Names into memory

In [None]:
# Get all project directories
project_dirs = glob(join('..','..','..','project_*'))

# The blank html template is read from the current working directory
src = 'dep_wheel_blank_template.html'

print('Creating a dependency wheel for project:')
print('----------------------------------------')
for project_dir in project_dirs:
    # Create a path to the housing characteristics
    # NOTE: path_HCs must remain a notebook-level variable with this exact
    # name, because create_adjacency_matrix reads it to locate the files
    path_HCs = join(project_dir,'housing_characteristics')

    # Skip anything matching project_* that has no housing characteristics
    # folder (listing a missing folder would otherwise stop the whole run)
    if not os.path.isdir(path_HCs):
        continue

    # Get the housing characteristics
    HC_files,HC_names = get_hc_files_names(path_HCs)

    # If there are housing characteristics available
    if len(HC_files) > 0:
        print(project_dir)
        # Create the adjacency matrix
        adj_df = create_adjacency_matrix(HC_files,HC_names)

        # Save Adjacency Matrix
        ## Make directory if it doesn't exist, including the intermediate
        ## util folder when the project does not have one yet
        outdir = join(project_dir,'util','dependency_wheel')
        if not os.path.isdir(outdir):
            os.makedirs(outdir)

        ## Save adjacency matrix
        adj_df.to_csv(join(outdir,'adjacency_matrix.csv'))

        # Create JSON output files
        convert_adjacency_matrix_to_json(project_dir,HC_names,adj_df,outdir)

        # Copy html_file to the util directory in the project folder
        dst = join(outdir,'dep_wheel.html')
        copyfile(src, dst)