### Update glossary.csv file using data from schemas.yml

In [194]:
import cea.scripts
import cea.inputlocator
import cea.config
import cea.glossary
from cea.tests.trace_inputlocator import get_csv_schema
import os
import yaml
import json

In [195]:
# Load the current glossary.csv contents and the schemas.yml contents.
glossary_df = cea.glossary.read_glossary_df()
schemas = cea.scripts.schemas()

# The top-level keys of schemas.yml are the locator method names.
locators = list(schemas.keys())

### start by finding all entries in schemas.yml without a schema

the following three locator methods need "special" treatment:
- get_optimization_checkpoint
  - "special" schema
- get_optimization_disconnected_cooling_capacity
  - only present in projects with cooling network
- get_optimization_connected_cooling_capacity
  - only present in projects with cooling network
  
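This code assumes you have a "reference-case-cooling/baseline" scenario next to your project folder (the cell below resolves it as `<config.project>/../reference-case-cooling/baseline`) and that you have run the optimization on it (e.g. run `cea workflow --workflow district-cooling-system`).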

In [196]:
# Point an InputLocator at the cooling reference case; the three "special"
# files read below are taken from that scenario.
config = cea.config.Configuration()
config.scenario = os.path.join(config.project, "..", "reference-case-cooling", "baseline")
locator = cea.inputlocator.InputLocator(scenario=config.scenario)

# load get_optimization_checkpoint schema
# (the checkpoint is a JSON file, so the schema is built by hand: one entry per
# top-level key, with the stored value as sample data and no type information)
if not schemas["get_optimization_checkpoint"]["schema"]:
    with open(locator.get_optimization_checkpoint(1), 'r') as fp:
        get_optimization_checkpoint = json.load(fp)
    schemas["get_optimization_checkpoint"]["schema"] = {
        str(key): {"sample_data": value,
                   "types_found": None}
        for key, value in get_optimization_checkpoint.items()
    }

# load get_optimization_disconnected_cooling_capacity schema
if not schemas["get_optimization_disconnected_cooling_capacity"]["schema"]:
    schemas["get_optimization_disconnected_cooling_capacity"]["schema"] = get_csv_schema(
        locator.get_optimization_disconnected_cooling_capacity(1, 1))

# load get_optimization_connected_cooling_capacity schema
# This previously read the *disconnected* file (copy-paste slip), which stored
# the wrong schema under the connected locator method.
# NOTE(review): assumes the connected locator method takes the same (1, 1)
# arguments as the disconnected one - confirm against cea.inputlocator.
if not schemas["get_optimization_connected_cooling_capacity"]["schema"]:
    schemas["get_optimization_connected_cooling_capacity"]["schema"] = get_csv_schema(
        locator.get_optimization_connected_cooling_capacity(1, 1))

In [197]:
# Sanity check: every locator method must carry a "schema" key.
# Offending locator methods are printed, so a clean run prints nothing.
for lm in locators:
    if "schema" not in schemas[lm]:
        print(lm)

In [198]:
# Sanity check: the "schema" entry must be filled in (not `None` / empty).
# Offending locator methods are printed, so a clean run prints nothing.
for lm in locators:
    if schemas[lm]["schema"]:
        continue
    print(lm)

If either of the two checks above prints anything, update schemas.yml and re-run the notebook.

### make sure the "used_by" and "created_by" lists don't contain duplicates

In [199]:
# List locator methods that have no "used_by" key.
for lm in locators:
    if "used_by" not in schemas[lm]:
        print(lm)

In [200]:
# List locator methods that have no "created_by" key.
for lm in locators:
    if "created_by" not in schemas[lm]:
        print(lm)

each locator should have a "used_by" and a "created_by" - let's assume they're all lists

In [201]:
# Replace each "used_by" / "created_by" list with its sorted, de-duplicated form.
for lm in locators:
    entry = schemas[lm]
    for key in ("used_by", "created_by"):
        entry[key] = sorted(set(entry[key]))

In [202]:
# Write the updated schemas back to schemas.yml, which sits next to the
# cea.scripts module file.
schemas_yml = os.path.join(os.path.dirname(cea.scripts.__file__), 'schemas.yml')
print("saving to: %s" % schemas_yml)
with open(schemas_yml, 'w') as fp:
    yaml.dump(schemas, stream=fp)

# Fetch the schemas again after saving (presumably this re-reads schemas.yml -
# confirm against cea.scripts.schemas).
schemas = cea.scripts.schemas()

saving to: c:\users\darthoma\documents\github\cityenergyanalyst\cea\schemas.yml


### find all schema entries that are not in glossary.csv

In [203]:
# Locator methods known to schemas.yml that have no rows in glossary.csv yet.
glossary_lms = set(glossary_df["LOCATOR_METHOD"].values)
schemas_lms = set(schemas)
missing_lms = sorted(schemas_lms.difference(glossary_lms))
print('\n'.join(missing_lms))




For each locator method that is missing from glossary.csv, we need to append one entry per field of that file. Some of those files are special (the optimization checkpoint comes to mind). Each glossary.csv entry has the following fields:

- SCRIPT (use first "created_by" or "-", if input file)
- LOCATOR_METHOD
- FILE_NAME (get from schemas.yml file_path)
- VARIABLE (this is the field name)
- DESCRIPTION (use "TODO")
- UNIT (use "TODO")
- VALUES (use "TODO")
- TYPE (use the first from schemas.types_found, or "TODO" if there is none)
- COLOR (use "black") - I'm not really sure we need this at all in glossary.csv?

In [204]:
# Build one placeholder glossary row per (locator method, variable) pair that
# schemas.yml knows about but glossary.csv does not.
#
# Changes from the earlier version of this cell:
# - rows are collected in a list and appended in one go, instead of calling
#   DataFrame.append once per row (which copies the whole frame every time)
# - the builtin `type` is no longer rebound as a notebook-global string, which
#   would break any later cell that calls type(...)
glossary_csv = os.path.join(os.path.dirname(cea.glossary.__file__), 'glossary.csv')
glossary_columns = ["SCRIPT", "LOCATOR_METHOD", "FILE_NAME", "VARIABLE",
                    "DESCRIPTION", "UNIT", "VALUES", "TYPE", "COLOR"]

new_rows = []
for lm in missing_lms:
    created_by = schemas[lm]["created_by"]
    # input files have no creating script: use "-" for those
    script = created_by[0] if created_by else "-"
    file_name = schemas[lm]["file_path"]
    for variable, field in schemas[lm]["schema"].items():
        # "types_found" may be absent or empty/None (e.g. the checkpoint schema)
        types_found = field["types_found"] if "types_found" in field else None
        new_rows.append({"key": "{}!!!{}".format(lm, variable),
                         "SCRIPT": script,
                         "LOCATOR_METHOD": lm,
                         "FILE_NAME": file_name,
                         "VARIABLE": variable,
                         "DESCRIPTION": "TODO",
                         "UNIT": "TODO",
                         "VALUES": "TODO",
                         "TYPE": types_found[0] if types_found else "TODO",
                         "COLOR": "black"})

# Skip the append when nothing is missing: an empty list is not a valid
# argument for DataFrame.append in every pandas version.
if new_rows:
    glossary_df = glossary_df.append(new_rows, ignore_index=True)

# Only the glossary columns are written; the helper "key" column is left out.
glossary_df.to_csv(glossary_csv, columns=glossary_columns, index=False)
print("saved new glossary.csv - reloading")
glossary_df = cea.glossary.read_glossary_df()

saved new glossary.csv - reloading


### find all glossary entries that are not in schemas.yml

In [209]:
# find all locator methods not present in schemas.yml
# (stuff left over from previous versions of cea; one item per glossary row)
old_entries = [lm for lm in glossary_df["LOCATOR_METHOD"] if lm not in schemas]

# Drop every glossary row that belongs to one of those locator methods.
# old_entries holds plain locator-method strings, so filter on the column
# directly rather than indexing each entry like a row (which raised TypeError).
# NOTE: this only updates the in-memory frame; this cell does not write
# glossary.csv.
glossary_df = glossary_df[~glossary_df["LOCATOR_METHOD"].isin(old_entries)]

get_archetypes_system_controls databases/ch/archetypes/system_controls.xlsx:heating_cooling
get_archetypes_system_controls databases/ch/archetypes/system_controls.xlsx:heating_cooling
get_archetypes_system_controls databases/ch/archetypes/system_controls.xlsx:heating_cooling
get_archetypes_system_controls databases/ch/archetypes/system_controls.xlsx:heating_cooling
get_archetypes_system_controls databases/ch/archetypes/system_controls.xlsx:heating_cooling
get_archetypes_system_controls databases/ch/archetypes/system_controls.xlsx:heating_cooling
get_building_restrictions inputs/building-properties/restrictions.dbf
get_building_restrictions inputs/building-properties/restrictions.dbf
get_building_restrictions inputs/building-properties/restrictions.dbf
get_building_restrictions inputs/building-properties/restrictions.dbf
get_building_restrictions inputs/building-properties/restrictions.dbf
get_building_restrictions inputs/building-properties/restrictions.dbf
get_data_benchmark databases

get_network_node_types_csv_file outputs/data/optimization/network/layout/dh__nodes.csv
get_network_node_types_csv_file outputs/data/optimization/network/layout/dh__nodes.csv
get_network_node_types_csv_file outputs/data/optimization/network/layout/dh__nodes.csv
get_network_node_types_csv_file outputs/data/optimization/network/layout/dh__nodes.csv
get_network_node_types_csv_file outputs/data/optimization/network/layout/dh__nodes.csv
get_network_node_types_csv_file outputs/data/optimization/network/layout/dh__nodes.csv
get_node_mass_flow_csv_file outputs/data/optimization/network/layout/nominal_nodemassflow_at_design_dh__kgpers.csv
get_node_mass_flow_csv_file outputs/data/optimization/network/layout/nominal_nodemassflow_at_design_dh__kgpers.csv
get_optimization_network_edge_list_file outputs/data/optimization/network/layout/dh__edges.csv
get_optimization_network_edge_list_file outputs/data/optimization/network/layout/dh__edges.csv
get_optimization_network_edge_list_file outputs/data/optim

get_supply_systems databases/ch/systems/supply_systems.xls:Furnace
get_supply_systems databases/ch/systems/supply_systems.xls:Furnace
get_supply_systems databases/ch/systems/supply_systems.xls:Furnace
get_supply_systems databases/ch/systems/supply_systems.xls:Furnace
get_supply_systems databases/ch/systems/supply_systems.xls:Furnace
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/systems/supply_systems.xls:HEX
get_supply_systems databases/ch/sys

get_thermal_networks databases/ch/systems/thermal_networks.xls:MATERIAL PROPERTIES
get_thermal_networks databases/ch/systems/thermal_networks.xls:MATERIAL PROPERTIES
get_thermal_networks databases/ch/systems/thermal_networks.xls:PIPING CATALOG
get_thermal_networks databases/ch/systems/thermal_networks.xls:PIPING CATALOG
get_thermal_networks databases/ch/systems/thermal_networks.xls:PIPING CATALOG
get_thermal_networks databases/ch/systems/thermal_networks.xls:PIPING CATALOG
get_thermal_networks databases/ch/systems/thermal_networks.xls:PIPING CATALOG
get_thermal_networks databases/ch/systems/thermal_networks.xls:PIPING CATALOG


### make sure glossary.csv (locator_method, variable) is unique

### clean the sample_data (make longs into ints) 