In [None]:
import os
import re
import glob as gb
import pandas as pd

# from eppy import modeleditor
from eppy.modeleditor import IDF
import eppy.EPlusInterfaceFunctions.parse_idd as parse_idd

# Location of the EnergyPlus IDD file. The ENERGYPLUS_IDD environment variable,
# when set, overrides the default install path so the notebook is not tied to
# one machine's directory layout.
iddfile = os.environ.get("ENERGYPLUS_IDD",
                         "/usr/local/EnergyPlus-9-0-0/Energy+.idd")
IDF.setiddname(iddfile)

# Parse the IDD once; the raw parse result is kept in `x`
x = parse_idd.extractidddata(iddfile)

# Third element of the parse result. It is iterated further down as a sequence
# of entries whose first item holds the class name under the 'idfobj' key.
useful = x[2]

In [None]:
# A few classes are declared in the IDD although they are not implemented
exclude_classes = ['FaultModel:PressureSensorOffset:OutdoorAir']

# Class name of every IDD object, minus the excluded ones
class_names = [
    idd_object[0]['idfobj']
    for idd_object in useful
    if idd_object[0]['idfobj'] not in exclude_classes
]

# One entry per class, all starting at None. The keys are created up front
# (rather than on the fly) so that whatever is still None at the end is
# exactly the set of classes that could not be mapped.
subsection_names = dict.fromkeys(class_names)

In [None]:
# Order the names longest first (primary key) and alphabetically among names
# of equal length (secondary key). Handling the longest names first avoids
# replacing the "Output:Meter" portion of "Output:Meter:FileOnly".
class_names.sort(key=lambda name: (-len(name), name))

In [None]:
# files = gb.glob('../**/*.tex', recursive=True)

In [None]:
# glob returns its matches in arbitrary, filesystem-dependent order. The label
# search further down keeps the first match it meets, so the list is sorted to
# make every run visit the files in the same order.
io_ref_files = sorted(gb.glob('../input-output-reference/**/*.tex', recursive=True))

# Read entire I/O

In [None]:
# Map each .tex path to the list of its lines (line terminators removed)
all_lines = {}
for tex_path in io_ref_files:
    with open(tex_path, 'r') as tex_file:
        all_lines[tex_path] = tex_file.read().splitlines()

## Find the subsection labels for all objects

In [None]:
for class_name in class_names:
    # Leave alone any class that already has a label
    if subsection_names[class_name] is not None:
        continue

    # Matches e.g. \subsection{Class:Name}\label{classname}; some objects are
    # documented in a \section rather than a \subsection.
    # Compiled once per class instead of once per line. The class name is
    # escaped, and the label capture stops at the first closing brace so that
    # anything following the \label{...} on the same line is not swallowed.
    re_subsection = re.compile(
        r'\\(?:sub)?section{{{c}}}\\label{{([^}}]*)}}'.format(c=re.escape(class_name)))

    for p, lines in all_lines.items():
        for line in lines:
            # Cheap substring test first, the regex only runs on candidates
            if class_name not in line:
                continue
            m = re_subsection.search(line)
            if m:
                subsection_names[class_name] = m.group(1)
                # No need to keep processing the file, we have what we need
                break
        if subsection_names[class_name] is not None:
            # Label found: the remaining files need not be scanned either
            break

In [None]:
# Manual fixes, applied on top of whatever the automatic search found
subsection_names.update({
    # Objects that share one documentation entry
    'Shading:Site': 'shadingsite-shadingbuilding',
    'Shading:Building': 'shadingsite-shadingbuilding',
    'Shading:Site:Detailed': 'shadingsitedetailed-shadingbuildingdetailed',
    'Shading:Building:Detailed': 'shadingsitedetailed-shadingbuildingdetailed',

    'Output:Meter': 'outputmeter-and-outputmetermeterfileonly',
    'Output:Meter:MeterFileOnly': 'outputmeter-and-outputmetermeterfileonly',
    'Output:Meter:Cumulative': 'outputmetercumulative-and-outputmetercumulativemeterfileonly',
    'Output:Meter:Cumulative:MeterFileOnly': 'outputmetercumulative-and-outputmetercumulativemeterfileonly',

    'SizingPeriod:WeatherFileDays': 'sizingperiodweatherfiledays',
    'SizingPeriod:WeatherFileConditionType': 'sizingperiodweatherfileconditiontype',

    'SurfaceContaminantSourceAndSink:Generic:PressureDriven': 'surfacecontaminantsourceandsinkgenericpressuredriven',
    'SurfaceContaminantSourceAndSink:Generic:BoundaryLayerDiffusion': 'surfacecontaminantsourceandsinkgenericboundarylayerdiffusion',
    'SurfaceContaminantSourceAndSink:Generic:DepositionVelocitySink': 'surfacecontaminantsourceandsinkgenericdepositionvelocitysink',
    'ZoneContaminantSourceAndSink:Generic:DepositionRateSink': 'zonecontaminantsourceandsinkgenericdepositionratesink',

    'WindowsCalculationEngine': 'windowscalculationengine',
})

# The group objects are detailed in the Auxiliary Program file; the I/O
# reference only has a section that lists them and says so, so every one of
# them links to that section.
group_objects = [
    'GroundHeatTransfer:Slab:Materials',
    'GroundHeatTransfer:Slab:MatlProps',
    'GroundHeatTransfer:Slab:BoundConds',
    'GroundHeatTransfer:Slab:BldgProps',
    'GroundHeatTransfer:Slab:Insulation',
    'GroundHeatTransfer:Slab:EquivalentSlab',
    'GroundHeatTransfer:Slab:AutoGrid',
    'GroundHeatTransfer:Slab:ManualGrid',
    'GroundHeatTransfer:Slab:XFACE',
    'GroundHeatTransfer:Slab:YFACE',
    'GroundHeatTransfer:Slab:ZFACE',
    'GroundHeatTransfer:Basement:SimParameters',
    'GroundHeatTransfer:Basement:MatlProps',
    'GroundHeatTransfer:Basement:Insulation',
    'GroundHeatTransfer:Basement:SurfaceProps',
    'GroundHeatTransfer:Basement:BldgData',
    'GroundHeatTransfer:Basement:Interior',
    'GroundHeatTransfer:Basement:ComBldg',
    'GroundHeatTransfer:Basement:EquivSlab',
    'GroundHeatTransfer:Basement:EquivAutoGrid',
    'GroundHeatTransfer:Basement:AutoGrid',
    'GroundHeatTransfer:Basement:ManualGrid',
    'GroundHeatTransfer:Basement:XFACE',
    'GroundHeatTransfer:Basement:YFACE',
    'GroundHeatTransfer:Basement:ZFACE',
]
subsection_names.update(
    dict.fromkeys(group_objects, 'group-detailed-ground-heat-transfer'))

subsection_names.update({
    # Added in #7023 which isn't in develop yet
    'Coil:WaterHeating:AirToWaterHeatPump:VariableSpeed': 'coil-waterheating-airtowaterheatpump-variablespeed',
    # Also changed in #7023
    'Curve:ChillerPartLoadWithLift': 'curvechillerpartloadwithlift',
})

In [None]:
# Class name -> label (None where no label is known)
s = pd.Series(subsection_names)

# True for every class that has a label
found_mask = s.notna()

print("Count of found:")
print(found_mask.value_counts())

print("\nClasses we couldn't map:")
print(s[~found_mask].index.tolist())

# Some Class Names can be greedy

Some are far too greedy: they are used throughout the documentation and are also plain English words, e.g. 'Zone', which appears about 6000 times.

In [None]:
# Candidates are the names without a ':' (a ':' is assumed to make a name
# specific enough to EnergyPlus), in alphabetical order
potentially_greedy = sorted(name for name in class_names if ':' not in name)

In [None]:
# Join every file into a single string once, instead of once per class name
joined_texts = ["\n".join(lines) for lines in all_lines.values()]

# Whole-word occurrences of each candidate name, summed over all files
occurrence_counts = []
for class_name in potentially_greedy:
    # One compiled pattern per class name, reused for every file
    whole_word_re = re.compile(r'\b%s\b' % re.escape(class_name))
    occurrence_counts.append(
        sum(len(whole_word_re.findall(text)) for text in joined_texts))

# Indexed by class name; integer dtype is forced so an empty result stays integral
s_greedy = pd.Series(occurrence_counts, index=potentially_greedy, dtype='int64')


In [None]:
# Display the occurrence counts, most frequent class names first
s_greedy.sort_values(ascending=False)

In [None]:
# Class names that are far too greedy to link: they are used throughout the
# text and are also plain English words.
# This is the full list of potentially_greedy; an entry that is commented out
# is a name we keep (it will be linked), an active entry is skipped.
exclude_greedy_class_names = [
 #'AirLoopHVAC',
 #'AvailabilityManagerAssignmentList',
 'Branch',
 #'BranchList',
 'Building',
 #'CentralHeatPumpSystem',
 #'ComfortViewFactorAngles',
 #'CondenserEquipmentList',
 #'CondenserEquipmentOperationSchemes',
 #'CondenserLoop',
 #'ConnectorList',
 'Construction',
 #'ConvergenceLimits',
 #'CurrencyType',
 #'DemandManagerAssignmentList',
 #'DistrictCooling',
 #'DistrictHeating',
 'Door',
 'Duct',
 #'ElectricEquipment',
 #'EnvironmentalImpactFactors',
 #'ExternalInterface',
 #'FuelFactors',
 #'GasEquipment',
 #'GeometryTransform',
 #'GlazedDoor',
 #'GlobalGeometryRules',
 #'HVACSystemRootFindingAlgorithm',
 #'HeatBalanceAlgorithm',
 #'HotWaterEquipment',
 #'InternalMass',
 'Lights',
 'Material',
 #'NodeList',
 #'OtherEquipment',
 'People',
 #'PlantEquipmentList',
 #'PlantEquipmentOperationSchemes',
 #'PlantLoop',
 'Roof',
 #'RoofIrrigation',
 #'RoomAirModelType',
 #'RunPeriod',
 #'ScheduleTypeLimits',
 #'ShadowCalculation',
 #'SimulationControl',
 #'SteamEquipment',
 #'TemperingValve',
 'Timestep',
 'Version',
 'Window',
 #'WindowShadingControl',
 #'WindowsCalculationEngine',
 'Zone',
 #'ZoneAirContaminantBalance',
 #'ZoneAirHeatBalanceAlgorithm',
 #'ZoneAirMassFlowConservation',
 #'ZoneCrossMixing',
 #'ZoneEarthtube',
 #'ZoneGroup',
 #'ZoneList',
 #'ZoneMixing',
 #'ZoneRefrigerationDoorMixing',
 #'ZoneTerminalUnitList',
 #'ZoneThermalChimney'
]

## Troubleshooting nested problems

Here's the initial problem:

    line = 'HVACTemplate:Plant:Chiller:ObjectReference'

    class_name = HVACTemplate:Plant:Chiller:ObjectReference

    => \hyperref[hvactemplateplantchillerobjectreference]{HVACTemplate:Plant:Chiller:ObjectReference}

    class_name = HVACTemplate:Plant:Chiller

    => \hyperref[hvactemplateplantchillerobjectreference]{\hyperref[hvactemplateplantchiller]{HVACTemplate:Plant:Chiller}:ObjectReference}

In [None]:
# Sample text: one bare class name and one that is already linked.
# Raw string, so that the backslash of \hyperref is not read as the start of
# an (invalid) escape sequence; the resulting text is unchanged.
line = r"""HVACTemplate:Plant:Chiller:ObjectReference should be touched *once*,
a \hyperref[hvactemplateplantchillerobjectreference]{HVACTemplate:Plant:Chiller:ObjectReference} must not be touched"""
print("Before:")
print(line)
for class_name in ['HVACTemplate:Plant:Chiller:ObjectReference', 'HVACTemplate:Plant:Chiller']:
    # We don't want to add a hyperref if there's already one, so a negative
    # look-behind makes sure the right link is not already in place.
    # A negative look-ahead on ':' keeps us from matching a name that is only
    # the leading part of a longer one
    # (eg: don't replace 'Output:Meter' inside 'Output:Meter:FileOnly')
    noref_re = re.compile(r'(?<!\\hyperref\[{l}\]{{){c}(?!\:)'.format(c=re.escape(class_name),
                                                           l=re.escape(subsection_names[class_name])))

    # Replacement template: \hyperref[<label>]{<class name>}
    noref_repl = r'\\hyperref[{l}]{{{c}}}'.format(c=class_name, l=subsection_names[class_name])

    line = noref_re.sub(noref_repl, line)

print("\nAfter:")
print(line)

# Do it: apply the replacements to every file

In [None]:
from tqdm import tqdm_notebook as tqdm

In [None]:
# Original text of every line that was eligible for linking (kept for inspection)
found_lines = []
# for class_name in ['Generator:Photovoltaic', 'Fan:VariableVolume', 'Coil:Cooling:Water']:

# Heading of an "Output"/"Outputs" subsection or subsubsection. It does not
# depend on the class being processed, so it is compiled only once.
outputs_re = re.compile(r'(\\(?:sub){1,2}section){Outputs?}\\label{.*}')

for class_name in tqdm(class_names, desc='Replace Class Names'):

    if class_name in exclude_greedy_class_names:
        print('Skipping {}'.format(class_name))
        continue

    label = subsection_names[class_name]
    if label is None:
        # Unmapped class: there is nothing to link to, and building the
        # patterns below from None would raise a TypeError.
        print('Skipping {} (no label found)'.format(class_name))
        continue

    class_pattern = re.escape(class_name)
    # Literal text that opens the link: \hyperref[<label>]{
    hyperref_open = '\\hyperref[{}]{{'.format(label)
    # Negative look-behind: the match is not already wrapped in the right link
    not_linked = '(?<!' + re.escape(hyperref_open) + ')'

    # Heading of the class's own (sub)(sub)section
    re_subsection = re.compile(
        r'(\\(?:sub){{0,2}}section){{{c}}}\\label{{(.*)}}'.format(c=class_pattern))

    # Special case for lstinline, it must end up INSIDE the hyperref
    lstinline_re = re.compile(not_linked + r'(\\lstinline!' + class_pattern + r'!)')

    # Plain occurrences. We don't add a hyperref where the right one is
    # already in place, nor inside a \lstinline!...! (handled just above).
    # The negative look-ahead on ':' avoids matching a name that is only the
    # leading part of a longer one
    # (eg: don't replace 'Output:Meter' inside 'Output:Meter:FileOnly').
    # NOTE(review): only a following ':' is guarded against; a class name that
    # is a prefix of a longer word without ':' still matches.
    noref_re = re.compile(not_linked + r'(?<!\\lstinline!)' + class_pattern + r'(?!:)')
    noref_link = hyperref_open + class_name + '}'

    # Replacements are built by functions rather than by re templates, so no
    # character of the label or class name is interpreted as a template escape.
    wrap_lstinline = lambda match: hyperref_open + match.group(1) + '}'
    wrap_plain = lambda match: noref_link

    for p in io_ref_files:
        # The file is re-read on every pass because earlier passes rewrote it
        with open(p, 'r') as f:
            original_text = f.read()
        lines = original_text.splitlines()
        all_lines[p] = lines

        # We skip everything that is inside these blocks: lstlisting, equation, tables
        inside_lstlisting = False
        inside_eq = False
        inside_table = False

        # We also don't want to add cross references to an object within its own section...
        inside_section = False
        # We need to capture the actual type (eg: section, subsection) to find
        # the next one so we know we're out
        section_type = None
        # One time flag, so we don't use a regex on all lines to find the section
        section_found = False

        # And we don't want to do it in the "Outputs" section
        inside_outputs = False
        # Based on the type of section the "Outputs" heading was found in,
        # we search for the next higher level to know we're out
        next_closing_section_type = None

        new_lines = []

        for line in lines:

            if not section_found:
                m = re_subsection.search(line)
                if m:
                    section_found = True
                    inside_section = True
                    section_type = m.group(1)

            elif inside_section:
                # The next heading of the same type closes the class's own section
                if section_type in line:
                    inside_section = False

            if r"\begin{lstlisting}" in line:
                inside_lstlisting = True
            if r"\end{lstlisting}" in line:
                inside_lstlisting = False

            if r"\begin{equation}" in line:
                inside_eq = True
            if r"\end{equation}" in line:
                inside_eq = False

            if (r"\begin{table}" in line) or (r"\begin{longtable}" in line):
                inside_table = True
            if (r"\end{table}" in line) or (r"\end{longtable}" in line):
                inside_table = False

            m = outputs_re.search(line)
            if m:
                inside_outputs = True
                output_section_type = m.group(1)
                # One level up: \subsubsection -> \subsection, \subsection -> \section
                next_closing_section_type = output_section_type.replace('sub', '', 1)
            elif inside_outputs:
                if next_closing_section_type in line:
                    inside_outputs = False

            # Lines we must leave untouched: inside a lstlisting/equation/table,
            # inside the class's own section, inside an Outputs section, or
            # carrying a \label (workaround to avoid headings and paragraphs too)
            skip_line = (inside_lstlisting or inside_eq or inside_table
                         or inside_section or inside_outputs
                         or (r'\label' in line))

            if (class_name in line) and not skip_line:
                found_lines.append(line)
                # Start with the lstinline one, lstinline has to be inside the
                # hyperref text portion
                new_line = lstinline_re.sub(wrap_lstinline, line)
                # textbf and emph are around the name, just like plain text,
                # so all the other occurrences are handled the same way.
                # This runs on the result of the previous step, not on the
                # original line, so the lstinline links are kept.
                new_line = noref_re.sub(wrap_plain, new_line)
                new_lines.append(new_line)
            else:
                # Do nothing
                new_lines.append(line)

        # Write the file back only when something changed, and keep its final
        # newline when it had one (splitlines/join would otherwise drop it)
        if new_lines != lines:
            new_text = "\n".join(new_lines)
            if original_text.endswith("\n"):
                new_text += "\n"
            with open(p, 'w') as f:
                f.write(new_text)