In [None]:
from buildstock_query import BuildStockQuery, ExeId, MappedColumn
import pandas as pd
import sqlalchemy as sa
import polars as pl
import os

In [None]:
# Open a query handle on the 'annual_impacts' table of the 'resstock_2024-1_oedi' database.
# NOTE(review): skip_reports=True presumably suppresses the start-up reports -- confirm
# against the buildstock_query documentation.
my_run = BuildStockQuery(
    workgroup='largeee',
    db_name='resstock_2024-1_oedi',
    table_name="annual_impacts",
    db_schema="resstock_oedi",
    skip_reports=True,
)

In [None]:
# Echo the query object so that its repr is shown as this cell's output.
my_run

In [None]:
# Map each RECS building type onto a short code (MH / SF / MF) and expose the mapped value
# as a column named 'simple_bldg_type' that queries can group by.
building_type_map = {
    "Mobile Home": "MH",
    "Single-Family Detached": "SF",
    "Single-Family Attached": "SF",
    "Multi-Family with 2 - 4 Units": "MF",
    "Multi-Family with 5+ Units": "MF",
}

# Source column whose values are the keys of building_type_map
bldg_col = my_run._get_column('in.geometry_building_type_recs')

simple_bldg_col = MappedColumn(
    bsq=my_run,
    name='simple_bldg_type',
    mapping_dict=building_type_map,
    key=bldg_col,
)

In [None]:
# Every upgrade (measure package) ID to query. The IDs are written one package family per
# row and flattened, in order, into a single flat list.
# NOTE(review): the advanced-envelope dual-fuel row has no '12.02', although the matching
# light-touch (10.x) and intermediate (11.x) rows contain '10.02' / '11.02' -- confirm
# whether that omission is intentional.
upgrade_id_list = [
    upgrade_id
    for package_family in (
        ['3.07', '3.01', '4.04', '2.03', '2.04', '2.05'],  # individual non-HVAC measures
        ['4.05', '4.06', '4.07', '4.08', '4.09', '4.10'],  # furnace/boiler alone
        # dual-fuel heat pump for existing fuel heating, same HP with electric backup for existing electric heating
        ['5.01', '5.13', '6.01', '6.10', '6.02', '6.11', '6.03', '6.12'],
        ['5.05', '5.06', '5.07', '5.08', '5.17', '5.18', '5.19', '5.20'],  # cold climate heat pump alone
        ['5.09', '5.10', '5.11', '5.12', '5.21', '5.22', '5.23', '5.24'],  # higher efficiency cold climate heat pump alone
        ['14.01', '14.02', '14.03', '14.04', '14.05', '14.06'],  # furnace/boiler + light touch envelope
        ['7.01', '7.13', '10.01', '10.10', '10.02', '10.11', '10.03', '10.12'],  # dual-fuel heat pump group + light touch envelope
        ['7.05', '7.06', '7.07', '7.08', '7.17', '7.18', '7.19', '7.20'],  # cold climate heat pump + light touch envelope
        ['7.09', '7.10', '7.11', '7.12', '7.21', '7.22', '7.23', '7.24'],  # higher efficiency cold climate heat pump + light touch envelope
        ['14.07', '14.08', '14.09', '14.10', '14.11', '14.12'],  # furnace/boiler + intermediate envelope
        ['8.01', '8.13', '11.01', '11.10', '11.02', '11.11', '11.03', '11.12'],  # dual-fuel heat pump group + intermediate envelope
        ['8.05', '8.06', '8.07', '8.08', '8.17', '8.18', '8.19', '8.20'],  # cold climate heat pump + intermediate envelope
        ['8.09', '8.10', '8.11', '8.12', '8.21', '8.22', '8.23', '8.24'],  # higher efficiency cold climate heat pump + intermediate envelope
        ['14.13', '14.14', '14.15', '14.16', '14.17', '14.18'],  # furnace/boiler + advanced envelope
        ['9.01', '9.13', '12.01', '12.10', '12.11', '12.03', '12.12'],  # dual-fuel heat pump group + advanced envelope
        ['9.05', '9.06', '9.07', '9.08', '9.17', '9.18', '9.19', '9.20'],  # cold climate heat pump + advanced envelope
        ['9.09', '9.10', '9.11', '9.12', '9.21', '9.22', '9.23', '9.24'],  # higher efficiency cold climate heat pump + advanced envelope
        ['13.18', '13.19'],  # furnace/boiler + HPWH
        ['13.01', '13.02', '13.03', '13.04', '13.05', '13.06', '13.07', '13.08'],  # cold climate heat pump + HPWH
        ['15.01', '15.05', '15.17', '15.20', '15.18', '15.21', '15.19', '15.22'],  # dual-fuel heat pump + light touch + HPWH
        ['15.09', '15.10', '15.11', '15.12', '15.13', '15.14', '15.15', '15.16'],  # cold climate heat pump + light touch envelope + HPWH
        ['13.15', '13.16', '13.17'],  # HPWH and envelope
    )
    for upgrade_id in package_family
]

In [None]:
# Get electricity savings: one savings_shape call per upgrade ID, with each result stored
# under the key '<upgrade id>-electricity'.
fuel_of_interest = 'electricity'
fuel_enduse_field = 'out.electricity.total.energy_consumption'
electricity_results = {}
for upgrade_id_instance in upgrade_id_list:
    result_name = f'{upgrade_id_instance}-{fuel_of_interest}'
    print(result_name)  # progress marker, one line per query
    electricity_results[result_name] = my_run.savings.savings_shape(
        upgrade_id=upgrade_id_instance,
        enduses=[fuel_enduse_field],
        group_by=['in.state', 'in.heating_fuel', simple_bldg_col],
        # Only these four RECS building types are queried
        restrict=[(
            'in.geometry_building_type_recs',
            [
                'Single-Family Detached',
                'Single-Family Attached',
                'Multi-Family with 2 - 4 Units',
                'Multi-Family with 5+ Units',
            ],
        )],
        annual_only=True,
        get_query_only=False,
        applied_only=True,
    )
    # Per-upgrade CSV export, left disabled:
    #electricity_results[result_name].to_csv('C:/Users/epresent/NREL/C Bldg Stock - General/ResStock Fact Sheets/SLOPE Data Layer/Data Pulls/' + result_name + '.csv')


In [None]:
# Get natural gas savings: one savings_shape call per upgrade ID, with each result stored
# under the key '<upgrade id>-ng'.
fuel_of_interest = 'ng'
fuel_enduse_field = 'out.natural_gas.total.energy_consumption'
ng_results = {}
for upgrade_id_instance in upgrade_id_list:
    result_name = f'{upgrade_id_instance}-{fuel_of_interest}'
    print(result_name)  # progress marker, one line per query
    ng_results[result_name] = my_run.savings.savings_shape(
        upgrade_id=upgrade_id_instance,
        enduses=[fuel_enduse_field],
        group_by=['in.state', 'in.heating_fuel', simple_bldg_col],
        # Only these four RECS building types are queried
        restrict=[(
            'in.geometry_building_type_recs',
            [
                'Single-Family Detached',
                'Single-Family Attached',
                'Multi-Family with 2 - 4 Units',
                'Multi-Family with 5+ Units',
            ],
        )],
        annual_only=True,
        get_query_only=False,
        applied_only=True,
    )
    # Per-upgrade CSV export, left disabled:
    #ng_results[result_name].to_csv('C:/Users/epresent/NREL/C Bldg Stock - General/ResStock Fact Sheets/SLOPE Data Layer/Data Pulls/' + result_name + '.csv')

In [None]:
# Get propane savings: one savings_shape call per upgrade ID, with each result stored
# under the key '<upgrade id>-propane'.
fuel_of_interest = 'propane'
fuel_enduse_field = 'out.propane.total.energy_consumption'
propane_results = {}
for upgrade_id_instance in upgrade_id_list:
    result_name = f'{upgrade_id_instance}-{fuel_of_interest}'
    print(result_name)  # progress marker, one line per query
    propane_results[result_name] = my_run.savings.savings_shape(
        upgrade_id=upgrade_id_instance,
        enduses=[fuel_enduse_field],
        group_by=['in.state', 'in.heating_fuel', simple_bldg_col],
        # Only these four RECS building types are queried
        restrict=[(
            'in.geometry_building_type_recs',
            [
                'Single-Family Detached',
                'Single-Family Attached',
                'Multi-Family with 2 - 4 Units',
                'Multi-Family with 5+ Units',
            ],
        )],
        annual_only=True,
        get_query_only=False,
        applied_only=True,
    )
    # Per-upgrade CSV export, left disabled:
    #propane_results[result_name].to_csv('C:/Users/epresent/NREL/C Bldg Stock - General/ResStock Fact Sheets/SLOPE Data Layer/Data Pulls/' + result_name + '.csv')

In [None]:
# Get fuel oil savings: one savings_shape call per upgrade ID, with each result stored
# under the key '<upgrade id>-fuel_oil'.
fuel_of_interest = 'fuel_oil'
fuel_enduse_field = 'out.fuel_oil.total.energy_consumption'
fuel_oil_results = {}
for upgrade_id_instance in upgrade_id_list:
    result_name = f'{upgrade_id_instance}-{fuel_of_interest}'
    print(result_name)  # progress marker, one line per query
    fuel_oil_results[result_name] = my_run.savings.savings_shape(
        upgrade_id=upgrade_id_instance,
        enduses=[fuel_enduse_field],
        group_by=['in.state', 'in.heating_fuel', simple_bldg_col],
        # Only these four RECS building types are queried
        restrict=[(
            'in.geometry_building_type_recs',
            [
                'Single-Family Detached',
                'Single-Family Attached',
                'Multi-Family with 2 - 4 Units',
                'Multi-Family with 5+ Units',
            ],
        )],
        annual_only=True,
        get_query_only=False,
        applied_only=True,
    )
    # Per-upgrade CSV export, left disabled:
    #fuel_oil_results[result_name].to_csv('C:/Users/epresent/NREL/C Bldg Stock - General/ResStock Fact Sheets/SLOPE Data Layer/Data Pulls/' + result_name + '.csv')

In [None]:
# Define groups of upgrades.

# Upgrades reported on their own: the individual non-HVAC measures, followed by the
# HPWH-and-envelope packages.
_individual_measures = ['3.07', '3.01', '4.04', '2.03', '2.04', '2.05']
_hpwh_envelope_packages = ['13.15', '13.16', '13.17']
upgrade_nongroup_list = _individual_measures + _hpwh_envelope_packages

# Short group code -> member upgrade IDs. Keeping each code next to its members means the
# name list and the member list below cannot drift out of step.
# NOTE(review): 'df ae' has no '12.02', unlike 'df lte' ('10.02') and 'df ie' ('11.02') --
# confirm whether that omission is intentional.
_upgrade_groups = {
    'fb': ['4.05', '4.06', '4.07', '4.08', '4.09', '4.10'],  # furnace/boiler alone
    # dual-fuel heat pump for existing fuel heating, same HP with electric backup for existing electric heating
    'df': ['5.01', '5.13', '6.01', '6.10', '6.02', '6.11', '6.03', '6.12'],
    'cchp': ['5.05', '5.06', '5.07', '5.08', '5.17', '5.18', '5.19', '5.20'],  # cold climate heat pump alone
    'hecchp': ['5.09', '5.10', '5.11', '5.12', '5.21', '5.22', '5.23', '5.24'],  # higher efficiency cold climate heat pump alone
    'fb lte': ['14.01', '14.02', '14.03', '14.04', '14.05', '14.06'],  # furnace/boiler + light touch envelope
    'df lte': ['7.01', '7.13', '10.01', '10.10', '10.02', '10.11', '10.03', '10.12'],  # dual-fuel heat pump group + light touch envelope
    'cchp lte': ['7.05', '7.06', '7.07', '7.08', '7.17', '7.18', '7.19', '7.20'],  # cold climate heat pump + light touch envelope
    'hecchp lte': ['7.09', '7.10', '7.11', '7.12', '7.21', '7.22', '7.23', '7.24'],  # higher efficiency cold climate heat pump + light touch envelope
    'fb ie': ['14.07', '14.08', '14.09', '14.10', '14.11', '14.12'],  # furnace/boiler + intermediate envelope
    'df ie': ['8.01', '8.13', '11.01', '11.10', '11.02', '11.11', '11.03', '11.12'],  # dual-fuel heat pump group + intermediate envelope
    'cchp ie': ['8.05', '8.06', '8.07', '8.08', '8.17', '8.18', '8.19', '8.20'],  # cold climate heat pump + intermediate envelope
    'hecchp ie': ['8.09', '8.10', '8.11', '8.12', '8.21', '8.22', '8.23', '8.24'],  # higher efficiency cold climate heat pump + intermediate envelope
    'fb ae': ['14.13', '14.14', '14.15', '14.16', '14.17', '14.18'],  # furnace/boiler + advanced envelope
    'df ae': ['9.01', '9.13', '12.01', '12.10', '12.11', '12.03', '12.12'],  # dual-fuel heat pump group + advanced envelope
    'cchp ae': ['9.05', '9.06', '9.07', '9.08', '9.17', '9.18', '9.19', '9.20'],  # cold climate heat pump + advanced envelope
    'hecchp ae': ['9.09', '9.10', '9.11', '9.12', '9.21', '9.22', '9.23', '9.24'],  # higher efficiency cold climate heat pump + advanced envelope
    'fb hpwh': ['13.18', '13.19'],  # furnace/boiler + HPWH
    'cchp hpwh': ['13.01', '13.02', '13.03', '13.04', '13.05', '13.06', '13.07', '13.08'],  # cold climate heat pump + HPWH
    'df lte hpwh': ['15.01', '15.05', '15.17', '15.20', '15.18', '15.21', '15.19', '15.22'],  # dual-fuel heat pump + light touch + HPWH
    'cchp lte hpwh': ['15.09', '15.10', '15.11', '15.12', '15.13', '15.14', '15.15', '15.16'],  # cold climate heat pump + light touch envelope + HPWH
}

# Parallel lists (same order): group members and group codes
upgrade_groups_list = [list(members) for members in _upgrade_groups.values()]
upgrade_groups_names = list(_upgrade_groups)

# Every grouping as a list of lists: the individual measures as singletons, then the
# groups, then the HPWH-and-envelope packages as singletons. Inner lists are fresh copies.
upgrade_groupings_list = (
    [[upgrade] for upgrade in _individual_measures]
    + [list(members) for members in _upgrade_groups.values()]
    + [[upgrade] for upgrade in _hpwh_envelope_packages]
)

In [None]:
# Combine groups of upgrades: for each fuel, sum the per-upgrade results of every upgrade
# group into a single frame per group.

def _combine_upgrade_groups(per_upgrade_results, fuel):
    """Sum per-upgrade result frames into one frame per upgrade group.

    Args:
        per_upgrade_results: dict keyed '<upgrade id>-<fuel>' holding one pandas
            DataFrame per upgrade.
        fuel: short fuel label used as the key suffix (e.g. 'ng').

    Returns:
        dict mapping each name in ``upgrade_groups_names`` to the member frames of the
        matching entry in ``upgrade_groups_list``, concatenated and summed per
        (state, heating_fuel, simple_bldg_type).

    Raises:
        KeyError: if a member upgrade has no entry in ``per_upgrade_results``.
    """
    combined = {}
    for member_ids, group_name in zip(upgrade_groups_list, upgrade_groups_names):
        # The loop variable is deliberately not called `id`: at notebook scope that name
        # would rebind the builtin for every later cell.
        member_frames = [per_upgrade_results[f'{upgrade}-{fuel}'] for upgrade in member_ids]
        combined[group_name] = (
            pd.concat(member_frames)
            .groupby(['state', 'heating_fuel', 'simple_bldg_type'])
            .sum()
            .reset_index()
        )
    return combined


electricity_group_results = _combine_upgrade_groups(electricity_results, 'electricity')
ng_group_results = _combine_upgrade_groups(ng_results, 'ng')
propane_group_results = _combine_upgrade_groups(propane_results, 'propane')
fuel_oil_group_results = _combine_upgrade_groups(fuel_oil_results, 'fuel_oil')

In [None]:
# Create full results sets with both stand-alone packages and group packages.

def _standalone_results(per_upgrade_results, fuel):
    """Pick the stand-alone upgrades out of one fuel's per-upgrade results.

    Args:
        per_upgrade_results: dict keyed '<upgrade id>-<fuel>' for a single fuel.
        fuel: short fuel label used as the key suffix (e.g. 'propane').

    Returns:
        dict keyed by bare upgrade ID (e.g. '3.07') for every ID in
        ``upgrade_nongroup_list`` that has an entry in ``per_upgrade_results``;
        IDs without an entry are skipped.
    """
    standalone = {}
    for upgrade in upgrade_nongroup_list:
        key = f'{upgrade}-{fuel}'
        if key in per_upgrade_results:
            standalone[upgrade] = per_upgrade_results[key]
    return standalone


# Each fuel reads from its OWN per-upgrade dict. The propane and fuel-oil sections used to
# iterate ng_results, whose '<id>-ng' keys never match, so those two package sets silently
# lacked the stand-alone upgrades.
electricity_ungrouped_results = _standalone_results(electricity_results, 'electricity')
electricity_packages_results = electricity_group_results | electricity_ungrouped_results

ng_ungrouped_results = _standalone_results(ng_results, 'ng')
ng_packages_results = ng_group_results | ng_ungrouped_results

propane_ungrouped_results = _standalone_results(propane_results, 'propane')
propane_packages_results = propane_group_results | propane_ungrouped_results

fuel_oil_ungrouped_results = _standalone_results(fuel_oil_results, 'fuel_oil')
fuel_oil_packages_results = fuel_oil_group_results | fuel_oil_ungrouped_results


In [None]:
# Save package group results to csvs: one '<fuel>_package_<package>.csv' file per package
# and fuel, written fuel by fuel.
export_dir = 'C:/Users/epresent/NREL/C Bldg Stock - General/ResStock Fact Sheets/SLOPE Data Layer/Data Pulls/'
for fuel, packages_results in (
    ('electricity', electricity_packages_results),
    ('ng', ng_packages_results),
    ('propane', propane_packages_results),
    ('fuel_oil', fuel_oil_packages_results),
):
    for package, package_frame in packages_results.items():
        result_name = fuel + "_package_" + str(package)
        package_frame.to_csv(export_dir + result_name + '.csv')

In [None]:
# Load the package group characteristics from the 'Data Structure.xlsx' spreadsheet and tidy them.
data_dir = 'C:/Projects/SLOPE ResStock Data/Data Pulls'
data_struct_path = os.path.join(data_dir, 'Data Structure.xlsx')

# Unused mockup columns
cols_to_drop = ['Heating Fuel', 'Building Type', 'Value Type', 'AR', 'AZ', 'CA', 'CO']

# Spreadsheet header -> column name used in the rest of the notebook
col_renamer = {
    'HVAC': 'input_hvac',
    'Envelope': 'input_envelope',
    'Water Heater': 'input_water_heater',
    'Appliances': 'input_appliances',
    'Lighting': 'input_lighting',
    'Measure Package Group Provisional Code': 'package_group',
    'Measure Package Number(s)': 'packages',
}

package_group_defs = (
    pl.read_excel(data_struct_path)
    .drop(cols_to_drop)
    .rename(col_renamer)
    # Drop the note row (rows whose HVAC cell contains 'Note')
    .filter(~pl.col('input_hvac').str.contains('Note'))
    # Turn each '+'-separated package string into a list: strip spaces, then split on '+'
    .with_columns(
        pl.col('packages').str.replace_all(' ', '', literal=True).str.split(by="+").alias("packages"),
    )
)

display(package_group_defs)

In [None]:
# Combine all the individual package data into one big dataframe.

# Package-group definitions as plain dicts (converted once, reused for every lookup below)
group_defs = package_group_defs.to_dicts()

# Package IDs, in the order they appear in the package-group definitions
pkg_ids = [pid for group_def in group_defs for pid in group_def['packages']]

# Short fuel label (used in CSV file names) -> fuel name (used in the energy column names)
short_to_long_energy_fuels = {
    'electricity': 'electricity',
    'ng': 'natural_gas',
    'propane': 'propane',
    'fuel_oil': 'fuel_oil'
}

# Short fuel label -> fuel type recorded in 'energy_fuel_type'
fuel_types = {
    'electricity': 'electricity',
    'ng': 'fuel',
    'propane': 'fuel',
    'fuel_oil': 'fuel'
}

pkg_dfs = []
for pkg_id in pkg_ids:
    for short_energy_fuel, energy_fuel in short_to_long_energy_fuels.items():
        fuel_type = fuel_types[short_energy_fuel]

        # Missing files are reported and skipped; the count assertion below catches them
        csv_path = os.path.join(data_dir, f'{pkg_id}-{short_energy_fuel}.csv')
        if not os.path.exists(csv_path):
            print(f'ERROR: cannot find CSV data file: {pkg_id}-{short_energy_fuel}.csv')
            continue

        pkg_df = (
            pl.read_csv(csv_path)
            # Drop the index column from Pandas exports
            .drop('')
            # Drop all the rows where heating_fuel == 'None' or 'Other Fuel'
            .filter(~pl.col('heating_fuel').is_in(['None', 'Other Fuel']))
            # Remove the fuel name from the energy columns (it moves to the new columns below)
            .rename({
                f'{energy_fuel}.total.energy_consumption__baseline': 'baseline_energy_consumption_kwh',
                f'{energy_fuel}.total.energy_consumption__savings': 'energy_savings_kwh',
                'units_count': 'number_of_homes'
            })
        )

        # Find which package group this package is in (should only be in one package group)
        matching_groups = [
            group_def['package_group']
            for group_def in group_defs
            if pkg_id in group_def['packages']
        ]
        assert len(matching_groups) == 1, f'Expected {pkg_id} to be in one package group, but was in {matching_groups}'

        # Add columns with the energy fuel, energy fuel type, package, and package group
        pkg_dfs.append(pkg_df.with_columns([
            pl.lit(energy_fuel).alias('energy_fuel'),
            pl.lit(fuel_type).alias('energy_fuel_type'),
            pl.lit(pkg_id).alias('package'),
            pl.lit(matching_groups[0]).alias('package_group')
        ]))

assert len(pkg_dfs) == 616  # This is the number of all the files in the directory

all_pkg_df = pl.concat(pkg_dfs)
display(all_pkg_df)

In [None]:
# Calculate savings and unit counts for each package group: one output row per combination
# of the grouping columns below.
package_groupby_cols = [
    'package_group',
    'simple_bldg_type',
    'state',
    'heating_fuel',
    'energy_fuel_type'
]

all_tots = []
# Declared here but never filled in this cell
big_sample_tots = []
small_sample_tots = []
for gb_vals, gb_df in all_pkg_df.group_by(package_groupby_cols, maintain_order=True):
    # Number of distinct energy fuels contributing rows to this group
    num_energy_fuels = len(gb_df.get_column('energy_fuel').unique().to_list())

    # Unit/sample counts are summed across packages (packages are mutually exclusive) and
    # then divided by the number of energy fuels included in the sums.
    sample_count = gb_df.get_column('sample_count').sum() / num_energy_fuels
    number_of_homes = gb_df.get_column('number_of_homes').sum() / num_energy_fuels

    all_tots.append({
        **dict(zip(package_groupby_cols, gb_vals)),
        # Savings and energy consumption are summed across packages and energy fuels
        'baseline_energy_consumption_kwh': gb_df.get_column('baseline_energy_consumption_kwh').sum(),
        'energy_savings_kwh': gb_df.get_column('energy_savings_kwh').sum(),
        'sample_count': sample_count,
        'number_of_homes': number_of_homes,
        # Evaluate the sample count: more than 100 samples counts as enough
        'enough_samples': sample_count > 100,
    })

# Combine the per-group totals into a data frame
all_sample_df = pl.DataFrame(all_tots)
display(all_sample_df)

In [None]:
# Join on package group characteristics used to filter the data in UI.
# The left join keeps every row of all_sample_df and matches on 'package_group'.
# NOTE: all_sample_df is reassigned from itself, so re-running this cell joins again onto
# the already-joined frame.
all_sample_df = all_sample_df.join(package_group_defs, on='package_group', how='left')

In [None]:
# Order columns: keep only the columns listed below, in this order, for the exported table.
col_order = [
    # Package-group characteristics joined from package_group_defs
    'input_hvac',
    'input_envelope',
    'input_water_heater',
    'input_appliances',
    'input_lighting',
    # Grouping keys
    'package_group',
    'heating_fuel',
    'simple_bldg_type',
    'state',
    'energy_fuel_type',
    # Aggregated values and sample diagnostics
    'baseline_energy_consumption_kwh',
    'energy_savings_kwh',
    'sample_count',
    'number_of_homes',
    'enough_samples',
    # 'packages' # can't export nested data to CSV
]
# select() also discards any column not listed above, including the nested 'packages' list
all_sample_df = all_sample_df.select(col_order)
display(all_sample_df)

In [None]:
# Export data for QAQC (every group, whether or not it has enough samples)
all_sample_df.write_csv(os.path.join(data_dir, 'resstock_slope_update_qaqc.csv'))

# Reduce to sufficient samples and remove columns not used in mockups
sufficient_sample_df = all_sample_df.filter(pl.col('enough_samples'))
qaqc_cols = ['enough_samples', 'sample_count', 'baseline_energy_consumption_kwh', 'packages']
# The column-ordering cell has already selected 'packages' away, and dropping a column that
# does not exist raises, so only the QAQC columns still present are dropped.
sufficient_sample_df = sufficient_sample_df.drop(
    [col for col in qaqc_cols if col in sufficient_sample_df.columns]
)

# Export final data
sufficient_sample_df.write_csv(os.path.join(data_dir, 'resstock_slope_update.csv'))