In [22]:
###import comstockpostproc-standard things, and then don't use most of them

import os
from textwrap import indent

import boto3
import logging
import numpy as np
import pandas as pd

from comstockpostproc.resstock_naming_mixin_LARGEE import ResStockNamingMixin
from comstockpostproc.units_mixin import UnitsMixin
from comstockpostproc.s3_utilities_mixin import S3UtilitiesMixin
from comstockpostproc import resstock_LARGEE

logger = logging.getLogger(__name__)

In [None]:
class ResStock_data_process():
    def __init__(self, resstock_results_folder, resstock_file_name, downselect_rows_tf, downselect_row_fields, 
                 values_to_keep,  col_plan_folder, col_plan_name,
                 add_wide_fields_tf, dfs_for_wide_fields, wide_mergeon_fields, wide_merge_cols, wide_col_plans, wide_merge_newnames,
                 add_local_bills_tf, add_first_costs_tf, cost_inputs_folder, cost_inputs_filename, downselect_cols_tf, add_long_fields_tf, rate_inputs_df, 
                 long_fields_also_wide_tf, long_fields_also_wide, long_fields_also_wide_names,
                 save_file_tf, output_folder, output_file_name, debug_tf):
        """
        A class to load and transform ResStock 2024.2 data for futher steps in an automated workflow
        """
        #initialize members
        self.resstock_results_folder = resstock_results_folder
        self.resstock_file_name = resstock_file_name
        self.downselect_rows_tf = downselect_rows_tf
        self.downselect_row_fields = downselect_row_fields
        self.values_to_keep = values_to_keep
        self.col_plan_folder = col_plan_folder
        self.col_plan_name = col_plan_name
        self.add_wide_fields_tf = add_wide_fields_tf
        self.dfs_for_wide_fields = dfs_for_wide_fields
        self.wide_mergeon_fields = wide_mergeon_fields
        self.wide_merge_cols = wide_merge_cols
        self.wide_col_plans = wide_col_plans
        self.wide_merge_newnames = wide_merge_newnames
        self.add_local_bills_tf = add_local_bills_tf
        self.add_first_costs_tf = add_first_costs_tf
        self.cost_inputs_folder = cost_inputs_folder
        self.cost_inputs_filename = cost_inputs_filename
        self.downselect_cols_tf = downselect_cols_tf
        self.add_long_fields_tf = add_long_fields_tf
        self.rate_inputs_df = rate_inputs_df
        self.long_fields_also_wide_tf = long_fields_also_wide_tf
        self.long_fields_also_wide = long_fields_also_wide
        self.long_fields_also_wide_names = long_fields_also_wide_names
        self.save_file_tf = save_file_tf
        self.output_folder = output_folder
        self.output_file_name = output_file_name
        self.debug_tf = debug_tf

        #execute
        self.download_data()
        self.downselect_rows()
        self.make_col_plan()
        self.add_wide_fields()
        self.downselect_cols()
        self.pivot_data()
        self.add_long_fields()
        self.categorize_outputs()
        self.addl_wide_fields_in_long()
        self.add_weighted_values_col()
        self.return_and_save_file()

    def download_data(self):
    #load results from already-downloaded OEDI file
        if self.debug_tf == True:
            print (1)
        results_file_path = os.path.join(self.resstock_results_folder, self.resstock_file_name)
        self.data = pd.read_csv(results_file_path, engine = "pyarrow")
        #self.data = self.data.head(5) #for testing purposes, shrink the data

    def downselect_rows(self):
    #downselect to a subset of results 
        if self.debug_tf == True:
            print (2)
        if(self.downselect_rows_tf == True):
            for field, values in zip(self.downselect_row_fields, self.values_to_keep):
                self.data = self.data.loc[self.data[field].isin(values)]

    def make_col_plan(self):
    #assign a plan for each column in the dataset, from a premade csv
        if self.debug_tf == True:
            print (3)
        plan_file_path = os.path.join(self.col_plan_folder, self.col_plan_name)
        self.col_plan = pd.read_csv(plan_file_path, engine = "pyarrow")
        #flag columns in the data that aren't in the column plan
        data_cols = self.data.columns.tolist()
        cols_in_plan = self.col_plan['column'].tolist()
        cols_not_in_plan = list(set(data_cols) - set(cols_in_plan))
        if len(cols_not_in_plan) > 0:
            print ("These columns are in the data but not the column plan:") 
            print(cols_not_in_plan)
        #flag columns in the column plan that will need to be added to the data
        cols_not_in_data = list(set(cols_in_plan) - set(data_cols))
        if len(cols_not_in_data) > 0:
            print ("These columns are not in the data and will be added, with NaNs:")
            print (cols_not_in_data)
        #remake data in standard order and with cols of NAs so that all files have the same columns
        for col in cols_not_in_data:
            self.data[col] = np.nan
        self.data = self.data[cols_in_plan]
        #create lists of columns
        self.cols_to_remove = self.col_plan.loc[self.col_plan['plan']=='remove', 'column'].tolist()
        self.cols_wide = self.col_plan.loc[self.col_plan['plan']=='keep', 'column'].tolist()
        self.cols_to_pivot = self.col_plan.loc[self.col_plan['plan']=='pivot', 'column'].tolist()
        
    def add_wide_fields(self):
    #add additional wide format fields before pivoting, and also add plans for them
        if self.debug_tf == True:
            print (4)
        #add additional precomputed wide format fields before pivoting
        if(self.add_wide_fields_tf == True):
            if self.debug_tf == True:
                print ("4a")
            for dfw, wide_mergeon_field, wide_merge_col, wide_merge_newname, wide_col_plan in zip(
                self.dfs_for_wide_fields, self.wide_mergeon_fields, self.wide_merge_cols, self.wide_merge_newnames, self.wide_col_plans):
                self.data = self.data.merge(dfw, [[wide_mergeon_field, wide_merge_col]], on = wide_mergeon_field, how = "left")
                self.data.rename(columns = {wide_merge_col:wide_merge_newname}, inplace = True)
                if self.wide_col_plan == 'pivot':
                    self.cols_to_pivot = self.cols_to_pivot + [wide_merge_newname]
                elif self.wide_col_plan == 'keep':
                    self.cols_wide = self.cols_wide + [wide_merge_newname]
                else:
                    self.cols_to_remove = self.cols_to_remove + [wide_merge_newname]
        #add local bills before pivoting
        if(self.add_local_bills_tf == True):
            if self.debug_tf == True:
                print ("4b")
            for index, row in self.rate_inputs_df.iterrows():
                if row['column']== "out.bills_local.natural_gas.total.usd.savings":
                    self.data[row['column']] = row['fixed monthly cost']*12*(np.logical_and((self.data["out.natural_gas.total.energy_consumption.kwh.savings"]!=0), 
                                                                              (self.data["out.natural_gas.total.energy_consumption.kwh"]==0))) + row['variable cost per kwh']*(
                                                                            self.data[row['col list for scaling']].sum(axis = 1)) #should add fixed costs to bill savings if all consumption is removed
                # for each row of rate inputs, create a column in the data, which will be NaN if the scaling row doesn't exist (e.g savings rows in baseline) and 0 if the relevant consumption rows don't exist
                else:
                    self.data[row['column']] = row['fixed monthly cost']*12*((self.data[row['col list for scaling']].sum(axis = 1))!=0) + row['variable cost per kwh']*(self.data[row['col list for scaling']].sum(axis = 1))
                if row['plan'] == 'pivot':
                    self.cols_to_pivot = self.cols_to_pivot + [row['column']]
                elif row['plan'] == 'keep':
                    self.cols_wide = self.cols_wide + [row['column']]
                else:
                    self.cols_to_remove = self.cols_to_remove + [row['column']]
            plan_for_new_cols_df = self.rate_inputs_df.drop(['fixed monthly cost', 'variable cost per kwh', 'col list for scaling'], axis = 1)
            self.col_plan = pd.concat([self.col_plan, plan_for_new_cols_df], axis = 0, ignore_index=True)
        #add local first costs (upgrade costs) before pivoting (but don't pivot these, treat them as parameters)
        if(self.add_first_costs_tf == True):
            print ("first costs")
            cost_inputs_filepath = os.path.join(self.cost_inputs_folder, self.cost_inputs_filename)
            cost_inputs = pd.read_csv(cost_inputs_filepath, engine = "pyarrow")
            up_costs = []
            for index, row in self.data.iterrows():
                cost = 0
                upgrade = row["upgrade"]
                if upgrade == 0:
                    up_costs.append(cost)
                elif (row['applicability']!=True):
                    up_costs.append(cost)
                else:     #actually calculate costs          
                    #extract necessary data
                    location = row[cost_inputs['Location Field Match'][0]] #just using whatever geographic resolution the first row of Cost Inputs has, for now
                    climate_zone = int(row["in.ashrae_iecc_climate_zone_2004"][0]) #just the number, not the letter
                    hp_size_btuh = row["out.params.size_heating_system_primary_k_btu_h"]
                    hp_size_tons = (hp_size_btuh *1000)/12000
                    attic_floor_area_sf = row["out.params.floor_area_attic_ft_2"]
                    num_exterior_doors = row["out.params.door_area_ft_2"]/20 #ResStock 2024.2 has 20ft2 total door area for any unit with exterior doors, which is approximately one door
                    num_windows = row["out.params.window_area_ft_2"]/15 #15 ft2 seems like a decent proxy for average window size based on standard window sizes
                    HPWH_gal = row["out.params.size_water_heater_gal"]
                    pool_heater_tons = 1 #proxy for all pool heaters, based loosely on looking at availability at Home Depot website
                    spa_heater_tons = 1 #proxy for all spa heaters
                    applicability_criteria_1 = row[cost_inputs['Applicability Criteria Field1'][0]]#for now this only works if there's just one applicability critiera field that's constant for the whole cost inputs dataset

                    # look up sum spec name on upgrade, location, and applicability criteria
                    if row['upgrade'] ==16: #there's no applicability critieria for this one
                        selected_index = cost_inputs[np.logical_and(cost_inputs['Location Value Match'] == location, 
                                                cost_inputs["Upgrade"] == upgrade)].index.values.astype(int)[0]
                    else:
                        selected_index = cost_inputs[np.logical_and(np.logical_and(cost_inputs['Location Value Match'] == location, 
                                                cost_inputs["Upgrade"] == upgrade),
                                                cost_inputs['Applicability Criteria Values1'] == applicability_criteria_1)].index.values.astype(int)[0]
                        #print(selected_index)
                
                    #extract all the right values for the sum spec name (this is cols H through X of the "Cost Inputs.csv" as of 2025-02-13)
                    hp_cost_per_ton = cost_inputs.loc[selected_index, "HP Cost Per Ton"]
                    hpwh_cost_per_gal = cost_inputs.loc[selected_index, "HPWH Cost Per Gallon"]
                    pool_heater_cost_per_ton = cost_inputs.loc[selected_index, "Pool Heater Cost Per Ton"]
                    spa_heater_cost_per_ton = cost_inputs.loc[selected_index, "Spa Heater Cost Per Ton"]
                    calc1_constant = cost_inputs.loc[selected_index, "Calc1 Constant"]
                    calc2_constant1 = cost_inputs.loc[selected_index, "Calc2 Constant1"]
                    calc2_constant2 = cost_inputs.loc[selected_index, "Calc2 Constant2"]
                    calc3_constant1 = cost_inputs.loc[selected_index, "Calc3 Constant1"]
                    calc3_constant2 = cost_inputs.loc[selected_index, "Calc3 Constant2"]
                    calc4_constant1 = cost_inputs.loc[selected_index, "Calc4 Constant1"]
                    calc4_constant2 = cost_inputs.loc[selected_index, "Calc4 Constant2"]
                    calc1_coeff = cost_inputs.loc[selected_index, "Calc1 Coeff"]
                    calc2_coeff = cost_inputs.loc[selected_index, "Calc2 Coeff"]
                    calc3_coeff = cost_inputs.loc[selected_index, "Calc3 Coeff"]
                    calc4_coeff = cost_inputs.loc[selected_index, "Calc4 Coeff"]
                    fixed_costs_demo = cost_inputs.loc[selected_index, "Fixed Costs Demo"]
                    fixed_costs_install = cost_inputs.loc[selected_index, "Fixed Costs Install"]

                    #do the algabraic cost calc
                    calc1 = attic_floor_area_sf * calc1_constant * calc1_coeff
                    calc2 = (num_exterior_doors * calc2_constant1 + num_windows * calc2_constant2) * calc2_coeff
                    if climate_zone < 4:
                        calc3 = calc3_constant1 * attic_floor_area_sf
                    else:
                        calc3 = calc3_constant2 * attic_floor_area_sf
                    if (climate_zone > 1 and climate_zone) < 4:
                        calc4 = calc4_constant1 * attic_floor_area_sf
                    else:
                        calc4 = calc4_constant2 * attic_floor_area_sf
                    cost = cost + (hp_size_tons * hp_cost_per_ton) + ( #coeffs are 0 where not applicable
                        HPWH_gal * hpwh_cost_per_gal) + (
                            pool_heater_tons * pool_heater_cost_per_ton) + (
                                spa_heater_tons * spa_heater_cost_per_ton) + (
                                    calc1 * calc1_coeff) + (
                                        calc2 * calc2_coeff) + (
                                            calc3 * calc3_coeff) + (
                                                calc4 * calc4_coeff) + (
                                                    fixed_costs_demo + fixed_costs_install)
                    up_costs.append(float(cost))
                print (upgrade)
                print (cost)
            self.data['out.first_costs.usd'] = up_costs
            self.cols_wide = self.cols_wide + ['out.first_costs.usd']
            first_costs_col_plan = pd.DataFrame({'column': ['out.first_costs.usd'], 'col_type': ['unique'], 'plan': ['keep'], 
                                    'Result Type': ['First Cost'], 'Fuel': ['NA'], 'End Use': ['NA'], 'End Use Category':['NA']})
            self.col_plan = pd.concat([self.col_plan, first_costs_col_plan], axis = 0, ignore_index=True)
    
    
    def downselect_cols(self):
    #remove unneceessary columns
        if self.debug_tf == True:
            print (5)
        if(self.downselect_cols_tf == True):
            self.data.drop(self.data[self.cols_to_remove], axis = 1, inplace = True)
            #print(self.data.columns)

    def pivot_data(self):
    #make all the results long format, keep the characteristics wide
        if self.debug_tf == True:
            print (6)
        self.data_long = pd.melt(
            self.data,
            id_vars = self.cols_wide,
            var_name = "Output",
            value_name = "Value"
        )
    
    def add_long_fields(self):
    #this is where you add any long format fields.
        if self.debug_tf == True:
            print (7)
        if(self.add_long_fields_tf == True):
            if self.debug_tf == True:
                print (1)

    def categorize_outputs(self):
    #Develop output categorization
        if self.debug_tf == True:
            print (8)
        #use mappings to get the categorizations
        out_cats = self.col_plan.drop(self.col_plan[["col_type", "plan"]], axis = 1, inplace = False)
        self.data_long = self.data_long.merge(out_cats, left_on = 'Output', right_on = "column", how = 'left')

    def addl_wide_fields_in_long(self):
    #re-merge in any long fields that are also needed as wide fields
        if self.debug_tf == True:
            print (9)
        if(self.long_fields_also_wide_tf == True):
            merge_data_cols = ["bldg_id"] + self.long_fields_also_wide
            self.data_long = self.data_long.merge(self.data[merge_data_cols], on = "bldg_id", how = "left")
            for colname, newcolname in zip(self.long_fields_also_wide, self.long_fields_also_wide_names):
                self.data_long.rename(columns = {colname:newcolname}, inplace = True)

    def add_weighted_values_col(self):
    #add a column with the weighted value alongside the unweighted value column
        if self.debug_tf == True:
            print (10)
        self.data_long['Weighted Value'] = self.data_long['Value']*self.data_long['weight']

    def return_and_save_file(self):
    #save file
        if self.debug_tf == True:
            print (11)
        return self.data_long
        if self.save_file_tf == True:
            self.data_long.to_csv(os.path.join(self.output_folder, self.output_file_name))

In [32]:
####Prepare utility rates for C2C DV

##project-specific utility bills - inputs
#Electricity
fixed_elec_cost_monthly = 10.56
var_elec_cost_per_kwh = 0.17404 #cf 0.137/kwh

#Natural Gas
fixed_ng_cost_monthly = 16.25
var_ng_cost_per_ccf = 1.495

#Fuel Oil
var_fo_cost_per_gal = 2.851

#Propane
var_propane_cost_per_gal = 3.199

##project-specific utility bills - unit conversions
gal_fuel_oil_to_mbtu = 139/1000
gal_propane_to_mbtu = 91.6 / 1000
mbtu_to_kwh = 293.0710701722222
dol_per_ccf_to_dol_per_therm = 1/1.038 #$ per Ccf divided by 1.038 equals $ per therm https://www.eia.gov/tools/faqs/faq.php?id=45&t=8
therm_to_kwh = 29.307107017222222

var_ng_cost_per_kwh = var_ng_cost_per_ccf * (dol_per_ccf_to_dol_per_therm) * (1/therm_to_kwh)
var_fo_cost_per_kwh = var_fo_cost_per_gal * (1/gal_fuel_oil_to_mbtu) * (1/mbtu_to_kwh)
var_propane_cost_per_kwh = var_propane_cost_per_gal * (1/gal_propane_to_mbtu) * (1/mbtu_to_kwh)

print(var_ng_cost_per_kwh) #0.0491440437526616, cf 0.0339307/kwh
print(var_fo_cost_per_kwh) #0.06998572515142275, cf 0.0704125/kwh
print(var_propane_cost_per_kwh) #0.11916420397790706. cf 0.101456/kWh

#assemble for input
rates_data_inputs = [
    ["out.bills_local.electricity.total.usd", fixed_elec_cost_monthly, var_elec_cost_per_kwh, ["out.electricity.total.energy_consumption.kwh"], 
     "out.x", "pivot", "Utility Bills", "Electricity", "Electricity Total", "Total"],
    ["out.bills_local.natural_gas.total.usd", fixed_ng_cost_monthly, var_ng_cost_per_kwh, ["out.natural_gas.total.energy_consumption.kwh"], 
     "out.x", "pivot", "Utility Bills", "Natural Gas", "Natural Gas Total", "Total"],
    ["out.bills_local.fuel_oil.total.usd", 0, var_fo_cost_per_kwh, ["out.fuel_oil.total.energy_consumption.kwh"], 
     "out.x", "pivot", "Utility Bills", "Fuel Oil", "Fuel Oil Total", "Total"],
    ["out.bills_local.propane.total.usd", 0, var_propane_cost_per_kwh, ["out.propane.total.energy_consumption.kwh"], 
     "out.x", "pivot", "Utility Bills", "Propane", "Total", "Total"],
    ["out.bills_local.all_fuels.total.usd", 0, 1, ["out.bills_local.electricity.total.usd", "out.bills_local.natural_gas.total.usd", "out.bills_local.fuel_oil.total.usd", "out.bills_local.propane.total.usd"], 
     "out.x", "pivot", "Utility Bill Totals", "Energy", "Total", "Total"],
    ["out.bills_local.electricity.total.usd.savings", 0, var_elec_cost_per_kwh, ["out.electricity.total.energy_consumption.kwh.savings"], 
     "out.x", "pivot", "Utility Bills", "Electricity", "Electricity Total", "Total"],
    ["out.bills_local.natural_gas.total.usd.savings", fixed_ng_cost_monthly, var_ng_cost_per_kwh, ["out.natural_gas.total.energy_consumption.kwh.savings"], 
     "out.x", "pivot", "Utility Bill Savings", "Natural Gas", "Natural Gas Total", "Total"],
    ["out.bills_local.fuel_oil.total.usd.savings", 0, var_fo_cost_per_kwh, ["out.fuel_oil.total.energy_consumption.kwh.savings"], 
     "out.x", "pivot", "Utility Bill Savings", "Fuel Oil", "Fuel Oil Total", "Total"],
    ["out.bills_local.propane.total.usd.savings", 0, var_propane_cost_per_kwh, ["out.propane.total.energy_consumption.kwh.savings"], 
     "out.x", "pivot", "Utility Bill Savings", "Propane", "Propane Total", "Total"],
    ["out.bills_local.all_fuels.total.usd.savings", 0, 1, ["out.bills_local.electricity.total.usd.savings", "out.bills_local.natural_gas.total.usd.savings", "out.bills_local.fuel_oil.total.usd.savings", "out.bills_local.propane.total.usd.savings"], 
     "out.x", "pivot", "Utility Bills Savings Totals", "Energy", "Total", "Total"]
]

rate_inputs_df = pd.DataFrame(rates_data_inputs, columns = ['column', 'fixed monthly cost', 'variable cost per kwh', 'col list for scaling', 'col_type', 'plan', 'Result Type', 'Fuel', 'End Use', 'End Use Category'])


0.0491440437526616
0.06998572515142275
0.11916420397790706


In [33]:
# process multiple sets of data
up_list = ["baseline", "upgrade02", "upgrade04", "upgrade07", "upgrade09", "upgrade12", "upgrade13", "upgrade16"]
processed_data = []
for up in up_list:
    print (up)
    resstock_file_name = "PA_" + up + "_metadata_and_annual_results.csv"
    outname = "PA_" + up + "processed_results.csv"
    results = ResStock_data_process(
        resstock_results_folder = "C:/Users/epresent/NREL/BuildStock Analysis User Engagement-C2C Delaware - Documents/10_Analysis/Data/2024.2/AMY2018",
        resstock_file_name = resstock_file_name,
        downselect_rows_tf = True, 
        downselect_row_fields = ["in.county_name"],
        values_to_keep = [["Montgomery County", "Bucks County", "Chester County", "Delaware County"]],
        col_plan_folder = 'C:/Users/epresent/NREL/BuildStock Analysis User Engagement-C2C Delaware - Documents/10_Analysis/Upgrade (4.3)',
        col_plan_name = '2024-2 Col Plan including C2C DV Upgrades.csv',
        add_wide_fields_tf = False,
        dfs_for_wide_fields = 'NA',
        wide_mergeon_fields = 'NA',
        wide_merge_cols = 'NA', 
        wide_col_plans = 'NA',
        wide_merge_newnames = 'NA',
        add_local_bills_tf = True,
        add_first_costs_tf = True, 
        cost_inputs_folder = 'C:/Users/epresent/NREL/BuildStock Analysis User Engagement-C2C Delaware - Documents/10_Analysis/Cost Data', 
        cost_inputs_filename = 'Cost Inputs.csv',
        downselect_cols_tf = True, 
        add_long_fields_tf = False, 
        rate_inputs_df = rate_inputs_df, 
        long_fields_also_wide_tf = True,
        long_fields_also_wide = ["out.emissions.all_fuels.lrmer_mid_case_15.co2e_kg", "out.bills_local.all_fuels.total.usd"], 
        long_fields_also_wide_names = ["Emissions", "Utility Bills Total"],
        save_file_tf = True,
        output_folder = "C:/Users/epresent/NREL/BuildStock Analysis User Engagement-C2C Delaware - Documents/10_Analysis/Upgrade (4.3)", 
        output_file_name = outname, 
        debug_tf = False
        )
    processed_data = processed_data + [results.data_long]

baseline
These columns are not in the data and will be added, with NaNs:
['out.load.hot_water.energy_delivered.kbtu.savings', 'out.natural_gas.permanent_spa_heat.energy_consumption.kwh.savings', 'out.electricity.heating_hp_bkup_fa.energy_consumption.kwh.savings', 'upgrade.water_heater_fuel', 'out.electricity.lighting_exterior.energy_consumption.kwh.savings', 'out.fuel_oil.hot_water.energy_consumption.kwh.savings', 'upgrade.heating_setpoint_offset_period', 'upgrade.misc_pool_heater', 'out.propane.clothes_dryer.energy_consumption.kwh.savings', 'out.electricity.total.energy_consumption.kwh.savings', 'out.electricity.plug_loads.energy_consumption.kwh.savings', 'out.electricity.lighting_interior.energy_consumption.kwh.savings', 'out.emissions_reduction.fuel_oil.lrmer_mid_case_25.co2e_kg', 'out.hot_water.dishwasher.gal.savings', 'upgrade.misc_hot_tub_spa', 'out.natural_gas.fireplace.energy_consumption.kwh.savings', 'out.electricity.heating.energy_consumption.kwh.savings', 'out.emissions_redu

  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[col] = np.nan
  self.data[row['column']] = row['fixed monthly cost']*12*((self.data[row['col list for scaling']].sum(axis = 1))!=0) + row['variable cost per kwh']*(self.data[row['col list for scaling']].sum(axis = 1))
  self.data[row['column']] = row['fixed monthly cost']*12*((self.data[row['col list for scaling']].sum(axis = 1))!=0) + row['variable cost per kwh']*(self.data[row['col list for scaling']].sum(axis = 1))
  self.data[row['column']] = row['fixed monthly cost']*12*((self.data[row['col list for scaling']].sum(axis = 1))!=0) + row['variable cost per kwh']*(self.data[row['col list for scaling']].sum(axis = 1))
  self.data[row['column']] = row['fixed monthly co

first costs
0
0
0
0
upgrade02
These columns are not in the data and will be added, with NaNs:
['upgrade.insulation_ceiling', 'upgrade.cooking_range', 'upgrade.misc_hot_tub_spa', 'upgrade.water_heater_efficiency', 'upgrade.hvac_secondary_heating_fuel', 'upgrade.misc_pool_heater', 'upgrade.hvac_secondary_heating_efficiency', 'upgrade.infiltration_reduction', 'upgrade.water_heater_fuel', 'upgrade.clothes_dryer']
first costs
2
19301.454759658332
2
10011.366825000001
upgrade04
These columns are not in the data and will be added, with NaNs:
['upgrade.insulation_ceiling', 'upgrade.cooking_range', 'upgrade.misc_hot_tub_spa', 'upgrade.water_heater_efficiency', 'upgrade.misc_pool_heater', 'upgrade.infiltration_reduction', 'upgrade.water_heater_fuel', 'upgrade.clothes_dryer']
first costs
4
12572.595286586667
4
8804.340208333333
upgrade07
These columns are not in the data and will be added, with NaNs:
['upgrade.cooking_range', 'upgrade.misc_hot_tub_spa', 'upgrade.water_heater_efficiency', 'upgrade

In [None]:
#save multiple sets of processed data 

##set destination
output_folder = "C:/Users/epresent/NREL/BuildStock Analysis User Engagement-C2C Delaware - Documents/10_Analysis/Upgrade (4.3)"
output_file_name = "C2C_DV_data_with_ups.csv"
output_path = os.path.join(output_folder, output_file_name)

#save first set of data, with headers
processed_data[0].to_csv(output_path, header = True, index = False)

#save remaining data, appending it to the same file. This should be fine since all the datasets should now be generating the same set of columns in the same order.
for dataset in processed_data[1:]:
    print(dataset.head())
    dataset.to_csv(output_path, header = False, index = False, mode = 'a')

   bldg_id  upgrade      weight  \
0       60        2  252.301639   
1       93        2  252.301639   
2       60        2  252.301639   
3       93        2  252.301639   
4       60        2  252.301639   

                                        upgrade_name  applicability  \
0  High efficiency cold-climate heat pump with el...           True   
1  High efficiency cold-climate heat pump with el...           True   
2  High efficiency cold-climate heat pump with el...           True   
3  High efficiency cold-climate heat pump with el...           True   
4  High efficiency cold-climate heat pump with el...           True   

  in.area_median_income in.ashrae_iecc_climate_zone_2004  \
0                 150%+                               4A   
1              100-120%                               4A   
2                 150%+                               4A   
3              100-120%                               4A   
4                 150%+                               4A   

 