In [1]:
"""
Purpose of this script is to provide summary statistics and figures for State Energy Offices
These will be provided to states for understanding their building stock
"""

'\nPurpose of this script is to provide summary statistics and figures for State Energy Offices\nThese will be provided to states for understanding their building stock\n'

In [2]:
import numpy as np
import pandas as pd
import scipy.stats as st
from pathlib import Path
import pathlib
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

## Remapping Features

In [3]:
# Remap AMI for calculations
@staticmethod
def remap_area_median_income(df):
    df["AMI"] = df["area_median_income"].map(
        {
            "0-30%": "<80% AMI",
            "30-60%": "<80% AMI",
            "60-80%": "<80% AMI",
            "80-100%": "80%-150% AMI",
            "100-120%": "80%-150% AMI",
            "120-150%": "80%-150% AMI",
            "150%+": "150%+ AMI",
        }
    )
    return df

# Remap Metro
@staticmethod
def remap_metro(df):
    df["metro"] = df["puma_metro_status"].map(
        {
            "In metro area, not/partially in principal city": "Metro",
            "In metro area, principal city": "Metro",
            "Not/partially in metro area": "Non/Part-Metro",
        }
    )
    return df

# Remap Dwelling Geometries
@staticmethod
def remap_geom(df):
    df["geometry"] = df[ 'geometry_building_type_acs'].map(
        {
            'Single-Family Detached': "Single-Family", 
            'Mobile Home': "Single-Family",
            'Single-Family Attached': "Single-Family", 
            '10 to 19 Unit': "Multi-Family",
            '5 to 9 Unit': "Multi-Family", 
            '2 Unit': "Multi-Family", 
            '20 to 49 Unit': "Multi-Family",
            '50 or more Unit': "Multi-Family", 
            '3 or 4 Unit': "Multi-Family",

        }
    )
    return df

# Remap Cooling
@staticmethod
def remap_cooling(df):
    df["has_cooling"] = df[ 'hvac_cooling_type'].map(
        {
            "Central AC": "Yes",
            "Heat Pump": "Yes",
            "None": "No",
            "Room AC": "Yes",
        }
    )
    return df

# Remap Vintage
@staticmethod
def remap_vintage(df):
    df["vintage"] = df["vintage_acs"].map(
        {
            "<1940": "before 1980",
            "1940-59": "before 1980",
            "1960-79": "before 1980",
            "1980-99": "after 1980",
            "2000-09": "after 1980",
            "2010s": "after 1980",
        }
    )
    return df

# remap upgrade technologies
@staticmethod
def remap_upgrade(df):
    df["upgrade_name"] = df["upgrade_name"].map(
        {
            'enclosure.basic_upgrade':'Basic Enclosure',
            'hvac.heat_pump_min_eff_existing_backup': 'Min. Efficiency Heat Pump',
            'hvac.heat_pump_high_eff_electric_backup':'High Efficiency Heat Pump',
            'hvac.heat_pump_high_eff_electric_backup + enclosure.basic_upgrade': 'High Efficiency Heat Pump and Basic Enclosure'
        }
    )
    return df


# Remap df
@staticmethod
def remap(df):
    remap_area_median_income(df)
    remap_metro(df)
    remap_geom(df)
    remap_cooling(df)
    remap_vintage(df)
    return df



## Establish variables

# Sample weight
weight = 242.13101273

# color key
hist_blue = "#01639f"

ami_dark = "#01639f"
ami_med = "#0279c1"
ami_light = "#a0b7d9"

ac_dark = "#da8c15"
ac_light = "#f8bc86"

heat_ng = "#487525"
heat_fo = "#85ab72"
heat_prop = "#568d2d"
heat_elec = "#b9cbb1"
heat_other = "#818589"
heat_none = "#000000"

# pixel sizes
pxl_height =900
pxl_width = 1300


# States to analyze
states = ['MD', 
          'VA',
         'ID',
         'OR',
         'RI',
         'NY',
         'NM']

# Load Data

In [4]:
# import files
file = "/Users/kstenger/Documents/Volumes/process_euss_results.csv"
df = pd.read_csv(file)
df = remap(df)

## Minimum Efficiency Heat Pump

In [5]:
list(df.columns)

['building_id',
 'sample_weight',
 'ashrae_iecc_climate_zone_2004',
 'census_division',
 'census_region',
 'city',
 'clothes_dryer',
 'clothes_washer',
 'cooking_range',
 'county',
 'county_and_puma',
 'dishwasher',
 'ducts',
 'federal_poverty_level',
 'geometry_attic_type',
 'geometry_building_type_acs',
 'geometry_building_type_height',
 'geometry_building_type_recs',
 'geometry_floor_area_bin',
 'geometry_foundation_type',
 'geometry_wall_type',
 'has_pv',
 'heating_fuel',
 'hvac_cooling_efficiency',
 'hvac_cooling_partial_space_conditioning',
 'hvac_cooling_type',
 'hvac_has_ducts',
 'hvac_has_shared_system',
 'hvac_heating_efficiency',
 'hvac_heating_type',
 'hvac_heating_type_and_fuel',
 'hvac_secondary_heating_efficiency',
 'hvac_secondary_heating_type_and_fuel',
 'hvac_shared_efficiencies',
 'income_recs_2020',
 'infiltration',
 'insulation_ceiling',
 'insulation_foundation_wall',
 'insulation_rim_joist',
 'insulation_roof',
 'insulation_wall',
 'lighting',
 'occupants',
 'puma

## Demographic and Bill Impact Figures

In [5]:
def plot_bar_hist(df,state):
    # Create new df:
    bins = [-1000,-500,0,500,1000,1500,2000]
    df_demo = df['saving_bill.total_usd'].value_counts(bins=bins, sort=False).rename_axis('Bins').to_frame('Counts').reset_index()
    df_demo['Households'] = df_demo['Counts']*weight
    df_demo['Utility Bill Savings'] = df_demo['Bins'].astype(str)
    
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace('(', '')
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace(']', '')
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace(',', ' to')
            
    
    # plot in a bar graph and order categories
    fig = px.bar(
        data_frame = df_demo,
        x = "Utility Bill Savings",
        y = "Households",
        barmode = "group",
        template = "simple_white",
    )
    fig.update_layout(
        title_text="Minimum Efficiency HP with Existing Backup <br>Household Count by Bill Savings",
        title_x=0.5,
    )
    fig.write_image(output_file + state+ " histogram of bill savings.svg")
    return

def plot_bar_tech_hist(df,state,tech):
    # Create new df:
    bins = [-1000,-500,0,500,1000,1500,2000]
    df_demo = df['saving_bill.total_usd'].value_counts(bins=bins, sort=False).rename_axis('Bins').to_frame('Counts').reset_index()
    df_demo['Households'] = df_demo['Counts']*weight
    df_demo['Utility Bill Savings'] = df_demo['Bins'].astype(str)
    
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace('(', '')
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace(']', '')
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace(',', ' to')
            
    
    # plot in a bar graph and order categories
    fig = px.bar(
        data_frame = df_demo,
        x = "Utility Bill Savings",
        y = "Households",
        barmode = "group",
        template = "simple_white",
    )
    fig.update_layout(
        title_text=tech+"<br>Household Count by Bill Savings",
        title_x=0.5,
    )
    fig.write_image(output_file + state+tech+ " histogram of bill savings.svg")
    return

def plot_bar_tech_hist_uni_y(df,state,tech,max_y):
    # Uniform histogram across technologies
    # Create new df:
    bins = [-1000,-500,0,500,1000,1500,2000]
    df_demo = df['saving_bill.total_usd'].value_counts(bins=bins, sort=False).rename_axis('Bins').to_frame('Counts').reset_index()
    df_demo['Households'] = df_demo['Counts']*weight
    df_demo['Utility Bill Savings'] = df_demo['Bins'].astype(str)
    
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace('(', '')
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace(']', '')
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace(',', ' to')
            
    
    # plot in a bar graph and order categories
    fig = px.bar(
        data_frame = df_demo,
        x = "Utility Bill Savings",
        y = "Households",
        barmode = "group",
        template = "simple_white",
    )
    fig.update_layout(
        title_text=tech+"<br>Household Count by Bill Savings",
        yaxis_range=[0,max_y],
        title_x=0.5,
    )
    fig.write_image(output_file + state+tech+ " histogram of bill savings.svg")
    return


def max_y_hist(df, state, tech):
    # Create new df:
    bins = [-1000,-500,0,500,1000,1500,2000]
    df_demo = df['saving_bill.total_usd'].value_counts(bins=bins, sort=False).rename_axis('Bins').to_frame('Counts').reset_index()
    df_demo['Households'] = df_demo['Counts']*weight
    
    max_y = df_demo['Households'].max()
    
    return max_y
            

def plot_bar_hist_pos(df,state):
    # Create new df:
    bins = [0,500,1000,1500,2000]
    df_demo = df['saving_bill.total_usd'].value_counts(bins=bins, sort=False).rename_axis('Bins').to_frame('Counts').reset_index()
    df_demo['Households'] = df_demo['Counts']*weight
    df_demo['Utility Bill Savings'] = df_demo['Bins'].astype(str)

            
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace('(', '')
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace(']', '')
    df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace(',', ' to')
    
    # plot in a bar graph and order categories
    fig = px.bar(
        data_frame = df_demo,
        x = "Utility Bill Savings",
        y = "Households",
        barmode = "group",
        template = "simple_white"
    )
    fig.update_layout(
        title_text="Minimum Efficiency HP with Existing Backup <br>Household Count by Bill Savings (Positive Only)",
        title_x=0.5,
    )
    fig.write_image(output_file + state+ " histogram of bill savings.svg")
    return

def plot_pie_ami_breakout(df,state):
    
    #Pie chart
    fig = px.pie(df, values='building_id', 
                 names='AMI', 
            category_orders={
                    "AMI": ["<80% AMI", "80%-150% AMI", "150%+ AMI"],
                },
            color_discrete_sequence =[ami_dark, ami_med, ami_light])
    fig.update_traces(textinfo='none')
    fig.update_layout(
        title_text='AMI Breakout', title_x=0.5,
        font=dict(
            size=22,  # Set the font size here
        )
    )
    fig.write_image(output_file + state+ " AMI pie chart.svg")
    return

def plot_pie_cooling_breakout(df,state):
    #Pie chart
    fig = px.pie(df, values='building_id', 
                names='has_cooling', 
                category_orders={
                            "has_cooling":["Yes","No"]
                        },
                color_discrete_sequence = [ac_dark, ac_light])
    fig.update_traces(textinfo='none')
    fig.update_layout(
        title_text='Cooling Breakout', title_x=0.5,
        font=dict(
            size=22,  # Set the font size here
        ),
    )
    fig.write_image(output_file + state +" cooling pie chart.svg")
    return

def plot_pie_heating_breakout(df,state):
    #Pie chart
    fig = px.pie(df, values='building_id', 
                 names='heating_fuel', 
        category_orders={
            "heating_fuel":["Natural Gas", "Propane","Fuel Oil","Electricity", "Other Fuel","None",]
            },
        color_discrete_sequence=[heat_ng, heat_prop,heat_fo, heat_elec, heat_other, heat_none])
    fig.update_traces(textinfo='none')
    fig.update_layout(
        title_text='Heating Breakout', title_x=0.5,
        font=dict(
            size=22,  # Set the font size here
        )
    )
    fig.write_image(output_file + state + " heating pie chart.svg")
    return


## Mockup Figures

In [8]:
### Sandbox to create and mock up figures 

# output_file = '/Users/kstenger/Documents/c. IRA_Estimation/g. TA/figures/as is/'



# # State
# state = 'VA'

# df_up = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')&
#            (df['state'] == state)]

# #Plots
# plot_bar_hist(df_up, state)


# # Positive Bill Savings Plots; Stats
# df_up0 = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')&
#            (df['state'] == state)&
#             (df['saving_bill.total_usd']>0)]

# # Context of DF Filter
# output_file = '/Users/kstenger/Documents/c. IRA_Estimation/g. TA/figures/positive bill savings/'

# # Plots
# plot_bill_savings_hist_hp(df_up0, state)
# plot_pie_ami_breakout(df_up0, state)
# plot_pie_cooling_breakout(df_up0, state)
# plot_pie_heating_breakout(df_up0, state)

  df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace('(', '')
  df_demo['Utility Bill Savings'] = df_demo['Utility Bill Savings'].str.replace(']', '')


In [9]:
# Positive Bill Savings Plots and No Cost to Households; Stats
df_up1 = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')&
           (df['state'] == state)&
            (df['saving_bill.total_usd']>0)&
           (df['upgrade_cost_usd']<14001)]

# Context of DF Filter
output_file = '/Users/kstenger/Documents/c. IRA_Estimation/g. TA/figures/positive bill savings and no cost/'

# Plots
plot_bar_hist_pos(df_up1, state)
# plot_pie_ami_breakout(df_up1, state)
plot_pie_cooling_breakout(df_up1, state)
# plot_pie_heating_breakout(df_up1, state)



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.



## Production of figures

In [13]:

for i in states:
    #
    # plot as IS
    # TODO: Fill out <------
    output_file = '/Users/kstenger/Documents/c. IRA_Estimation/g. TA/figures/as is/'
    df_up = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')&
           (df['state'] == i)]
    plot_bar_hist(df_up, i)
    
    #
    # plot only positive bill savings
    # TODO: Fill out <------
    output_file = '/Users/kstenger/Documents/c. IRA_Estimation/g. TA/figures/positive bill savings/'
    df_up0 = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')&
           (df['state'] == i)&
            (df['saving_bill.total_usd']>0)]
    plot_bar_hist_pos(df_up0, i)
    plot_pie_ami_breakout(df_up0, i)
    plot_pie_cooling_breakout(df_up0, i)
    plot_pie_heating_breakout(df_up0, i)

    #
    # plot positive bill savings and no cost for 50122 (50121 is too small)   
    # TODO: Fill out <------
    output_file = '/Users/kstenger/Documents/c. IRA_Estimation/g. TA/figures/positive bill savings and no cost/'
    df_up1 = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')&
                (df['state'] == i)&
                (df['saving_bill.total_usd']>0)&
                (df['upgrade_cost_usd']<14001)&
                (df['AMI'] =='<80% AMI')
                   ]
    plot_bar_hist_pos(df_up1, i)
    plot_pie_ami_breakout(df_up1, i)
    plot_pie_cooling_breakout(df_up1, i)
    plot_pie_heating_breakout(df_up1, i)
    



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to Fa


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to Fa

In [12]:
for i in states:
    #
    # plot as IS <--- TODO: Fill out 
    output_file = '/Users/kstenger/Documents/c. IRA_Estimation/g. TA/figures/other tech/'

    #HPWH
    df_hpwh = df.loc[(df['upgrade_name'] == 'water_heater.heat_pump')&
           (df['state'] == i)]
    hpwh = "Heat Pump Water Heater"
    max_hpwh = max_y_hist(df_hpwh, state, hpwh)
    
    # Enhanced Enclosure
    df_enh = df.loc[(df['upgrade_name'] == 'enclosure.enhanced_upgrade')&
           (df['state'] == i)]
    enh = "Enhanced Enclosure"
    max_enh = max_y_hist(df_enh, state, enh)
    
    # High Efficiency HP w/ Electric Backup
    df_hp = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_high_eff_electric_backup')&
           (df['state'] == i)]
    hp = "High Efficiency HP with Electric Backup"
    max_hp = max_y_hist(df_hp, state, hp)
    
    max_y = max(max_hpwh, max_enh, max_hp)
    
    #Plot all tech
    plot_bar_tech_hist_uni_y(df_hpwh,i,hpwh, max_y)
    plot_bar_tech_hist_uni_y(df_enh,i,enh, max_y)
    plot_bar_tech_hist_uni_y(df_up,i,hp, max_y)
    

    


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to Fa


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to Fa

## Summary Statistics

In [58]:
# Import State Budget
budget_file = "/Users/kstenger/Documents/c. IRA_Estimation/50122StateBudgets.csv"
budget = pd.read_csv(budget_file)
# save statistics
file = "/Users/kstenger/Documents/c. IRA_Estimation/g. TA/stats/stats.md"

f = open(file, "w")

for i in states:    
    f.write('\n')
    f.write('--------------------')
    f.write(i)
    f.write('\n')
    
    
    # Stat1
    # For a minimum efficiency heat pump there are nearly X million homes that could see bill reductions
    count = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')&
           (df['state'] == i)]['building_id'].count()
    million = 1000000
    households = (count*weight)/million
    households = households.astype(str)
    
    f.write(' For a minimum efficiency heat pump there are ')
    f.write(households)
    f.write(' million homes that could see bill reductions')
    f.write('\n')
    
    # stat 2
    # Even limiting to households for which the Home Electrification and Appliance Rebates program 
    # would cover 100% of the cost of the equipment 
    # nearly XXX would benefit.
    count = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')&
                (df['state'] == i)&
                (df['saving_bill.total_usd']>0)&
                (df['upgrade_cost_usd']<14001)&
                (df['AMI'] =='<80% AMI')
                   ]['building_id'].count()
    households = count*weight
    households = households.astype(str)
    f.write('Even limiting to households for which the Home Electrification and Appliance Rebates program')
    f.write(' would cover 100% of the cost of the equipment nearly ')
    f.write(households)
    f.write(' would benefit.')
    f.write('\n')
    
    
    # Stat 3 + Stat 4
    # The average cost across this population is approximately 
    # $$$$ meaning with the budget for this program approximately 
    # ##### households could have their upgrades fully covered.
    avg = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')&
                (df['state'] == i)&
                (df['saving_bill.total_usd']>0)&
                (df['upgrade_cost_usd']<14001)&
                (df['AMI'] =='<80% AMI')
                   ]['upgrade_cost_usd'].mean()
    
    implement = budget.loc[budget['State'] == 'VA']['Budget for Implementation'].item()
    reach = implement/avg
    
    s_reach = reach.astype(str)
    s_avg = avg.astype(str)
    
    f.write('The average cost across this population is approximately ')
    f.write(s_avg)
    f.write(' meaning with the budget for this program approximately ')
    f.write(s_reach)
    f.write(' households could have their upgrades fully covered.')
    f.write('\n')
    
    

    


In [31]:
# Sandbox for double checking analysis and data 

df_hp = df.loc[(df['upgrade_name'] == 'hvac.heat_pump_high_eff_electric_backup')&
           (df['state'] == 'VA')]
df_hp['saving_bill.total_usd'].describe()


count    14152.000000
mean       751.841065
std        779.370481
min      -2853.526341
25%        278.415387
50%        531.203398
75%        979.263621
max      11385.211713
Name: saving_bill.total_usd, dtype: float64

# Appendix Figures

In [59]:

# Figure on slide 22
# TODO:
# # Measure Bill Savings by specific percentiles bar chart; for each measure/upgrade
# output = "/Users/kstenger/Documents/c. IRA_Estimation/g. TA/figures/supp all measure performance/"

# 5% perfermance
# 25%
# 50%
# 75%
# 95%



In [60]:
# Save to a different file location
# figure on Slide 23
# Highest Bill Saving potential (top 5%)

# Pie graph on AMI - use existing

# Pie graph on Fuel - use existing

# Pie graph on vintage
# TODO: Function logic


In [61]:
# save to a different file location
#figures on slide 24
# highest Carbon Savings (top 5% or 100 top microsegments)

# Pie graph on AMI - use existing

# Pie graph on Fuel - use existing

# Pie graph on vintage
# TODO: Function logic

In [62]:
# Microsegment Overlap
# Slide 25

# What percentage of households are in top of both lists?
# What # of households are in both lists?

# May need to change
# What the common factors
# SF homes (##%)
# Built before 1980 (##%)
# Heating fuel (##%)



