In [1]:
"""
The purpose of this document is to create figures 
for identifying the most at risk housing types for bill increases
by Census Divisions
using EUSS results - round 1


Data sourced from internal C-LEAP analysis and dashboard located here: 

https://public.tableau.com/app/profile/nrel.buildingstock/viz/Test-StateLevelResidentialBuildingStockandEnergyEfficiencyElectrificationPackagesAnalysis/Introduction


Created by: Katelyn Stenger
Review by: TBD
"""

'\nThe purpose of this document is to create figures \nfor identifying the most at risk housing types for bill increases\nby Census Divisions\nusing EUSS results - round 1\n\n\nData sourced from internal C-LEAP analysis and dashboard located here: \n\nhttps://public.tableau.com/app/profile/nrel.buildingstock/viz/Test-StateLevelResidentialBuildingStockandEnergyEfficiencyElectrificationPackagesAnalysis/Introduction\n\n\nCreated by: Katelyn Stenger\nReview by: TBD\n'

In [2]:
import numpy as np
import pandas as pd
import scipy.stats as st
from pathlib import Path
import pathlib
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [3]:
# import files
file = "/Users/kstenger/Documents/Volumes/process_euss_results.csv"
df = pd.read_csv(file)


# Remapping features

In [4]:
# Remap AMI for calculations
@staticmethod
def remap_area_median_income(df):
    df["AMI"] = df["area_median_income"].map(
        {
            "0-30%": "<80% AMI",
            "30-60%": "<80% AMI",
            "60-80%": "<80% AMI",
            "80-100%": "80%-150% AMI",
            "100-120%": "80%-150% AMI",
            "120-150%": "80%-150% AMI",
            "150%+": "150%+ AMI",
        }
    )
    return df

# Remap Metro
@staticmethod
def remap_metro(df):
    df["metro"] = df["puma_metro_status"].map(
        {
            "In metro area, not/partially in principal city": "Metro",
            "In metro area, principal city": "Metro",
            "Not/partially in metro area": "Non/Part-Metro",
        }
    )
    return df

# Remap Dwelling Geometries
@staticmethod
def remap_geom(df):
    df["geometry"] = df[ 'geometry_building_type_acs'].map(
        {
            'Single-Family Detached': "Single-Family", 
            'Mobile Home': "Single-Family",
            'Single-Family Attached': "Single-Family", 
            '10 to 19 Unit': "Multi-Family",
            '5 to 9 Unit': "Multi-Family", 
            '2 Unit': "Multi-Family", 
            '20 to 49 Unit': "Multi-Family",
            '50 or more Unit': "Multi-Family", 
            '3 or 4 Unit': "Multi-Family",

        }
    )
    return df

# Remap Cooling
@staticmethod
def remap_cooling(df):
    df["has_cooling"] = df[ 'hvac_cooling_type'].map(
        {
            "Central AC": "Yes",
            "Heat Pump": "Yes",
            "None": "No",
            "Room AC": "Yes",
        }
    )
    return df

# Remap Vintage
@staticmethod
def remap_vintage(df):
    df["vintage"] = df["vintage_acs"].map(
        {
            "<1940": "before 1980",
            "1940-59": "before 1980",
            "1960-79": "before 1980",
            "1980-99": "after 1980",
            "2000-09": "after 1980",
            "2010s": "after 1980",
        }
    )
    return df

# remap upgrade technologies
@staticmethod
def remap_upgrade(df):
    df["upgrade_name"] = df["upgrade_name"].map(
        {
            'enclosure.basic_upgrade':'Basic Enclosure',
            'hvac.heat_pump_min_eff_existing_backup': 'Min. Efficiency Heat Pump',
            'hvac.heat_pump_high_eff_electric_backup':'High Efficiency Heat Pump',
            'hvac.heat_pump_high_eff_electric_backup + enclosure.basic_upgrade': 'High Efficiency Heat Pump and Basic Enclosure'
        }
    )
    return df


# Remap df
@staticmethod
def remap(df):
    remap_area_median_income(df)
    remap_metro(df)
    remap_geom(df)
    remap_cooling(df)
    remap_vintage(df)
    return df

# Sample weight
weight = 242.13101273

## General Demographics for Division

In [5]:
# Subset to one upgrade since demographics are consistent across upgrades
dfb = df[df['upgrade_name'] =="enclosure.basic_upgrade"]

dfb = remap(dfb)

# Generate a csv for demographics detailed by Census Division

df_demo = pd.DataFrame(columns = ['census_division',
                                    'ami_80_count',
                                    'ami_150_count',
                                    'metro_perc',
                                    'owner_perc',
                                    'sf_dwelling_count',
                                    'mf_dwelling_count',
                                    'heating_ng_perc',
                                    'heating_elec_perc',
                                    'heating_fo_perc',
                                    'heating_prop_perc',
                                    'heating_none_perc',
                                    'heating_other_perc',
                                    'has_cooling_perc'])

ls_census = ((dfb['census_division'].unique()).tolist())

for i in ls_census:
    
    # total models in census_division (used for percentages)
    total = dfb[(dfb['census_division'] == i)]['building_id'].count()
    print(total)
    #0-80% AMI; (total count)
    ami_80 = dfb[(dfb['census_division'] == i) & 
                (dfb['AMI'] == "<80% AMI")
                ]['building_id'].count()*weight
    
    #80-150% AMI (total count)
    ami_150 = dfb[(dfb['census_division'] == i) & 
                (dfb['AMI'] == "80%-150% AMI")
                ]['building_id'].count()*weight
    
    # % Metro (v. non-Metro)
    metro = dfb[(dfb['census_division'] == i) & 
            (dfb['metro'] == "Metro")
            ]['building_id'].count()/total
    
    # % Owner (v. Renter)
    tenure = dfb[(dfb['census_division'] == i) & 
        (dfb['tenure'] == "Owner")
        ]['building_id'].count()/total

    # Single family (total count)
    sf = dfb[(dfb['census_division'] == i) & 
                (dfb['geometry'] == "Single-Family")
                ]['building_id'].count()*weight

    # Multifamily households (total count)
    mf = dfb[(dfb['census_division'] == i) & 
                (dfb['geometry'] == "Multi-Family")
                ]['building_id'].count()*weight

    # Heating Fuel (% for 'Propane', 'Natural Gas', 'Electricity', 'None', 'Other Fuel', 'Fuel Oil')
    ng = dfb[(dfb['census_division'] == i) & 
        (dfb['heating_fuel'] == "Natural Gas")
        ]['building_id'].count()/total

    elec = dfb[(dfb['census_division'] == i) & 
        (dfb['heating_fuel'] == "Electricity")
        ]['building_id'].count()/total

    prop = dfb[(dfb['census_division'] == i) & 
        (dfb['heating_fuel'] == "Propane")
        ]['building_id'].count()/total
    
    fo = dfb[(dfb['census_division'] == i) & 
        (dfb['heating_fuel'] == "Fuel Oil")
        ]['building_id'].count()/total
    
    other = dfb[(dfb['census_division'] == i) & 
        (dfb['heating_fuel'] == "Other Fuel")
        ]['building_id'].count()/total

    none = dfb[(dfb['census_division'] == i) & 
        (dfb['heating_fuel'] == "None")
        ]['building_id'].count()/total
    
    # % Have Cooling (v. do not have cooling)
    cool = dfb[(dfb['census_division'] == i) & 
        (dfb['has_cooling'] == "Yes")
        ]['building_id'].count()/total
    
    # append values to df
    df_demo = df_demo.append({
        'census_division':i,
        'ami_80_count':ami_80,
        'ami_150_count':ami_150,
        'metro_perc':metro,
        'owner_perc':tenure,
        'sf_dwelling_count':sf,
        'mf_dwelling_count':mf,
        'heating_ng_perc':ng,
        'heating_elec_perc':elec,
        'heating_fo_perc':fo,
        'heating_prop_perc':prop,
        'heating_none_perc':none,
        'heating_other_perc':other,
        'has_cooling_perc':cool,
    }, ignore_index = True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["AMI"] = df["area_median_income"].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["metro"] = df["puma_metro_status"].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["geometry"] = df[ 'geometry_building_type_acs'].map(
A value is trying to be set on a copy of a slice from a DataFram

111236


  df_demo = df_demo.append({


38577


  df_demo = df_demo.append({


62922


  df_demo = df_demo.append({


35585


  df_demo = df_demo.append({


74018


  df_demo = df_demo.append({


69953


  df_demo = df_demo.append({


79892


  df_demo = df_demo.append({


25842


  df_demo = df_demo.append({


33798


  df_demo = df_demo.append({


### Export Demographric summaries for each census division

In [6]:
# Fills in initial slidde demographics
output_file = '/Users/kstenger/Documents/c. IRA_Estimation/Fact Sheets - Census Division/production_statistics'
name = 'census_division_demographics_prereview_230623.csv'
df_demo.to_csv(output_file + name)

## Demographic Functions: stacked bar chart

In [7]:
# Plotting Functions
def plot_stacked_ami_cool_tenure(df,division):
    # Groupby data
    df_fig=df.groupby(['AMI','tenure','has_cooling'])['building_id'].sum().reset_index()

    df_fig['Percentage']=df_fig.groupby(['AMI','tenure','has_cooling'])['building_id'].sum().groupby(level=0).apply(lambda x:100 * x/float(x.sum())).values

    # Create Figure
    fig = px.bar(df_fig, x='AMI', y='Percentage', 
            color='has_cooling',
            pattern_shape = "tenure",
            barmode   ='stack',
            category_orders={
                "AMI": ["<80% AMI", "80%-150% AMI", "150%+ AMI"],
                "has_cooling":["Yes","No"]
                #TODO: redo order (other to Electric)
            },
            labels={
                "has_cooling": "Cooling Access",
                "tenure": "Tenure",
            },
            color_discrete_sequence =['green', 'gray'],)
    title_name = division+" Housing Demographics"
    fig.update_layout(title = title_name, 
                      xaxis_title = '% Area Median Income', yaxis_title =  
                      'Housing Stock (%)', width = 800, height = 400)
    
    # Export Figure to SVG (production quality)
    fig.write_image(output_file + title_name + ' ami cool tenure.svg')
    return

    
    
def plot_stacked_ami_vintage_heating(df,division):
    # Groupby data
    
    df_fig=df.groupby(['AMI','vintage','heating_fuel'])['building_id'].sum().reset_index()

    df_fig['Percentage']=df_fig.groupby(['AMI','vintage','heating_fuel'])['building_id'].sum().groupby(level=0).apply(lambda x:100 * x/float(x.sum())).values

    # Create Figure
    fig = px.bar(df_fig, x='AMI', y='Percentage', 
            color='heating_fuel',
            pattern_shape = "vintage",
            barmode   ='stack',
            category_orders={
                "AMI": ["<80% AMI", "80%-150% AMI", "150%+ AMI"],
                "vintage": ["before 1980","after 1980"],
                "heating_fuel":["Electricity","Natural Gas", "Propane", "Fuel Oil", "Other Fuel","None"]
            },
            labels={
                "vintage": "Vintage",
                "heating_fuel": "Heating Fuel",
            },
            color_discrete_sequence =['green', '#C3E6D9','#A3BFB5','#6C8079','#36403C','black'])
    title_name = division+" Housing Demographics"
    fig.update_layout(title = title_name, 
                      xaxis_title = '% Area Median Income', yaxis_title =  
                      'Housing Stock (%)', width = 800, height = 400)
    
    # Export Figure to SVG (production quality)
    fig.write_image(output_file + title_name + ' ami vintage heating.svg')
    return

## Iterate through each division and create figures

In [8]:
# list and iterate
ls_census = ((dfb['census_division'].unique()).tolist())
df = remap(df)
# Export Figure to SVG (production quality)
output_file = '/Users/kstenger/Documents/c. IRA_Estimation/Fact Sheets - Census Division/production pre-review visuals/'

for i in ls_census:
    # filter
    test = df[(df['census_division'] ==i)]
    # plot
    plot_stacked_ami_cool_tenure(test,i)
    plot_stacked_ami_vintage_heating(test,i)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df_fig['Percentage']=df_fig.groupby(['AMI','tenure','has_cooling'])['building_id'].sum().groupby(level=0).apply(lambda x:100 * x/float(x.sum())).values

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)


Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)


Not prepending group keys to the result index of transform-like apply. In the future

# Upgrades and Bill Analysis

## Technlogy Functions -- bar charts

In [9]:
def plot_bill_savings_cooling(df,division):   
    # Relative importance of rates vs. energy consumption
    fig = px.bar(
        data_frame = df.groupby(['has_cooling','upgrade_name']).mean().reset_index(),
        x = "upgrade_name",
        y = "saving_bill.total_usd",
        color="has_cooling",
        barmode = "group",
        category_orders={
            "upgrade_name":['Basic Enclosure',
                            'Min. Efficiency Heat Pump',
                           'High Efficiency Heat Pump',
                           'High Efficiency Heat Pump and Basic Enclosure'],
            "has_cooling":['Yes','No']
        },
        title = "Average Bill Savings by Upgrade and Cooling Type " + division,
        labels={
            "has_cooling": "Cooling in Baseline",
            "saving_bill.total_usd": "Utility Bill Savings ($-2022)",
            "upgrade_name": "Upgrade Name"
        },
        width=1000, height=800
    )

    fig.write_image(output_file + division+' bill saving by cooling.svg')
    return

def plot_household_with_bill_incr_by_heating(df,division):
    df_bill_inc = df[df['saving_bill.total_usd']<0]
    
    fig = px.bar(
        data_frame = df_bill_inc.groupby(['heating_fuel','upgrade_name'])['sample_weight'].sum().reset_index(),
        x = "upgrade_name",
        y = "sample_weight",
        color="heating_fuel",
        barmode = "group",
        category_orders={
            "upgrade_name":['Basic Enclosure',
                            'Min. Efficiency Heat Pump',
                           'High Efficiency Heat Pump',
                           'High Efficiency Heat Pump and Basic Enclosure'],
            "heating_fuel":['Electricity',
                            'Natural Gas',
                           'Fuel Oil',
                           'Propane',
                           'Other Fuel',
                           'None']
        },
        title = "Dwellings with Bill Increases by Upgrade and Heating Type",
        labels={
            "heating_fuel": "Heating Fuel in Baseline",
            "sample_weight": "Applicable Households",
            "upgrade_name": "Upgrade Name"
        },
        width=800, height=800
    )
    fig.write_image(output_file + division+ " households with bill incr by heating.svg")
    return

def plot_bill_savings_histogram(df,division):
    
    fig = px.histogram(
        df,
        x = "saving_bill.total_usd",
        y = "sample_weight",
        color="upgrade_name",
        barmode = "group",
        nbins = 20,
        category_orders={
            "upgrade_name":['Basic Enclosure',
                            'Min. Efficiency Heat Pump',
                           'High Efficiency Heat Pump',
                           'High Efficiency Heat Pump and Basic Enclosure'],
        },
        title = "Distribution of Bill Savings by Upgrade",
        labels={
            "saving_bill.total_usd": "Utility Bill Savings ($-2022)",
            "sample_weight": "Applicable Households",
            "upgrade_name": "Upgrade Name"
        },
        width=800, height=400
    )
    fig.write_image(output_file + division+ " histogram of bill savings.svg")
    return

def plot_avg_bill_incr_by_heating(df,division):
    fig = px.bar(
        data_frame = df.groupby(['heating_fuel','upgrade_name']).mean().reset_index(),
        y = "upgrade_name",
        x = "saving_bill.total_usd",
        color="heating_fuel",
        barmode = "group",
        category_orders={
            "upgrade_name":['Basic Enclosure',
                            'Min. Efficiency Heat Pump',
                           'High Efficiency Heat Pump',
                           'High Efficiency Heat Pump and Basic Enclosure'],
            "heating_fuel":['Electricity',
                            'Natural Gas',
                           'Fuel Oil',
                           'Propane',
                           'Other Fuel',
                           'None']
        },
        title = "Dwellings with Bill Increases by Upgrade and Heating Type",
        labels={
            "heating_fuel": "Heating Fuel in Baseline",
            "saving_bill.total_usd": "Average Utility Bill Savings ($-2022)",
            "upgrade_name": "Upgrade Name"
        },
        width=1200, height=600
        )
    fig.write_image(output_file + division+' average bill savings by heating.svg')
    return

def plot_households_bill_incr_by_cooling(df,division):
    df['bill_saving_filter'] = np.where(df['saving_bill.total_usd'] > 0, True, False)
    
    fig = px.bar(
        data_frame = df.groupby(['has_cooling','upgrade_name','bill_saving_filter']).sum().reset_index(),
        x = "upgrade_name",
        y = "sample_weight",
        color="has_cooling",
        pattern_shape = "bill_saving_filter",
        barmode = "group",
        category_orders={"upgrade_name": 
                        ["Basic Enclosure", 
                        'Min. Efficiency Heat Pump',
                        'High Efficiency Heat Pump',
                        'High Efficiency Heat Pump and Basic Enclosure'],
                        "has_cooling":
                         ["Yes",
                         "No"],
                         "bill_saving_filter":
                         [True,
                         False]
                        },
        color_discrete_sequence =['green','gray'],
        labels={
            "has_cooling": "Cooling in Baseline",
            "bill_saving_filter": "Bill Savings",
            "sample_weight": "Total applicable households",
            "upgrade_name": "Upgrade Name"
        },
        width=900, height=400
    )
    fig.write_image(output_file+division+' households by bill savings and cooling access.svg')



# Energy Figures by divisions

In [10]:
# Filter to relevant technologies 
df_tech = df[(df['upgrade_name'] == 'hvac.heat_pump_min_eff_existing_backup')
                 |(df['upgrade_name'] == 'hvac.heat_pump_high_eff_electric_backup')
                 |(df['upgrade_name'] == 'hvac.heat_pump_high_eff_electric_backup + enclosure.basic_upgrade')
                 |(df['upgrade_name'] == 'enclosure.basic_upgrade')
                ]

# TODO: filter out other fuel

# list and iterate
ls_census = ((df_tech['census_division'].unique()).tolist())

# Remap upgrade names
df_tech = remap_upgrade(df_tech)

# Export Figure to SVG (production quality)
output_file = '/Users/kstenger/Documents/c. IRA_Estimation/Fact Sheets - Census Division/production pre-review visuals/'

for i in ls_census:
    # filter
    test = df_tech[(df_tech['census_division'] ==i)]
    # plot
    plot_bill_savings_cooling(test,i)
    plot_household_with_bill_incr_by_heating(test,i)
    plot_bill_savings_histogram(test,i)
    plot_avg_bill_incr_by_heating(test,i)
    plot_households_bill_incr_by_cooling(test,i)
    




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The default value o


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.


The default value of numeric_only in DataFrameGroupBy.mean is 