In [None]:
# Environment setup (shell commands, kept here for reference).
# Repository / branch: https://github.com/NREL/EULP-data-analysis/tree/smart_query_2.0
# source activate eulpda
# pip install -e .[dev]
# python -m ipykernel install --user --name eulpda --display-name eulpda

In [1]:
import pandas as pd
import yaml
from functools import reduce
from eulpda.smart_query.upgrades_analyzer import UpgradesAnalyzer as UA
import pathlib
from ast import literal_eval
import plotly.express as px
from eulpda.smart_query.resstock_athena import ResStockAthena
from eulpda.smart_query.eulp_athena import EULPAthena
from eulpda.smart_query.resstock_savings import ResStockSavings
import sqlalchemy as sa
import numpy as np

In [None]:
# Enable IPython's autoreload extension (mode 2) so edits to imported modules
# are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Raise pandas' display limits so wide / long query results are not truncated.
# Every option uses its fully qualified "display.*" key: abbreviated keys such
# as 'max_colwidth' are resolved by pattern matching and may stop working if a
# future pandas release adds another option with a similar name.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_colwidth', 100)

In [11]:
# Weight given to every sample (about 248.3 per sample).
# NOTE(review): the original note on this line reads "total downselected
# samples"; presumably 136569411 is the represented total and 550000 the
# sample count — confirm which figure the note refers to.
sample_weight = 136569411.0 / 550000.0

# Settings handed to the ResStock savings helper, collected in one place.
savings_settings = {
    'workgroup': 'zonalhp',
    'db_name': 'zonal-hp',
    'buildstock_type': 'resstock',
    'table_name': 'final001',
    'sample_weight': sample_weight,
}
resstock_savings = ResStockSavings(**savings_settings)

INFO:eulpda.smart_query.resstock_athena:Loading final001 ...
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
ERROR:pyathena.common:Failed to get table metadata.
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\eulpda\lib\site-packages\pyathena\common.py", line 152, in _get_table_metadata
    response = retry_api_call(
  File "C:\ProgramData\Anaconda3\envs\eulpda\lib\site-packages\pyathena\util.py", line 84, in retry_api_call
    return retry(func, *args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\eulpda\lib\site-packages\tenacity\__init__.py", line 404, in __call__
    do = self.iter(retry_state=retry_state)
  File "C:\ProgramData\Anaconda3\envs\eulpda\lib\site-packages\tenacity\__init__.py", line 349, in iter
    return fut.result()
  

         Fail  Unapplicaple  Success      Sum  Applied %  no-chng  bad-chng  \
upgrade                                                                       
0         349             0    89533  89882.0        0.0        0         0   
1           1             0    89532  89533.0      100.0        3       391   
2           0             0    89533  89533.0      100.0        0       427   
3           0             0    89533  89533.0      100.0        0       427   
4           0             0    89533  89533.0      100.0        0       427   

         ok-chng  true-bad-chng  true-ok-chng  null    any  no-chng %  \
upgrade                                                                 
0              0              0             0     0      0        0.0   
1          89138             56         89473     0  89532        0.0   
2          89106            312         89221     0  89533        0.0   
3          89106            312         89221     0  89533        0.0   
4       

In [3]:
# Check if the options were actually applied as per the apply logic.
# yaml_path is the YAML file handed to the integrity check below
# (presumably the project definition for this run — confirm).
yaml_path = "zonalhp.yml"
# The check itself is currently disabled; uncomment the next line to run it.
#resstock_savings.check_options_integrity(yaml_path)

In [26]:
# Query parameters: the upgrade to analyze, the output columns to aggregate,
# and the building characteristics the result is grouped by.
upgrade = 1
enduses = ['report_simulation_output.energy_use_total_m_btu']
energy_cols = enduses  # same list object, reused when post-processing the result
groupby = ['geometry_building_type_recs', 'ashrae_iecc_climate_zone_2004']
get_query_only = False
unload_to = ''

# Run the savings query with annual_only and applied_only both enabled.
shape_kwargs = dict(
    upgrade_id=upgrade,
    enduses=enduses,
    group_by=groupby,
    annual_only=True,
    applied_only=True,
    get_query_only=get_query_only,
    unload_to=unload_to,
)
df = resstock_savings.savings_shape(**shape_kwargs)

In [None]:
# Good point to save the query cache, so that the next session does not have to redo all the queries.
# NOTE(review): trim_excess=True presumably drops cache entries that are no longer needed — confirm.
resstock_savings.save_cache(trim_excess=True)

In [27]:
# Preview the first rows of the grouped savings query result.
df.head()

Unnamed: 0,sample_count,units_count,energy_use_total_m_btu__baseline,energy_use_total_m_btu__savings,geometry_building_type_recs,ashrae_iecc_climate_zone_2004
0,10455,2596060.0,188650500.0,98560560.0,Multi-Family with 2 - 4 Units,4A
1,233,57855.77,2794395.0,1088054.0,Multi-Family with 2 - 4 Units,4B
2,992,246321.6,9931765.0,3483123.0,Multi-Family with 2 - 4 Units,4C
3,13113,3256063.0,272318900.0,152063900.0,Multi-Family with 2 - 4 Units,5A
4,1175,291761.9,15810510.0,6862500.0,Multi-Family with 2 - 4 Units,5B


In [30]:
# Sanity-check the totals across all groups, one value per line, in this order:
# sample count, units count, baseline total energy use.
for total_col in ('sample_count', 'units_count', 'energy_use_total_m_btu__baseline'):
    print(df[total_col].sum())

89532
22231513.646639768
1469064754.866021


In [22]:
# Per-unit savings: each end use's total savings divided by the units count.
# `assign` returns a new frame, so `df` itself is left untouched.
enduse_names = [col.removeprefix('report_simulation_output.') for col in energy_cols]
savings = df.assign(**{
    f'{name}_average_savings': df[f'{name}__savings'] / df['units_count']
    for name in enduse_names
})

In [23]:
# Preview the savings table, now including the per-unit average savings column(s).
savings.head()

Unnamed: 0,sample_count,units_count,energy_use_total_m_btu__baseline,energy_use_total_m_btu__savings,geometry_building_type_recs,ashrae_iecc_climate_zone_2004,energy_use_total_m_btu_average_savings
0,10455,2596060.0,188650500.0,118115600.0,Multi-Family with 2 - 4 Units,4A,45.498036
1,233,57855.77,2794395.0,1311609.0,Multi-Family with 2 - 4 Units,4B,22.67033
2,992,246321.6,9931765.0,3883711.0,Multi-Family with 2 - 4 Units,4C,15.766835
3,13113,3256063.0,272318900.0,181005100.0,Multi-Family with 2 - 4 Units,5A,55.590172
4,1175,291761.9,15810510.0,8479718.0,Multi-Family with 2 - 4 Units,5B,29.063826


In [24]:
# Total change in energy consumption by building type and ASHRAE climate zone
for enduse_col in energy_cols:
    name = enduse_col.removeprefix('report_simulation_output.')
    # Grouped bars: climate zone on the x axis, one color per building type.
    fig = px.histogram(
        savings,
        x='ashrae_iecc_climate_zone_2004',
        y=f'{name}__savings',
        color='geometry_building_type_recs',
        barmode='group',
        height=500,
        title=f'Total Annual Savings for {name} - Upgrade {upgrade}',
    )
    fig.show()

In [25]:
# Average annual savings per unit, by building type and climate zone
for enduse_col in energy_cols:
    name = enduse_col.removeprefix("report_simulation_output.")
    # Same grouped layout as the totals chart, but plotting the per-unit column.
    chart_options = dict(
        x='ashrae_iecc_climate_zone_2004',
        y=f'{name}_average_savings',
        color='geometry_building_type_recs',
        barmode='group',
        height=500,
        title=f'Average annual savings per unit for {name} - Upgrade {upgrade}',
    )
    fig = px.histogram(savings, **chart_options)
    fig.show()