# Create the "Aggregate Dataset" for LA100 - Upgrades

This notebook compares the total-stock results of two upgrade runs. Both runs apply the same upgrade, from window AC to a minimum-efficiency mini-split heat pump (MSHP):
* resstock-old (pre-HPXML) SHA: 08aa4eb3cc20908415fad631c8cbe9c29541d001 (04/08/2022)
* HPXML: SHA: 5f3f50fce1f45bc69fd60ad0f2276c464d4ba717 (04/09/2022)

The YAML files for both runs are available in la100es-resstock: https://github.com/NREL/la100es-resstock/tree/ll/pre-post-hpxml-comparison-runs/project_la

### 1. Import Modules

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

from eulpda.smart_query.EULPAthena import EULPAthena
import eulpcv.resstock_enduse_categories as enduse_categories
import eulpcv.hpxml_resstock_enduse_categories as hpxml_enduse_categories

from make_la100_dataset import AggregateDataLA

In [None]:
# Local cache/output folder for this notebook: <current working dir>/data/la100_upgrades.
# Created on first use (including missing parents); an existing folder is left as is.
datadir = Path(".").resolve().joinpath("data", "la100_upgrades")
datadir.mkdir(exist_ok=True, parents=True)
print(f"data directory: {datadir}")

### 2. Get simulated data - baseline and upgrades

In [None]:
# Which set of simulation results to pull: "baseline" or "upgrades".
# With "baseline", results are still subset to the buildings the upgrade applies to.
sim_type = "upgrades"

# The query cells treat every value other than "baseline" as "upgrades", so a typo
# would silently run the upgrade query and write a mis-named cache file. Fail early.
if sim_type not in ("baseline", "upgrades"):
    raise ValueError(f"sim_type must be 'baseline' or 'upgrades', got {sim_type!r}")

##### 2.1. Get - Old ResStock run

In [None]:
# End-use columns excluded from the old-ResStock query (central-system end uses).
enduses_not_available = [
    'electricity_central_system_pumps_heating_kwh',
    'electricity_central_system_pumps_cooling_kwh',
    'electricity_central_system_heating_kwh',
    'electricity_central_system_cooling_kwh',
]

# Keep every entry of the category mapping whose key is not in the exclusion list.
enduse_dict = {
    enduse: category
    for enduse, category in enduse_categories.enduse_category_dict().items()
    if enduse not in enduses_not_available
}
# Column names to query: the surviving keys, in mapping order.
enduses = list(enduse_dict)


In [None]:
# Old (pre-HPXML) ResStock run: load the aggregated timeseries from the local CSV cache
# when it exists, otherwise build it from Athena and write the cache.
table_name = "2012_old_resstock_mshp_run"
file = datadir / f"{table_name}_{sim_type}_15min_timeseries.csv"

if file.exists():
    df_sim = pd.read_csv(file, parse_dates=["time"])

else:
    # Take the query columns from enduse_dict instead of the notebook-level `enduses`:
    # the plotting set-up cell rebinds `enduses` to the abbreviated plot categories, so
    # re-running this cell after that one would otherwise query the wrong columns.
    query_enduses = list(enduse_dict)

    # [1] Initialize Athena object
    Athena = EULPAthena(
        workgroup='eulp',
        db_name='la-100',
        buildstock_type='resstock',
        table_name=table_name,
    )

    # [2] Get applicable building_id (upgrade 1 completed successfully and was applicable)
    query = f"""
    SELECT "building_id" FROM "{table_name}_upgrades"
    WHERE "upgrade" = '1' AND "completed_status" = 'Success' AND "apply_upgrade.applicable" = true
    """
    bldgs = Athena.execute(query)
    print(f"{table_name} has {len(bldgs)} bldgs with upgrade applied")

    # [3] Get aggregates, restricted to those buildings and to the requested scenario
    restrict = [
        ("completed_status", ['Success']),
        (f'"{table_name}_timeseries"."building_id"', bldgs["building_id"].to_list()),
    ]
    if sim_type == "baseline":
        restrict.append(
            (f'"{table_name}_timeseries"."upgrade"', ['0'])
        )
    else:
        restrict.append(
            (f'"{table_name}_timeseries"."upgrade"', ['1'])
        )
    query = Athena.aggregate_timeseries(
            enduses = query_enduses,
            group_by = ['time'],
            order_by = ['time'],
            restrict = restrict,
            get_query_only=True,
        )
    df_sim = Athena.execute(query)
    # NOTE(review): the cached branch parses "time" as datetime; confirm Athena.execute
    # returns the same dtype so both branches feed identical data downstream.

    # adjustment - kwh_per_unit: divide every end-use column by the scaled unit count
    df_sim[query_enduses] = df_sim[query_enduses].div(df_sim["scaled_unit_count"], axis=0)

    df_sim = df_sim.drop(columns=["scaled_unit_count"])
    df_sim.to_csv(file, index=False)

df_sim


In [None]:
### reduce data dimensions
# Resample to a '1h' frequency, then merge the detailed end-use columns into the
# categories given by enduse_dict.
# NOTE: this cell overwrites df_sim with its own output, so it is meant to run exactly
# once after the load cell above.
df_sim = AggregateDataLA.combine_end_uses(
    AggregateDataLA.resample_data(
        df_sim,
        freq='1h',
        make_period_beginning=True,
        count_cols=["raw_count"],
    ),
    enduse_dict,
)

df_sim

##### 2.2. Get - HPXML run

In [None]:

# End-use -> category mapping for the HPXML run; passed to combine_end_uses further down.
enduse_dict2 = hpxml_enduse_categories.enduse_category_dict()
# End-use column names requested from the HPXML timeseries table.
# NOTE(review): unlike the old-ResStock cell, no end uses are filtered out here -
# presumably every listed column exists in the HPXML table; confirm.
enduses2 = hpxml_enduse_categories.enduse_list()

In [None]:
# HPXML run: same procedure as for the old-ResStock run - use the local CSV cache when
# present, otherwise query Athena and write the cache.
table_name2 = "2012_hpxml_mshp_run"
file2 = datadir / f"{table_name2}_{sim_type}_15min_timeseries.csv"

if not file2.exists():
    # [1] Initialize Athena object
    Athena2 = EULPAthena(
        workgroup='eulp',
        db_name='la-100',
        buildstock_type='resstock',
        table_name=table_name2,
    )

    # [2] Get applicable building_id (upgrade 1 completed successfully and was applicable)
    query2 = f"""
    SELECT "building_id" FROM "{table_name2}_upgrades"
    WHERE "upgrade" = '1' AND "completed_status" = 'Success' AND "apply_upgrade.applicable" = true
    """
    bldgs2 = Athena2.execute(query2)
    print(f"{table_name2} has {len(bldgs2)} bldgs with upgrade applied")

    # [3] Get aggregates: upgrade '0' for the baseline scenario, '1' for anything else
    upgrade_id = '0' if sim_type == "baseline" else '1'
    restrict = [
        ("completed_status", ['Success']),
        (f'"{table_name2}_timeseries"."building_id"', bldgs2["building_id"].to_list()),
        (f'"{table_name2}_timeseries"."upgrade"', [upgrade_id]),
    ]
    query2 = Athena2.aggregate_timeseries(
        enduses=enduses2,
        group_by=['time'],
        order_by=['time'],
        restrict=restrict,
        get_query_only=True,
    )
    df_sim2 = Athena2.execute(query2)

    # adjustment - kwh_per_unit: divide each end-use column by the scaled unit count
    for eu in enduses2:
        df_sim2[eu] = df_sim2[eu] / df_sim2["scaled_unit_count"]

    df_sim2 = df_sim2.drop(columns="scaled_unit_count")
    df_sim2.to_csv(file2, index=False)

else:
    df_sim2 = pd.read_csv(file2, parse_dates=["time"])

df_sim2

In [None]:
### reduce data dimensions
# Resample to a '1h' frequency, then merge the detailed end-use columns into the
# categories given by enduse_dict2.
# NOTE: this cell overwrites df_sim2 with its own output, so it is meant to run exactly
# once after the load cell above.
df_sim2 = AggregateDataLA.combine_end_uses(
    AggregateDataLA.resample_data(
        df_sim2,
        freq='1h',
        make_period_beginning=True,
        count_cols=["raw_count"],
    ),
    enduse_dict2,
)

df_sim2

### 3. Compare end use plots between Old-ResStock and HPXML

In [None]:
### plotting parameters

# Calendar months grouped by season (taken from visualization_notebook).
seasons = dict(
    summer_months=[5, 6, 7, 8, 9],
    shoulder_months=[4, 10, 11],
    winter_months=[1, 2, 3, 12],
)

# End-use palette (taken from EZVIZ), kept as (label, colour) pairs so each hex code
# stays next to the end use it is meant for. Order matters: colours are handed to the
# stacked plot positionally.
_enduse_palette = [
    ("Interior Lighting", '#F7DF10'),
    ("Exterior Lighting", '#DEC310'),
    ("Plug Loads", '#4A4D4A'),
    ("Refrigerator", '#29AAE7'),
    ("Extra Refrigerator", '#3cb6f0'),
    ("Freezer", '#59caff'),
    ("Clothes Washer", '#51e889'),
    ("Clothes Dryer", '#FF79AD'),
    ("Dishwasher", '#D3D3D3'),
    ("Cooking Range", '#ff2200'),
    ("Well Pump", '#632C94'),
    ("Pool/Spa Pump", '#ff7300'),
    ("Hot Water", '#FFB239'),
    ("Ceiling Fan", '#C0C0C0'),
    ("Vent Fans", '#FF79AD'),
    ("HVAC Fan/Pump", '#632C94'),
    ("Cooling", '#0071BD'),
    ("Heating", '#EF1C21'),
    ("Electric Vehicle", '#1adb61'),
    ("PV", '#4748a8'),
]
color_list = [hex_code for _label, hex_code in _enduse_palette]

In [None]:
### set up data
metric = "kwh_per_unit"
# Plot categories: the abbreviated end-use list without the "total" entry.
# NOTE: this rebinds the notebook-level `enduses` that the old-ResStock cells defined earlier.
enduses = [x for x in enduse_categories.abbreviated_enduse_list() if x != "total"]

# Index by timestamp and keep only the plot categories each run actually contains.
df_plot = df_sim.set_index(["timestamp"])[[x for x in enduses if x in df_sim.columns]]
df_plot2 = df_sim2.set_index(["timestamp"])[[x for x in enduses if x in df_sim2.columns]]

# Shared y-limits so all panels are directly comparable.
# Lower limit: the smallest "pv" value across both runs, padded by 20 %. The column
# selection above tolerates missing end uses, so only consult "pv" where it is present
# and fall back to 0 when neither run has it (previously a KeyError).
pv_minima = [frame["pv"].min() for frame in (df_plot, df_plot2) if "pv" in frame.columns]
ymin = min(pv_minima, default=0.0) * 1.2
# Upper limit: 75 % of the largest hourly total across both runs.
# NOTE(review): presumably 0.75 is there because the plotted diurnal means sit below
# the single-hour peak; confirm the factor is intentional.
ymax = max(
    df_plot.sum(axis=1).max(),
    df_plot2.sum(axis=1).max()
) * 0.75
print(f"ymin: {ymin}, ymax: {ymax}")

In [None]:
### plot
# Stacked diurnal end-use profiles (mean by hour of day), one row per season:
# left column = old-ResStock run, right column = HPXML run, shared y-limits.
print(f"Min-eff MSHP Upgrade: {sim_type}")
fig = plt.figure(figsize=(10, 10))

sim_run = "Old-ResStock"
sim_run2 = "HPXML"
panels = [(sim_run, df_plot), (sim_run2, df_plot2)]

fig_n = 0
for season, months in seasons.items():
    for col, (run_label, df_run) in enumerate(panels):
        fig_n = fig_n + 1
        ax = plt.subplot(len(seasons), len(panels), fig_n)

        # Mean profile by hour of day over the months of this season.
        df_diurnal = df_run.loc[df_run.index.month.isin(months)]
        df_diurnal = df_diurnal.groupby(df_diurnal.index.hour).mean()

        # Add end uses this run lacks as NaN columns so both runs plot the same
        # columns in the same order and the colours line up between panels.
        for eu in set(enduses).difference(df_diurnal.columns):
            df_diurnal[eu] = np.nan

        df_diurnal[enduses].plot(kind="area", stacked=True, ax=ax, color=color_list, legend=False, ls='None')
        if col == 0:
            # Only the left-hand panel of each row carries the y-axis label.
            ax.set_ylabel(metric)
        ax.set_ylim(bottom=ymin, top=ymax)
        ax.margins(x=0)
        ax.set_xticks(np.arange(df_diurnal.index[0], df_diurnal.index[-1], 3))
        ax.set_title(f'{run_label} - {season}')

# One shared legend, built from the last panel (every panel plots the same columns),
# reversed so its order matches the visual stacking from top to bottom.
ax = plt.gca()
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles[::-1], labels[::-1], loc="center left", bbox_to_anchor=(1, 0.5))
fig.tight_layout()

# The legend is anchored outside the figure canvas; bbox_inches="tight" expands the
# saved area to include it. Without it the PDF is written without the legend.
figfile = datadir / f"enduse_comparison_{sim_type}.pdf"
fig.savefig(figfile, bbox_inches="tight")

print(f'{sim_run}: {df_sim["raw_count"].mean()}')
print(f'{sim_run2}: {df_sim2["raw_count"].mean()}')
