## Visualization Notebook for LA100 5000K sample run comparison

A notebook that creates comparison graphics for the City of LA total residential stock.

### LA Cohorts

Only the total residential stock is available for visualizations.

### Season definitions
Seasons in this notebook are used purely to investigate the response of our energy models to weather.

- **Winter:** monthly average temperature < 55 F
- **Shoulder:** 55 F <= monthly average temperature <= 70 F
- **Summer:** monthly average temperature > 70 F

## Load Modules

In [None]:
import os
import subprocess
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from eulpcv.EZVIZ import EZVIZ
import eulpcv.resstock_enduse_categories as enduse_categories

from make_la100_dataset import AggregateDataLA

Note: The available Athena tables in the `la-100` bucket on AWS are as follows:

- '2012_old_resstock_mshp_run'
- '2012_hpxml_mshp_run'


## 1. Set path

In [None]:
# LRD year whose aggregated dataset is compared in this notebook.
lrd_year = 2012 # <--- 2012, 2016, 2017

# The aggregated CSV sits under a sibling directory of the current working directory:
#   <cwd>/../pre_post_hpxml_comparison/data/la100_baseline/
aggregate_data_file_path = Path(".").resolve().parent.joinpath(
    "pre_post_hpxml_comparison",
    "data",
    "la100_baseline",
    f"aggregated_dataset_LRD_{lrd_year}.csv",
)

# Report whether the file is already on disk, then display the resolved path.
print(f"aggregate_data_file_path exists? : {aggregate_data_file_path.exists()}")
aggregate_data_file_path


In [None]:
# Figures are written to a per-LRD-year folder next to the aggregated dataset.
figure_output_dir = Path(".").resolve().parent.joinpath(
    "pre_post_hpxml_comparison",
    "data",
    "la100_baseline",
    "plots",
    f"LRD_{lrd_year}",
)
# Create the folder (and any missing parents); a pre-existing folder is not an error.
figure_output_dir.mkdir(parents=True, exist_ok=True)

figure_output_dir

## 2. Create / load aggregated dataset
Load the dataset directly from *aggregate_data_file_path* if that file exists; otherwise build it with `AggregateDataLA`.

In [None]:
# Obtain the aggregated dataset: build it via AggregateDataLA only when the CSV
# is not on disk, otherwise read the CSV with `timestamp` parsed as datetimes.
if not aggregate_data_file_path.exists():
    AggData = AggregateDataLA()
    aggregated_dataset_raw = AggData.get_aggregated_dataset(lrd_year)
else:
    aggregated_dataset_raw = pd.read_csv(aggregate_data_file_path, parse_dates=['timestamp'])

print("aggregated_dataset_raw loaded")
aggregated_dataset_raw

In [None]:
# Orientation printout: frame dimensions, then the column names and the
# distinct values of the `run` and `cohort` columns.
print("Shape of Aggregate Dataset: ", aggregated_dataset_raw.shape)
print()

listings = [
    ('Columns of the Aggregate Dataset:', aggregated_dataset_raw.columns.values),
    ('Truth data and simulation results available:', aggregated_dataset_raw['run'].unique()),
    ('Stock sections available for analysis:', aggregated_dataset_raw['cohort'].unique()),
]
for position, (heading, entries) in enumerate(listings):
    # A blank line separates consecutive listings (none before the first one).
    if position > 0:
        print()
    print(heading)
    for entry in entries:
        print(f'  {entry}')


In [None]:
# Mean dwelling units and energy metrics for every (run, cohort) combination.
summary = (
    aggregated_dataset_raw
    .groupby(['run', 'cohort'])[['dwelling_units', 'kwh', 'kwh_per_unit']]
    .mean()
)
summary

## 3. Visualize
### 3.1. Compare end use plots between Old-ResStock, HPXML, and Old-LA100 results

In [None]:
# Column plotted in section 3.1.
metric = "kwh_per_unit" # <--- "kwh", "kwh_per_unit"

# Every abbreviated end use except the "total" entry.
enduses = [
    enduse_name
    for enduse_name in enduse_categories.abbreviated_enduse_list()
    if enduse_name != "total"
]

In [None]:
def _select_enduse_timeseries(run_label):
    """Return the per-end-use time series of one run for the total residential stock.

    Filters `aggregated_dataset_raw` to rows whose `run` equals `run_label`, whose
    `enduse_category` is in `enduses`, and whose `cohort` is "Total Residential Stock",
    then pivots the `metric` column so that rows are timestamps and columns are
    end-use categories.

    Raises:
        ValueError: if no row matches, so a mistyped or unavailable run label is
            reported here instead of surfacing later as an opaque KeyError.
    """
    is_selected = (
        (aggregated_dataset_raw["run"] == run_label)
        & aggregated_dataset_raw["enduse_category"].isin(enduses)
        & (aggregated_dataset_raw["cohort"] == "Total Residential Stock")
    )
    if not is_selected.any():
        raise ValueError(
            f"No rows found for run '{run_label}' in cohort 'Total Residential Stock'"
        )
    return (
        aggregated_dataset_raw.loc[is_selected]
        .set_index(["timestamp", "enduse_category"])[metric]
        .unstack()
    )


# One wide frame per run; the same selection is applied to each run label.
run = "2012_old_resstock_mshp_run"
df_plot = _select_enduse_timeseries(run)

run2 = "2012_hpxml_mshp_run"
df_plot2 = _select_enduse_timeseries(run2)

run3 = "2015_reference_old_la100"
df_plot3 = _select_enduse_timeseries(run3)

In [None]:
### plotting parameters

# Month numbers assigned to each season (from visualization_notebook).
seasons = {
    "summer_months": [5, 6, 7, 8, 9],
    "shoulder_months": [4, 10, 11],
    "winter_months": [1, 2, 3, 12],
}

# One colour per end use (from EZVIZ); the trailing comment names the end use.
# Note that two hex values appear twice ('#FF79AD' and '#632C94').
color_list = [
    '#F7DF10',  # Interior Lighting
    '#DEC310',  # Exterior Lighting
    '#4A4D4A',  # Plug Loads
    '#29AAE7',  # Refrigerator
    '#3cb6f0',  # Extra Refrigerator
    '#59caff',  # Freezer
    '#51e889',  # Clothes Washer
    '#FF79AD',  # Clothes Dryer
    '#D3D3D3',  # Dishwasher
    '#ff2200',  # Cooking Range
    '#632C94',  # Well Pump
    '#ff7300',  # Pool/Spa Pump
    '#FFB239',  # Hot Water
    '#C0C0C0',  # Ceiling Fan
    '#FF79AD',  # Vent Fans
    '#632C94',  # HVAC Fan/Pump
    '#0071BD',  # Cooling
    '#EF1C21',  # Heating
    '#1adb61',  # Electric Vehicle
    '#4748a8',  # PV
]

# Shared y-limits for all panels:
#   lower bound = smallest "pv" value across the three runs, scaled by 1.2
#   upper bound = largest row-wise sum across the three runs, scaled by 0.75
ymin = min(frame["pv"].min() for frame in (df_plot, df_plot2, df_plot3)) * 1.2
ymax = max(frame.sum(axis=1).max() for frame in (df_plot, df_plot2, df_plot3)) * 0.75
print(f"ymin: {ymin}, ymax: {ymax}")

In [None]:
# Grid of stacked-area diurnal profiles: one row per season, one column per run.
# Each panel shows, for the months of that season, the mean of every end use
# grouped by hour of day.
panel_runs = [
    ("Old-ResStock", df_plot),
    ("HPXML", df_plot2),
    ("2015_reference", df_plot3),
]

fig, axes = plt.subplots(len(seasons), len(panel_runs), figsize=(15, 10), squeeze=False)

for row_idx, (season, season_months) in enumerate(seasons.items()):
    for col_idx, (sim_run, df_run) in enumerate(panel_runs):
        ax = axes[row_idx, col_idx]

        df_season = df_run.loc[df_run.index.month.isin(season_months)]
        # reindex orders the columns like `enduses` and adds all-NaN columns for
        # end uses a run does not have, so colours stay aligned across panels.
        df_diurnal = (
            df_season.groupby(df_season.index.hour).mean().reindex(columns=enduses)
        )

        df_diurnal.plot(kind="area", stacked=True, ax=ax, color=color_list, legend=False, ls='None')
        if col_idx == 0:
            # Only the left-most panel of each row carries the y-axis label.
            ax.set_ylabel(metric)
        ax.set_ylim(ymin=ymin, ymax=ymax)
        ax.margins(x=0)
        ax.set_xticks(np.arange(df_diurnal.index[0], df_diurnal.index[-1], 3))
        ax.set_title(f'{sim_run} - {season}')

# A single figure-level legend built from the last panel, reversed so that the
# legend order matches the visual stacking order (top of the stack first).
handles, labels = axes[-1, -1].get_legend_handles_labels()
fig.legend(handles[::-1], labels[::-1], loc="center left", bbox_to_anchor=(1, 0.5))
fig.tight_layout()

figfile = figure_output_dir / "enduse_comparison_baseline.pdf"
# The legend is anchored outside the figure canvas; bbox_inches="tight" expands
# the saved area so the legend is not cropped out of the PDF.
fig.savefig(figfile, bbox_inches="tight")


### 3.2. Plot EULP Visualization plots
#### 3.2.1. Initialize EZVIZ Object

In [None]:
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

# 2018 Definition for the weather file Wilmington NC # <-- fix this
ezviz = EZVIZ(
    stock = 'resstock',
    summer_months = [5, 6, 7, 8, 9],
    shoulder_months = [4, 10, 11],
    winter_months = [1, 2, 3, 12],
    figure_output_dir = str(figure_output_dir),
)

#### 3.2.2. Select data

In [None]:
# Label of the truth data run, derived from the selected LRD year.
truth_data_label = ["{}_LRD".format(lrd_year)]

# Labels of the simulation runs compared against the truth data.
buildstock_data_label = [
    "2012_old_resstock_mshp_run",
    "2012_hpxml_mshp_run",
    "2015_reference_old_la100",
]

#### auto-set plotting types

In [None]:
# Choose which plots to generate from the LRD year:
#   - 2012                      -> every plot
#   - other years divisible by 4 -> the seven-plot subset
#   - years not divisible by 4   -> day-type comparisons only
if lrd_year == 2012:
    plots = "all"
elif lrd_year % 4 == 0:
    plots = [
        "plot_load_duration_curve",
        "plot_load_duration_curve_top_100_hrs",
        "plot_annual_total_bar_chart",
        "day_type_comparison",
        "day_type_comparison_stacked_by_enduse",
        "bar_chart_of_annual_hourly_electricity_error",
        "top_10_days_day_type_comparison",
    ]
else:
    # not a leap year (by the divisible-by-4 rule used here)
    plots = [
        "day_type_comparison",
        "day_type_comparison_stacked_by_enduse",
        "top_10_days_day_type_comparison",
    ]

In [None]:
# Keep only the rows belonging to the cohort being visualized.
cohort = 'Total Residential Stock'
data_filter = aggregated_dataset_raw['cohort'].eq(cohort)
aggregated_dataset = aggregated_dataset_raw[data_filter]

# Produce the calibration graphics; the dataset is passed indexed by timestamp
# and the full abbreviated end-use list is supplied.
ezviz.generate_plots_for_cohort(
    aggregated_dataset=aggregated_dataset.set_index('timestamp'),
    buildstock_data_label=buildstock_data_label,
    truth_data_label=truth_data_label,
    cohort=cohort,
    enduse_list=enduse_categories.abbreviated_enduse_list(),
    energy_column='kwh',
    plots=plots,
)

### export to HTML
This can take a while

In [None]:
# Convert the notebook to HTML, executing it first so the export contains outputs.
this_file = Path(".").resolve() / "ResStock_LA_visualization_notebook.ipynb"

# `--execute` runs every cell of the target notebook. NOTE(review): if `this_file`
# is this very notebook, that run reaches this cell again and would start another
# conversion, recursing without end. The environment flag below is set only for
# the child process so the nested run skips the export.
export_flag = "LA100_NBCONVERT_EXPORT_ACTIVE"

if os.environ.get(export_flag) == "1":
    print("Skipping HTML export: already running inside an nbconvert export.")
else:
    # Argument list instead of a shell string, so paths with spaces are passed intact.
    cmd = ["jupyter", "nbconvert", "--execute", "--to", "html", str(this_file)]
    completed = subprocess.run(cmd, env={**os.environ, export_flag: "1"})
    if completed.returncode != 0:
        print(f"HTML export failed with exit status {completed.returncode}")