Get community building samples from EUSS and renormalize the sample weights

In [65]:
import pandas as pd
from pathlib import Path
import numpy as np
import getpass

# Login name of whoever is running the notebook; the path-setup cell branches
# on this value to pick a per-user data directory.
user = getpass.getuser()
print(f"The user is {user}")

The user is lliu2


In [66]:
# Community to process. The name is interpolated into the downsampled-buildings
# CSV file name and into the output directory path.
# Community names listed so far:
# ["louisville", "san_jose", "columbia", "north_birmingham", "jackson_county", "duluth",]
community = 'duluth' # TODO change community name

In [67]:
# set path
# The EUSS results live in a different place for each collaborator, so the
# directory is chosen from the login name.
if user == "ylou2":
    euss_dir = 'data_/euss_res_final_2018_550k_20220901/' # TODO
elif user == "lliu2":
    euss_dir = "/Volumes/Lixi_Liu/euss_aws"
else:
    # Fail loudly: merely printing would fall through to `Path(euss_dir)` and
    # either raise a confusing NameError or silently reuse a stale value left
    # in the kernel by an earlier run.
    raise ValueError(f"new user={user}, set euss_dir path here with an elif statement")

euss_dir = Path(euss_dir).resolve()
output_dir = Path(".").resolve() / "data_" / "community_building_samples" / community
# parents=True so a fresh checkout (no data_/community_building_samples yet) works
output_dir.mkdir(parents=True, exist_ok=True)

print(f"euss_dir: {euss_dir}")
print(f"output_dir: {output_dir}")

euss_dir: /Volumes/Lixi_Liu/euss_aws
output_dir: /Users/lliu2/Documents/GitHub/resstock/euss_cleap/data_/community_building_samples/duluth


In [68]:
# Downsampled building ids for the selected community, indexed by building_id
# so they can be joined onto each upgrade's results table.
building_id_weight = (
    pd.read_csv(f'data_/downsampled_buildings_id/euss1_2018_results_up00__downsampled_method1__{community}.csv')
    .astype({'building_id': int})
    .set_index('building_id')
)
print(f"building_id_weight for {community} has {len(building_id_weight)} rows")

# One filtered frame per upgrade file (up00 .. up10); concatenated in a later cell.
df = []
for i in range(11):
    euss_up = pd.read_parquet(euss_dir / f'results_up{i:02d}.parquet', engine='pyarrow')
    euss_up["building_id"] = euss_up["building_id"].astype(int)

    # Right join: the result has a row for every building in building_id_weight.
    community_up = euss_up.join(building_id_weight, on="building_id", how="right")
    n_downselected = len(community_up)

    # Keep only the simulations that completed successfully (drop failed sims).
    is_success = community_up["completed_status"] == "Success"
    community_up = community_up.loc[is_success].reset_index(drop=True)

    community_up.to_parquet(output_dir / f"up{i:02d}.parquet")
    df.append(community_up)

    print(f" - for up{i:02d}, community_up has {n_downselected} rows after downselection, and {len(community_up)} rows after dropping failed sims")

print("all done!")


building_id_weight for duluth has 1217 rows
 - for up00, community_up has 1217 rows after downselection, and 1217 rows after dropping failed sims
 - for up01, community_up has 1217 rows after downselection, and 1048 rows after dropping failed sims
 - for up02, community_up has 1217 rows after downselection, and 1093 rows after dropping failed sims
 - for up03, community_up has 1217 rows after downselection, and 1217 rows after dropping failed sims
 - for up04, community_up has 1217 rows after downselection, and 1217 rows after dropping failed sims
 - for up05, community_up has 1217 rows after downselection, and 1159 rows after dropping failed sims
 - for up06, community_up has 1217 rows after downselection, and 1214 rows after dropping failed sims
 - for up07, community_up has 1217 rows after downselection, and 1037 rows after dropping failed sims
 - for up08, community_up has 1217 rows after downselection, and 1217 rows after dropping failed sims
 - for up09, community_up has 1217 row

### Get summary

In [69]:
def weighted_mean(x, metric: str):
    """Sample-weight-weighted mean of one column of a DataFrame.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain the column named by ``metric`` and a ``"sample_weight"``
        column.
    metric : str
        Name of the column to average.

    Returns
    -------
    pandas.Series
        One-element Series whose label is ``f"{metric} wt_mean"``. The value
        is 0 when ``metric`` has no non-missing entries.

    Notes
    -----
    Rows where either the metric or the weight is missing are left out of
    BOTH the numerator and the denominator. Previously the weights of rows
    with a missing metric were still counted in the denominator, which biased
    the mean toward zero.
    """
    if x[metric].notna().sum() == 0:
        res = 0
    else:
        usable = x[metric].notna() & x["sample_weight"].notna()
        values = x.loc[usable, metric]
        weights = x.loc[usable, "sample_weight"]
        res = (values * weights).sum() / weights.sum()
    col_name = f"{metric} wt_mean"
    return pd.Series({col_name: res})

def weighted_percentile(x, metric: str, percents: float):
    """Sample-weight-weighted quantile of one column of a DataFrame.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain the column named by ``metric`` and a ``"sample_weight"``
        column.
    metric : str
        Name of the column to summarize.
    percents : float
        Quantile expressed as a fraction in [0, 1] (e.g. 0.25 for the lower
        quartile). It is passed unchanged to ``_weighted_percentile`` and is
        embedded verbatim in the output label.

    Returns
    -------
    pandas.Series
        One-element Series whose label is ``f"{metric} wt_p{percents}"``.
    """
    col_name = f"{metric} wt_p{percents}"
    res = _weighted_percentile(x[metric], percents, weights=x["sample_weight"])
    return pd.Series({col_name: res})

def _weighted_percentile(data, percents: int, weights=None):
    """
    perc : percentile in 0 - 1
    """
    # dropna
    idx = ~np.isnan(data) & ~np.isnan(weights)
    data = np.array(data)[idx]
    if len(data) == 0:
        return np.nan
    if weights is None:
        return np.percentile(data, percents)
    weights = np.array(weights)[idx]
    ix = np.argsort(data)
    data = data[ix] # sort data
    weights = weights[ix] # sort weights
    cdf = (np.cumsum(weights) - 0.5 * weights) / np.sum(weights) # 'like' a CDF function
    return np.interp(percents, cdf, data)


In [70]:
# `df` is built as a list of per-upgrade frames by the loading loop. Only
# concatenate while it is still a list, so that re-running this cell is a
# no-op rather than a TypeError on an already-concatenated DataFrame.
if isinstance(df, list):
    df = pd.concat(df, axis=0)
# Rows without an upgrade name are labelled "Baseline"
# (presumably the up00 results -- confirm against the EUSS schema).
df["apply_upgrade.upgrade_name"] = df["apply_upgrade.upgrade_name"].fillna("Baseline")

  df = pd.concat(df, axis=0)


In [71]:
metric = 'report_simulation_output.energy_use_total_m_btu'
dfgb = df.groupby(["apply_upgrade.upgrade_name"])

# Per-upgrade summary table: number of samples, sum of sample weights, and the
# weighted mean / lower and upper quartiles of `metric`.
weights_by_upgrade = dfgb["sample_weight"]
summary_columns = [
    weights_by_upgrade.count().rename("n_samples"),
    weights_by_upgrade.sum().rename("n_represented"),
    dfgb.apply(lambda grp: weighted_mean(grp, metric)),
]
summary_columns += [
    dfgb.apply(lambda grp, q=q: weighted_percentile(grp, metric, percents=q))
    for q in (0.25, 0.75)
]
pd.concat(summary_columns, axis=1)

Unnamed: 0_level_0,n_samples,n_represented,report_simulation_output.energy_use_total_m_btu wt_mean,report_simulation_output.energy_use_total_m_btu wt_p0.25,report_simulation_output.energy_use_total_m_btu wt_p0.75
apply_upgrade.upgrade_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Baseline,1217,39762.0,147.793793,71.942865,201.213639
Basic enclosure,1048,34612.492977,130.320456,69.348793,175.333901
Enhanced enclosure,1093,35800.50936,121.364162,65.143043,162.016215
Heat pump water heaters,1214,39724.904866,141.993144,67.439116,194.421378
"Heat pumps, high-efficiency, electric backup",1217,39762.0,77.51518,37.271665,104.507418
"Heat pumps, min-efficiency, electric backup",1217,39762.0,97.908877,46.735533,132.537216
"Heat pumps, min-efficiency, existing heating as backup",1159,37835.613214,126.834223,56.252708,172.517461
"Whole-home electrification, high efficiency",1217,39762.0,64.603593,28.122503,88.691239
"Whole-home electrification, high efficiency + basic enclosure package (packages 1 & 8)",1217,39762.0,54.701193,25.268272,76.858865
"Whole-home electrification, high efficiency + enhanced enclosure package (packages 2 & 8)",1217,39762.0,53.385662,24.846969,74.006979


In [72]:
metric = 'report_simulation_output.fuel_use_electricity_total_m_btu'
dfgb = df.groupby(["apply_upgrade.upgrade_name"])

# Per-upgrade summary table: number of samples, sum of sample weights, and the
# weighted mean / lower and upper quartiles of `metric`.
weights_by_upgrade = dfgb["sample_weight"]
summary_columns = [
    weights_by_upgrade.count().rename("n_samples"),
    weights_by_upgrade.sum().rename("n_represented"),
    dfgb.apply(lambda grp: weighted_mean(grp, metric)),
]
summary_columns += [
    dfgb.apply(lambda grp, q=q: weighted_percentile(grp, metric, percents=q))
    for q in (0.25, 0.75)
]
pd.concat(summary_columns, axis=1)

Unnamed: 0_level_0,n_samples,n_represented,report_simulation_output.fuel_use_electricity_total_m_btu wt_mean,report_simulation_output.fuel_use_electricity_total_m_btu wt_p0.25,report_simulation_output.fuel_use_electricity_total_m_btu wt_p0.75
apply_upgrade.upgrade_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Baseline,1217,39762.0,31.157455,13.199085,34.923611
Basic enclosure,1048,34612.492977,28.584726,13.043937,32.302919
Enhanced enclosure,1093,35800.50936,27.755937,12.933563,32.26897
Heat pump water heaters,1214,39724.904866,31.593165,14.976959,35.924264
"Heat pumps, high-efficiency, electric backup",1217,39762.0,64.969272,27.297029,88.127367
"Heat pumps, min-efficiency, electric backup",1217,39762.0,85.375044,39.096341,117.355257
"Heat pumps, min-efficiency, existing heating as backup",1159,37835.613214,38.672617,20.028739,49.861986
"Whole-home electrification, high efficiency",1217,39762.0,64.209171,27.521654,88.607931
"Whole-home electrification, high efficiency + basic enclosure package (packages 1 & 8)",1217,39762.0,54.306766,24.849589,75.642254
"Whole-home electrification, high efficiency + enhanced enclosure package (packages 2 & 8)",1217,39762.0,52.991236,24.646781,73.625533


In [73]:
metric = 'report_simulation_output.fuel_use_natural_gas_total_m_btu'
dfgb = df.groupby(["apply_upgrade.upgrade_name"])

# Per-upgrade summary table: number of samples, sum of sample weights, and the
# weighted mean / lower and upper quartiles of `metric`.
weights_by_upgrade = dfgb["sample_weight"]
summary_columns = [
    weights_by_upgrade.count().rename("n_samples"),
    weights_by_upgrade.sum().rename("n_represented"),
    dfgb.apply(lambda grp: weighted_mean(grp, metric)),
]
summary_columns += [
    dfgb.apply(lambda grp, q=q: weighted_percentile(grp, metric, percents=q))
    for q in (0.25, 0.75)
]
pd.concat(summary_columns, axis=1)

Unnamed: 0_level_0,n_samples,n_represented,report_simulation_output.fuel_use_natural_gas_total_m_btu wt_mean,report_simulation_output.fuel_use_natural_gas_total_m_btu wt_p0.25,report_simulation_output.fuel_use_natural_gas_total_m_btu wt_p0.75
apply_upgrade.upgrade_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Baseline,1217,39762.0,99.981415,10.362039,155.486208
Basic enclosure,1048,34612.492977,88.094716,14.091929,135.155361
Enhanced enclosure,1093,35800.50936,80.757275,12.666135,123.94552
Heat pump water heaters,1214,39724.904866,93.679243,0.0,146.490887
"Heat pumps, high-efficiency, electric backup",1217,39762.0,11.950861,0.0,18.065921
"Heat pumps, min-efficiency, electric backup",1217,39762.0,11.939329,0.0,18.067619
"Heat pumps, min-efficiency, existing heating as backup",1159,37835.613214,75.161307,4.320308,126.987365
"Whole-home electrification, high efficiency",1217,39762.0,0.394426,0.0,0.0
"Whole-home electrification, high efficiency + basic enclosure package (packages 1 & 8)",1217,39762.0,0.394426,0.0,0.0
"Whole-home electrification, high efficiency + enhanced enclosure package (packages 2 & 8)",1217,39762.0,0.394426,0.0,0.0
