In [None]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import HTML

# Allow autoreload as we develop the GMT in parallel
%load_ext autoreload
%autoreload 2

dataset_path = Path().resolve() / "datasets"
# get the full path
print(f"Dataset path: {dataset_path}")

data_path = dataset_path / "comstock"
# get the full path
print(f"Data path: {data_path}")

figures_path = Path().resolve() / "figures" / "cec"
if not figures_path.exists():
    figures_path.mkdir(parents=True)
print(f"Figures path: {figures_path}")

warnings.filterwarnings("ignore", category=FutureWarning)
# ignore SettingWithCopyWarning
pd.options.mode.chained_assignment = None

# ComStock

### Pull down ComStock data

To download the data, run the `comstock_processor.py` script (for example, from within VS Code). The data will be saved into
the `comstock` subfolder of the datasets directory. The download takes a while to run (about 10 minutes).

In [None]:
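# Uncomment the next line to run the download from inside the notebook (slow, so it is disabled by default):
# %run comstock_processor.py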

### Read in ComStock data

In [None]:
# Load the metadata CSV exported from ComStock.
# TODO: consider reading the parquet export directly instead of the CSV.
metadata_csv = data_path / "All-All-All-0-selected_metadata.csv"
df_all = pd.read_csv(metadata_csv)

In [None]:
# Dimensions of the full dataset as (rows, columns)
print(df_all.shape)
# Distinct values present in the state and building-type columns, in that order
for column in ("in.state", "in.comstock_building_type"):
    print(df_all[column].unique())

In [None]:
# Keep the column names and write them to fields.txt, one name per line
fields = df_all.columns
(data_path / "fields.txt").write_text("\n".join(fields))

In [None]:
# Keep only the buildings located in California. An exact comparison is used
# rather than a substring match: it selects only the "CA" state code, and rows
# with a missing state simply compare False instead of putting NaN into the mask.
df_ca = df_all[df_all["in.state"] == "CA"]
print(f"all: {df_all.shape}")
# The subset is all of California (not a single climate zone), so label it "CA".
print(f"CA: {df_ca.shape}")
# Show how many California buildings fall into each subtype / building type.
for to_display in ["in.building_subtype", "in.comstock_building_type"]:
    df_to_show = df_ca[to_display].value_counts().reset_index()
    display(HTML(df_to_show.to_html(index=False, border=1)))

In [None]:
# Count the number of buildings in each county and tidy up the county labels.
county_counts = (
    df_ca["in.county_name"]
    .value_counts()
    # turn the counts Series into a two-column DataFrame
    .reset_index()
    .assign(
        **{
            # Remove the "CA," and "County" text from the name, then trim the
            # whitespace left around what remains.
            "in.county_name": lambda counts: (
                counts["in.county_name"].str.replace("CA,", "").str.replace("County", "").str.strip()
            )
        }
    )
)

# Render the counts as an HTML table without the index column
display(HTML(county_counts.to_html(index=False, border=1)))

In [None]:
import geopandas as gpd


def plot_county_counts(counties, title):
    """Draw a choropleth of the ``count`` column and return ``(fig, ax)``.

    ``counties`` is a GeoDataFrame with a ``count`` column; rows whose count is
    missing are drawn in light grey. ``title`` is used as the axes title.
    """
    fig, ax = plt.subplots(1, 1, figsize=(12, 12))
    counties.plot(
        column="count",
        cmap="Oranges",
        legend=True,
        missing_kwds={"color": "lightgrey"},
        ax=ax,
    )
    # Set the title on the map axes explicitly instead of relying on pyplot's
    # notion of the "current" axes.
    ax.set_title(title, fontsize=16)
    plt.show()
    return fig, ax


# Download the US county boundaries GeoJSON from the plotly datasets repository
# (needs network access and geopandas installed).
counties_gdf = gpd.read_file("https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json")

# Filter for California counties (FIPS state code for CA is '06'). Copy the
# selection so that adding a column below modifies an independent frame rather
# than a slice of counties_gdf.
california_counties = counties_gdf[counties_gdf["STATE"] == "06"].copy()

# Build a {county_name: count} lookup and attach the count to each county.
# Counties whose NAME has no entry in the lookup get NaN.
county_mapping = dict(zip(county_counts["in.county_name"], county_counts["count"]))
california_counties["count"] = california_counties["NAME"].map(county_mapping)

# Plot the counts for every California county (linear colour scale)
fig, ax = plot_county_counts(california_counties, "Count of ComStock Buildings in CA")

# Create another plot but without Los Angeles
# (presumably so its count does not dominate the colour scale -- TODO confirm)
fig, ax = plot_county_counts(
    california_counties[california_counties["NAME"] != "Los Angeles"],
    "Count of ComStock Buildings in CA (without LA)",
)

In [None]:
# Total-energy-consumption column of each meter, mapped to the short label that
# is shown on the chart.
# NOTE(review): an earlier comment described these totals as kWh -- confirm the
# units against the ComStock export before quoting them.
meter_labels = {
    "out.district_cooling.total.energy_consumption": "District Cooling",
    "out.district_heating.total.energy_consumption": "District Heating",
    "out.electricity.total.energy_consumption": "Electricity",
    "out.natural_gas.total.energy_consumption": "Natural Gas",
    "out.other_fuel.total.energy_consumption": "Other Fuel",
}
meters = list(meter_labels)

# Sum each meter column over df_ca, print the raw totals, then switch to the
# short labels for plotting.
df_meters = df_ca[meters].sum()
print(df_meters)
df_meters = df_meters.rename(meter_labels)

# Pie chart of each meter's share of the summed energy consumption
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.pie(df_meters, labels=df_meters.index, autopct="%1.1f%%")
ax.set_title("ComStock - Energy Consumption by Meter Type", fontsize=16)

# Number of buildings per heating fuel: first as a table, then as a pie chart
df_heating = df_ca["in.heating_fuel"].value_counts().reset_index()
display(HTML(df_heating.to_html(index=False, border=1)))
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.pie(df_heating["count"], labels=df_heating["in.heating_fuel"], autopct="%1.1f%%")
ax.set_title("ComStock - Heating Fuel Type by Count", fontsize=16)