In [1]:
import pandas as pd

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# session; later cells add columns to a frame obtained by boolean filtering.
pd.options.mode.chained_assignment = None

In [2]:
# Load the Boston data from its Google download link.
BOSTON_DATA_URL = (
    "https://docs.google.com/uc?export=download&id=1o3SmzAzOH-O0RcbUblHuyDcYOsqp_vrb"
)

boston_df = pd.read_csv(BOSTON_DATA_URL)

In [3]:
# Classification based on Boston policy.
# Each group is a set used for membership tests, so entry order is irrelevant;
# labels are kept alphabetical to make gaps and repeats easy to spot.
_residential_property_types = {
    "Multifamily Housing",
    "Other - Lodging/Residential",
    "Residence Hall/Dormitory",
    "Residential Care Facility",
    "Senior Living Community",
    "Single Family Home",
}

_non_residential_property_types = {
    "Adult Education",
    "Ambulatory Surgical Center",
    "Bank Branch",
    "College/University",
    "Convenience Store without Gas Station",
    "Convention Center",
    "Courthouse",
    "Data Center",
    "Distribution Center",
    "Enclosed Mall",
    "Energy/Power Station",
    "Fast Food Restaurant",
    "Financial Office",
    "Fire Station",
    "Fitness Center/Health Club/Gym",
    "Food Sales",
    "Food Service",
    "Hospital (General Medical & Surgical)",
    "Hotel",
    "Ice/Curling Rink",
    "Indoor Arena",
    "K-12 School",
    "Laboratory",
    "Library",
    "Manufacturing/Industrial Plant",
    "Medical Office",
    "Movie Theater",
    "Museum",
    "Non-Refrigerated Warehouse",
    "Office",
    "Other - Education",
    "Other - Entertainment/Public Assembly",
    "Other - Mall",
    "Other - Public Services",
    "Other - Recreation",
    "Other - Services",
    "Other - Stadium",
    "Other - Technology/Science",
    "Other - Utility",
    "Other/Specialty Hospital",
    "Outpatient Rehabilitation/Physical Therapy",
    "Performing Arts",
    "Police Station",
    "Pre-school/Daycare",
    "Prison/Incarceration",
    "Race Track",
    "Refrigerated Warehouse",
    "Repair Services (Vehicle, Shoe, Locksmith, etc.)",
    "Restaurant",
    "Retail Store",
    "Self-Storage Facility",
    "Social/Meeting Hall",
    "Stadium (Open)",
    "Strip Mall",
    "Supermarket/Grocery Store",
    "Urgent Care/Clinic/Other Outpatient",
    "Vehicle Dealership",
    "Vehicle Repair Services",
    "Vocational School",
    "Wholesale Club/Supercenter",
    "Worship Facility",
    "Zoo",
}

property_type_classification = {
    "residential": _residential_property_types,
    "non-residential": _non_residential_property_types,
}

In [6]:
# create a new category
def classify_property_type(property_type):
    """Return the category name for a property-type label.

    The label is looked up first in the "residential" set and then in the
    "non-residential" set of ``property_type_classification``.

    Returns:
        "Residential" or "Non-Residential" for a label found in the matching
        set, otherwise "Unknown".
    """
    lookup_order = (
        ("residential", "Residential"),
        ("non-residential", "Non-Residential"),
    )
    for group_key, category_name in lookup_order:
        if property_type in property_type_classification[group_key]:
            return category_name
    return "Unknown"


# Classify each row's "Largest Property Type" label and store the result.
category_by_row = boston_df["Largest Property Type"].apply(classify_property_type)
boston_df["Property Type Category"] = category_by_row

In [7]:
# Filter buildings based on the Boston BERDO policy.

# Coerce the area columns to numeric; unparseable entries become NaN.
boston_df["Reported Gross Floor Area (Sq Ft)"] = pd.to_numeric(
    boston_df["Reported Gross Floor Area (Sq Ft)"], errors="coerce"
)
boston_df["Reported Enclosed Parking Area (Sq Ft)"] = pd.to_numeric(
    boston_df["Reported Enclosed Parking Area (Sq Ft)"], errors="coerce"
)

# Gross floor area net of enclosed parking; a missing parking area counts as 0.
boston_df["GFA Excluding Parking"] = boston_df[
    "Reported Gross Floor Area (Sq Ft)"
] - boston_df["Reported Enclosed Parking Area (Sq Ft)"].fillna(0)

# Residential: net GFA ≥ 20,000 sq ft, OR any non-null Tax Parcel ID.
# NOTE(review): the parcel-ID clause admits every residential row that has a
# Tax Parcel ID regardless of its floor area — confirm this is the intended rule.
residential_berdo = (boston_df["Property Type Category"] == "Residential") & (
    (boston_df["GFA Excluding Parking"] >= 20000)  # GFA ≥ 20,000 sqft
    | (boston_df["Tax Parcel ID"].notna())
)

# Non-Residential: reported GFA ≥ 20,000 sq ft.
# NOTE(review): this threshold uses the reported gross area (parking included),
# whereas the residential mask uses the area excluding parking — confirm.
non_residential_berdo = (boston_df["Property Type Category"] == "Non-Residential") & (
    boston_df["Reported Gross Floor Area (Sq Ft)"] >= 20000  # GFA ≥ 20,000 sqft
)

# Keep rows whose projected first compliance year lies in 2013–2025 (inclusive).
# Rows with a missing or unparseable year fail `between` and are excluded.
if "First Emissions Compliance Year (Projected)" in boston_df.columns:
    boston_df["First Emissions Compliance Year (Projected)"] = pd.to_numeric(
        boston_df["First Emissions Compliance Year (Projected)"], errors="coerce"
    )
    valid_year = boston_df["First Emissions Compliance Year (Projected)"].between(
        2013, 2025, inclusive="both"
    )
else:
    # Column absent: apply no year restriction.
    valid_year = True


# Take an explicit copy: later cells add columns to `valid_buildings`, and
# writing into a filtered slice of `boston_df` is ambiguous (view vs. copy).
valid_buildings = boston_df[
    (residential_berdo | non_residential_berdo) & valid_year
].copy()

In [8]:
# Coerce the columns used below to numeric; unparseable entries become NaN.
for numeric_column in (
    "Natural Gas Usage (kBtu)",
    "Reported Gross Floor Area (Sq Ft)",
    "Reported Enclosed Parking Area (Sq Ft)",
):
    valid_buildings[numeric_column] = pd.to_numeric(
        valid_buildings[numeric_column], errors="coerce"
    )

# Floor area with enclosed parking removed; a missing parking area counts as 0.
gross_area = valid_buildings["Reported Gross Floor Area (Sq Ft)"]
parking_area = valid_buildings["Reported Enclosed Parking Area (Sq Ft)"].fillna(0)
valid_buildings["GFA Excluding Parking"] = gross_area - parking_area

In [9]:
# Unit conversion factors used in this cell.
kWh_to_kBtu = 3.412
kBtu_to_kWh = 0.2931
ft2_to_m2 = 0.092903

electricity_kwh_column = "Electricity Usage (kWh)"
electricity_kbtu_column = "Electricity Usage (kBtu)"

# energy
direct_energy_columns = [
    "Natural Gas Usage (kBtu)",
    "Fuel Oil 1 Usage (kBtu)",
    "Fuel Oil 2 Usage (kBtu)",
    "Fuel Oil 4 Usage (kBtu)",
    "Fuel Oil 5 and 6 Usage (kBtu)",
    "Diesel Usage (kBtu)",
    "Propane Usage (kBtu)",
    "District Steam Usage (kBtu)",
    "District Hot Water Usage (kBtu)",
    "District Chilled Water Usage (kBtu)",
    electricity_kbtu_column,
]

# Every column summed into a kBtu total must itself be in kBtu, so the
# converted electricity column is used here rather than the raw kWh column.
indirect_energy_columns = [
    electricity_kbtu_column,
    "Renewable System Electricity Usage Onsite (kBtu)",
]

# Electricity appears in both lists; de-duplicate (order-preserving) so the
# total counts it once.
# NOTE(review): confirm whether onsite renewable electricity is already part of
# "Electricity Usage (kWh)"; if so it should not be added to the total again.
total_energy_columns = list(
    dict.fromkeys(direct_energy_columns + indirect_energy_columns)
)

# Coerce every reported source column to numeric so that text values become
# NaN instead of being dropped from, or breaking, the row-wise sums below.
reported_energy_columns = [electricity_kwh_column] + [
    column for column in total_energy_columns if column != electricity_kbtu_column
]
for energy_column in reported_energy_columns:
    valid_buildings[energy_column] = pd.to_numeric(
        valid_buildings[energy_column], errors="coerce"
    )

# kWh → kBtu
valid_buildings[electricity_kbtu_column] = (
    valid_buildings[electricity_kwh_column] * kWh_to_kBtu
)

# min_count=1: a row whose sources are all missing sums to NaN rather than 0.
valid_buildings["Direct Energy Use (kBtu)"] = valid_buildings[
    direct_energy_columns
].sum(axis=1, min_count=1)
valid_buildings["Indirect Energy Use (kBtu)"] = valid_buildings[
    indirect_energy_columns
].sum(axis=1, min_count=1)

# total
valid_buildings["Total Energy Use (kBtu)"] = valid_buildings[
    total_energy_columns
].sum(axis=1, min_count=1)

# Net floor area in m². A zero or negative area would produce an infinite or
# negative intensity, so such areas are treated as missing.
net_floor_area_m2 = valid_buildings["GFA Excluding Parking"] * ft2_to_m2
net_floor_area_m2 = net_floor_area_m2.where(net_floor_area_m2 > 0)

valid_buildings["Direct EUI (kWh/m²)"] = (
    valid_buildings["Direct Energy Use (kBtu)"] * kBtu_to_kWh
) / net_floor_area_m2


# Drop rows reporting exactly zero direct energy. Rows whose direct energy is
# NaN are kept, because NaN != 0 evaluates to True. The copy makes the column
# assignments below write to an independent frame.
valid_buildings = valid_buildings[
    valid_buildings["Direct Energy Use (kBtu)"] != 0
].copy()


# Split the EUI into one column per category; rows of the other category get NaN.
property_category = valid_buildings["Property Type Category"]
direct_eui = valid_buildings["Direct EUI (kWh/m²)"]

valid_buildings["Residential EUI (kWh/m²/year)"] = direct_eui.where(
    property_category == "Residential"
)

valid_buildings["Non-Residential EUI (kWh/m²/year)"] = direct_eui.where(
    property_category == "Non-Residential"
)

In [10]:
# Add Residential and Non-Residential area columns: each holds the net floor
# area for rows of its own category and NaN for every other row.
# NOTE(review): "GFA Excluding Parking" is derived from "(Sq Ft)" columns, so
# these areas are in square feet while the EUI columns are per m² — confirm
# that downstream consumers expect this.
building_category = valid_buildings["Property Type Category"]
net_floor_area = valid_buildings["GFA Excluding Parking"]

# Vectorised masks replace the row-wise apply: same values, no per-row Python
# call, and an empty frame is handled without special-casing.
valid_buildings["Residential Area"] = net_floor_area.where(
    building_category == "Residential"
)

valid_buildings["Non Residential Area"] = net_floor_area.where(
    building_category == "Non-Residential"
)

In [11]:
# Attach the same fixed coordinate pair to every row.
# (presumably a city-level reference point for Boston — TODO confirm)
fixed_latitude, fixed_longitude = 42.3555, -71.0565
valid_buildings["latitude"] = fixed_latitude
valid_buildings["longitude"] = fixed_longitude

In [12]:
# Columns carried into the EUI table: location, per-category EUI, per-category area.
eui_columns = [
    "latitude",
    "longitude",
    "Residential EUI (kWh/m²/year)",
    "Non-Residential EUI (kWh/m²/year)",
    "Residential Area",
    "Non Residential Area",
]
eui_boston_df = valid_buildings[eui_columns]

eui_boston_df

Unnamed: 0,latitude,longitude,Residential EUI (kWh/m²/year),Non-Residential EUI (kWh/m²/year),Residential Area,Non Residential Area
0,42.3555,-71.0565,,211.366466,,236688.0
1,42.3555,-71.0565,175.364314,,135344.0,
2,42.3555,-71.0565,,117.565942,,255383.0
3,42.3555,-71.0565,,174.410412,,134789.0
4,42.3555,-71.0565,,192.669750,,455682.0
...,...,...,...,...,...,...
5520,42.3555,-71.0565,,142.114941,,234444.0
5525,42.3555,-71.0565,321.600815,,27770.0,
5533,42.3555,-71.0565,287.845271,,42544.0,
5536,42.3555,-71.0565,221.573012,,48373.0,


In [13]:
# Write the EUI table to CSV without the index column.
eui_boston_csv_path = "../data/02_interim/eui_boston.csv"
eui_boston_df.to_csv(eui_boston_csv_path, index=False)