In [1]:
import pandas as pd

In [2]:
# Load the Seattle dataset: the CSV is downloaded from its Google-hosted
# direct-download link and parsed into a DataFrame.
seattle_data_url = (
    "https://docs.google.com/uc?export=download&id=1X-6yvLm5z8K6_J-RIHfXuMEuvQcj-pus"
)
seattle_df = pd.read_csv(seattle_data_url)

In [3]:
# Display the column labels of the loaded frame; later cells select columns
# such as "BuildingType" and "PropertyGFATotal" by these exact names.
seattle_df.columns

Index(['OSEBuildingID', 'DataYear', 'BuildingName', 'BuildingType',
       'TaxParcelIdentificationNumber', 'Address', 'City', 'State', 'ZipCode',
       'Latitude', 'Longitude', 'Neighborhood', 'CouncilDistrictCode',
       'YearBuilt', 'NumberofFloors', 'NumberofBuildings', 'PropertyGFATotal',
       'PropertyGFABuildings', 'PropertyGFAParking', 'SelfReportGFATotal',
       'SelfReportGFABuildings', 'SelfReportParking', 'ENERGYSTARScore',
       'SiteEUIWN(kBtu/sf)', 'SiteEUI(kBtu/sf)', 'SiteEnergyUse(kBtu)',
       'SiteEnergyUseWN(kBtu)', 'SourceEUIWN(kBtu/sf)', 'SourceEUI(kBtu/sf)',
       'EPAPropertyType', 'LargestPropertyUseType',
       'LargestPropertyUseTypeGFA', 'SecondLargestPropertyUseType',
       'SecondLargestPropertyUseTypeGFA', 'ThirdLargestPropertyUseType',
       'ThirdLargestPropertyUseTypeGFA', 'Electricity(kWh)', 'SteamUse(kBtu)',
       'NaturalGas(therms)', 'ComplianceStatus', 'ComplianceIssue',
       'Electricity(kBtu)', 'NaturalGas(kBtu)', 'TotalGHGEmission

In [4]:
# Classification based on Seattle policy: maps each "BuildingType" label to
# a coarse category. The mapping is built from (category, labels) groups so
# the members of each category are listed together.
property_type_classification = {
    building_type: category
    for category, building_types in (
        (
            "Residential",
            (
                "Multifamily HR (10+)",
                "Multifamily MR (5-9)",
                "Multifamily LR (1-4)",
            ),
        ),
        (
            "Non-Residential",
            (
                "NonResidential",
                "Nonresidential COS",
            ),
        ),
    )
    for building_type in building_types
}

In [5]:
# Project each building's "BuildingType" onto its policy category.
# Labels that are absent from the mapping come out as NaN.
building_type_labels = seattle_df["BuildingType"]
seattle_df["Property Type Category"] = building_type_labels.map(
    property_type_classification
)

In [6]:
# Filter buildings based on the Seattle policy. Each criterion is a named
# boolean mask; a building must satisfy all of them to be kept.
exceeds_min_floor_area = seattle_df["PropertyGFATotal"] > 20000
reported_2016_or_later = seattle_df["DataYear"] >= 2016
is_compliant = seattle_df["ComplianceStatus"] == "Compliant"
# A missing ComplianceIssue (NaN) counts as "no issue" because of na=False.
has_blocking_issue = seattle_df["ComplianceIssue"].str.contains(
    "Missing EUI|Incomplete Fuel", na=False
)

# .copy() so that later column assignments do not write into a view of
# seattle_df.
valid_buildings = seattle_df[
    exceeds_min_floor_area
    & reported_2016_or_later
    & is_compliant
    & ~has_blocking_issue
].copy()

In [7]:
# Convert reported natural gas use to kBtu (1 therm = 100 kBtu). This
# overwrites any existing "NaturalGas(kBtu)" column with the converted value.
gas_therms = valid_buildings["NaturalGas(therms)"]
valid_buildings["NaturalGas(kBtu)"] = gas_therms.mul(100)

# Columns that count as direct energy (only natural gas).
direct_energy_columns = ["NaturalGas(kBtu)"]

# Row-wise total over the direct-energy columns. sum() skips missing
# values by default, so a building with no gas figure gets a total of 0.
direct_energy_total = valid_buildings[direct_energy_columns].sum(axis="columns")
valid_buildings["Direct Energy Use (kBtu)"] = direct_energy_total

In [8]:
# Unit conversion factors.
kBtu_to_kWh = 0.2931  # kWh per kBtu (rounded)
ft2_to_m2 = 0.092903  # m² per ft²

# Direct EUI (kWh/m²): direct energy converted to kWh, divided by the
# gross floor area converted to m² (PropertyGFATotal is treated as ft²).
direct_energy_kwh = valid_buildings["Direct Energy Use (kBtu)"] * kBtu_to_kWh
floor_area_m2 = valid_buildings["PropertyGFATotal"] * ft2_to_m2
valid_buildings["Direct EUI (kWh/m²)"] = direct_energy_kwh / floor_area_m2

# Split the EUI into one column per property category. Series.where keeps
# the value where the mask is True and fills NaN elsewhere, so each column
# is populated only for buildings of its own category. Buildings whose
# category is NaN (unmapped BuildingType) compare False against both labels
# and therefore end up NaN in both columns.
#
# This is vectorized rather than a row-wise DataFrame.apply: it avoids a
# Python-level loop, always produces a float column, and also works when
# the filtered frame is empty.
direct_eui = valid_buildings["Direct EUI (kWh/m²)"]
property_category = valid_buildings["Property Type Category"]

valid_buildings["Residential EUI (kWh/m²/year)"] = direct_eui.where(
    property_category == "Residential"
)

valid_buildings["Non-residential EUI (kWh/m²/year)"] = direct_eui.where(
    property_category == "Non-Residential"
)

In [9]:
# Final Seattle result: building coordinates plus the two per-category EUI
# columns, with the coordinate columns renamed to lowercase.
output_columns = [
    "Latitude",
    "Longitude",
    "Residential EUI (kWh/m²/year)",
    "Non-residential EUI (kWh/m²/year)",
]
coordinate_renames = {"Latitude": "latitude", "Longitude": "longitude"}

result_df = valid_buildings[output_columns].rename(columns=coordinate_renames)
result_df

Unnamed: 0,latitude,longitude,Residential EUI (kWh/m²/year),Non-residential EUI (kWh/m²/year)
0,47.61220,-122.33799,,57.073237
1,47.61307,-122.33361,,117.494765
2,47.61367,-122.33822,,19.470379
4,47.61375,-122.34047,,145.411619
5,47.61623,-122.33657,,102.003834
...,...,...,...,...
27463,47.55976,-122.38320,24.314778,
27464,47.61791,-122.30230,45.174678,
27466,47.70562,-122.32319,,0.000000
27467,47.63191,-122.31523,,634.533247


##### The missing values (NaN) in the results reflect the mutually exclusive nature of the building classifications: each classified building belongs to either the residential or the non-residential category, never both. For example, the row with index 5 is a non-residential building with a direct EUI of about 102.0 kWh/m²/year; its "Residential EUI" column is NaN because the building does not fall under the residential category. A building whose `BuildingType` is not covered by the classification mapping has NaN in both columns.

In [None]:
# result_df.to_csv("seattle_processed_eui.csv", index=False)