In [None]:
"""
Measured occupancy might miscount or reset, especially if sensors drift.

Flow-based occupancy may miss people entering/exiting elsewhere or accumulate small errors.

The two sources are in close agreement (they differ by only single digits even at hundreds of occupants), which shows the sensors are generally reliable.

To balance their strengths and mitigate weaknesses, I use the average of the two.

Finally, I round up to the nearest integer so the metric always reflects a physically possible headcount (no “half people,” and no risk of undercounting near zero).
"""

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read parquet: load the occupancy records into df_occ.
# The path is relative, so it is resolved against the notebook's working directory.
df_occ = pd.read_parquet("data/occupancy.parquet")

In [2]:
# This is how we take into account the in and out sensors at the entry point, which are another source of accurate occupancy data
def running_occ(flow):
    """Turn a sequence of net flows into a running headcount that never goes negative.

    Each element of ``flow`` is added to a running total that starts at zero.
    Whenever the total would drop below zero it is reset to zero, and the
    accumulation continues from there.

    Args:
        flow: Iterable of numeric net-flow values (entries minus exits).

    Returns:
        A list with one running total per input element, in input order.
    """
    headcounts = []
    current = 0
    for delta in flow:
        # Clamp at zero: a space cannot hold a negative number of people.
        current = max(current + delta, 0)
        headcounts.append(current)
    return headcounts

In [3]:
# Keys that separate one series of readings from another. Grouping on all of
# them keeps rows from different organizations, locations, buildings, spaces
# or days from being mixed together.
group_cols = [
    "organization_id",
    "state",
    "city",
    "building_name",
    "space_name",
    "date_daily",
]

In [7]:
# Creates an occupancy estimate based on flow (in - out)
# Calculates an average between the two. Rounds up to nearest int to favor not missing people.
df = df_occ.copy()

# Net movement per row; a missing in/out count is treated as zero movement.
df["flow"] = df["people_in"].fillna(0) - df["people_out"].fillna(0)

# running_occ accumulates row by row, so every group has to be walked in
# chronological order. The transform runs on a timestamp-sorted copy
# (mergesort is stable, so rows sharing a timestamp keep their original
# relative order); assigning the result back realigns it to df's own row
# order through the index, so df itself is not reordered.
df["flow_occ"] = (
    df.sort_values("date_time", kind="mergesort")
    .groupby(group_cols)["flow"]
    .transform(running_occ)
)

# A missing measured occupancy would turn the average into NaN and make the
# integer cast raise. For those rows fall back to the flow-based estimate, so
# the average is simply the flow estimate itself.
df["avg_occ"] = np.ceil(
    (df["flow_occ"] + df["occupancy"].fillna(df["flow_occ"])) / 2
).astype(int)

In [8]:
# Writes the frame, including the new flow, flow_occ and avg_occ columns, to parquet.
# The file name is relative, so the output lands in the notebook's working directory.
df.to_parquet("occupancy_new_metric.parquet")

In [9]:
# Display the augmented frame to spot-check the new flow, flow_occ and avg_occ columns.
df

Unnamed: 0,organization_id,state,city,building_name,space_name,space_type,space_description,capacity,date_time,date_daily,people_in,people_out,traffic,occupancy,flow,flow_occ,avg_occ
0,63f4d9f01ae6380cd40791EY,Ontario,Toronto,Building_1,Space1_Building_1,Building,Main Entrance,800,2023-03-05 00:00:00,2023-03-05,0.0,0.0,0.0,0.0,0.0,0.0,0
1,63f4d9f01ae6380cd40791EY,Ontario,Toronto,Building_10,Space1_Building_10,Tower,Main Entrance,475,2023-03-05 00:00:00,2023-03-05,0.0,2.0,2.0,2.0,-2.0,0.0,1
2,63f4d9f01ae6380cd40791EY,Alberta,Edmonton,Building_11,Space1_Building_11,Tower,Main Entrance,590,2023-03-05 00:00:00,2023-03-05,7.0,8.0,15.0,2.0,-1.0,0.0,1
3,63f4d9f01ae6380cd40791EY,British Columbia,Vancouver,Building_12,Space1_Building_12,Tower,Main Entrance,580,2023-03-05 00:00:00,2023-03-05,3.0,2.0,5.0,5.0,1.0,1.0,3
4,63f4d9f01ae6380cd40791EY,Ontario,Toronto,Building_13,Space1_Building_13,Tower,Main Entrance,900,2023-03-05 00:00:00,2023-03-05,0.0,0.0,0.0,2.0,0.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10228,63f4d9f01ae6380cd40791EY,Michigan,Detroit,Building_41,Space1_Building_41,Building,Main Entrance,435,2023-03-11 19:00:00,2023-03-11,0.0,0.0,0.0,1.0,0.0,1.0,1
10229,63f4d9f01ae6380cd40791EY,Alberta,Edmonton,Building_5,Space2_Building_5,Tower,Main Entrance,1218,2023-03-11 19:00:00,2023-03-11,0.0,0.0,0.0,1.0,0.0,0.0,1
10230,63f4d9f01ae6380cd40791EY,Alberta,Edmonton,Building_5,Space1_Building_5,Building,Main Entrance,1218,2023-03-11 19:00:00,2023-03-11,0.0,3.0,3.0,0.0,-3.0,0.0,0
10231,63f4d9f01ae6380cd40791EY,British Columbia,Vancouver,Building_6,Space1_Building_6,Building,Main Entrance,1322,2023-03-11 19:00:00,2023-03-11,3.0,7.0,10.0,3.0,-4.0,0.0,2
