In [10]:
import polars as pl
import datetime as dt

In [11]:
pth_data = f"C:/Users/z187070/Documents/Projects/PAXCOUNTER/data/"

In [12]:
region_name = "Ost"
mngmt_name = "Mecklenburg-Vorpommern"
mngmt_filter = "Mecklenburg-Vorpommern"

In [13]:
pth_pax = pth_data + f"pax_data_{region_name}_{mngmt_name}.parquet"

## create one file per pax sensor

* needs sensor id
* moving average values (max, min, mean)

In [14]:
df_pax = pl.read_parquet(pth_pax).select(
        pl.col("pax_counter_id"),
        pl.col("time_iot").str.to_datetime(),
        pl.col("data_pax"),
        pl.col("station_id"),
        pl.col("station_name"),
        pl.col("tpname")
    ).filter(
        pl.col("time_iot") > dt.datetime(2025,4,1)
    )

In [15]:
df_pax.head()

pax_counter_id,time_iot,data_pax,station_id,station_name,tpname
str,datetime[μs],i64,i64,str,str
"""083af23fd0df""",2025-04-01 00:00:20,0,719,"""Bobitz""","""Bahnsteig 02"""
"""083af23fd0df""",2025-04-01 00:01:20,0,719,"""Bobitz""","""Bahnsteig 02"""
"""083af23fd0df""",2025-04-01 00:02:20,0,719,"""Bobitz""","""Bahnsteig 02"""
"""083af23fd0df""",2025-04-01 00:03:20,0,719,"""Bobitz""","""Bahnsteig 02"""
"""083af23fd0df""",2025-04-01 00:04:20,0,719,"""Bobitz""","""Bahnsteig 02"""


In [16]:
pax_sensors = df_pax.select(pl.col("pax_counter_id").unique()).to_series().to_list()
len(pax_sensors)

106

In [17]:
pax_sensor = pax_sensors[0]
pax_sensor

'244cab0712df'

In [18]:
for pax_sensor in pax_sensors:
    agg_data = df_pax.filter(
            pl.col("pax_counter_id") == pax_sensor
        ).sort(
            pl.col("time_iot")
        ).with_columns(
            pl.col("time_iot").dt.date().alias("iot_date"),
            pl.col("time_iot").dt.hour().alias("iot_hour"),
            pl.col("time_iot").dt.minute().alias("iot_minute")
        ).with_columns(
            (pl.col("iot_minute") / 10).cast(pl.Int16).alias("iot_minute_group")
        ).group_by(
            ["iot_date", "iot_hour", "iot_minute_group"]
        ).agg(
            pl.col("pax_counter_id").min(),
            pl.col("station_id").min(),
            pl.col("tpname").min(),
            pl.col("time_iot").min().alias("time_iot_min"),
            pl.col("time_iot").max().alias("time_iot_max"),
            pl.col("data_pax").mean().round(2).alias("pax_10min_mean"),
            pl.col("data_pax").min().alias("pax_10min_min"),
            pl.col("data_pax").max().alias("pax_10min_max")
        ).drop(
            ["iot_hour", "iot_minute_group"]
        ).sort(
            "time_iot_min"
        ).with_columns(
            pl.col("pax_10min_mean").diff().alias("pax_10min_mean_diff"),
            pl.col("pax_10min_min").diff().alias("pax_10min_min_diff"),
            pl.col("pax_10min_max").diff().alias("pax_10min_max_diff"),
        )

    agg_data.write_parquet(f"../../data/moving_average/moving_average_{region_name}_{mngmt_name}_pax{pax_sensor}.parquet")
    