In [None]:
import pandas as pd

In [None]:
# Path of the semicolon-delimited input CSV read by the loading cell.
DATA = "DANE2023.csv"

# Station identifiers to keep; rows are later filtered on the "STACJA" column.
STATIONS = [
    24002,
    24004,
    24005,
    24007,
    24008,
    24018,
    24019,
    24023,
]

In [None]:
# Load the raw export; fields in the file are separated by semicolons.
df = pd.read_csv(DATA, sep=";")
df.head()

In [None]:
# Reduce the raw frame to the rows and columns used downstream.
# Written as one non-mutating chain (no inplace=True): every step returns a
# new frame, so nothing is modified behind the reader's back.
df = (
    # Discard every column whose name contains "SZAC".
    df.loc[:, ~df.columns.str.contains("SZAC")]
    # Keep only rows belonging to the configured stations.
    .loc[lambda frame: frame["STACJA"].isin(STATIONS)]
    # Remove bookkeeping columns that are not needed for the analysis.
    .drop(columns=["ROK", "NRPOMIARU", "TYDZ", "NRDNIA", "SDRD", "SDRDN"])
    # Switch the two remaining identifier columns to English names.
    .rename(columns={"STACJA": "station_id", "DATA": "date"})
    # Row filtering leaves gaps in the index; renumber from zero.
    .reset_index(drop=True)
)
df.head()

In [None]:
# Reshape from wide to long: every column whose name starts with "G" becomes
# its own row, with the column label stored in "hour" and the cell value in
# "vehicles_count". The station and date columns are repeated on each row.
hourly_columns = df.columns[df.columns.str.startswith("G")].tolist()
id_vars = ["station_id", "date"]

df = pd.melt(
    df,
    id_vars=id_vars,
    value_vars=hourly_columns,
    var_name="hour",
    value_name="vehicles_count",
)
df.head()

In [None]:
# Build one timestamp per row from the date and the "G<n>" hour label, then
# keep only the columns needed for aggregation.
#
# Written as a single non-mutating chain; the helper columns ("hour",
# "timestamp") never need an explicit drop because the final column selection
# leaves them out.
#
# NOTE(review): the UTC conversion subtracts a fixed one hour all year round.
# If the source dates follow a local clock with daylight saving time, rows in
# the summer period would be off by one hour — TODO confirm the clock
# convention of the input data.
df = (
    df.assign(
        # "G1" -> 1, "G2" -> 2, ...
        hour=lambda frame: frame["hour"].str.extract(r"G(\d+)", expand=False).astype(int),
        # Label n is placed at (n - 1) hours after midnight of the row's date.
        timestamp=lambda frame: (
            pd.to_datetime(frame["date"]) + pd.to_timedelta(frame["hour"] - 1, unit="h")
        ),
        # Shift back by one hour to obtain the value stored as UTC.
        timestamp_utc=lambda frame: frame["timestamp"] - pd.Timedelta(hours=1),
    )
    .sort_values(by=["station_id", "timestamp_utc"])
    .reset_index(drop=True)
    .loc[:, ["timestamp_utc", "vehicles_count"]]
    # Calendar date of the shifted timestamp, used later for daily grouping.
    .assign(date=lambda frame: frame["timestamp_utc"].dt.date)
)
df.head()

In [None]:
# Total vehicles per hourly timestamp, summed over all stations.
# The first (earliest) row is then discarded: because timestamps were shifted
# back by one hour, the earliest hour lands on the calendar day before the
# first date in the data — presumably why it is dropped.
df_hsum = (
    df.loc[:, ["timestamp_utc", "vehicles_count"]]
    .groupby("timestamp_utc", as_index=False)
    .sum()
    .iloc[1:]
)
df_hsum.head()

In [None]:
# Total vehicles per calendar date (date of the shifted timestamp), summed
# over all stations. The first date is discarded; given the one-hour shift it
# holds only the single hour that spilled over from the first day of data.
# NOTE(review): by the same shift, the last date appears to contain only 23
# hourly values, which lowers its daily total — TODO confirm this is acceptable.
df_dsum = (
    df.loc[:, ["date", "vehicles_count"]]
    .groupby("date", as_index=False)
    .sum()
    .iloc[1:]
)
df_dsum.head()

In [None]:
# Mean of the daily totals over every date kept in df_dsum.
yearly_average = df_dsum.loc[:, "vehicles_count"].mean()
yearly_average

In [None]:
# Each day's total relative to the mean daily total (1.0 means an average
# day), rounded to four decimal places.
df_dsum["yearly_mean_fraction"] = df_dsum["vehicles_count"].div(yearly_average).round(4)
df_dsum.head()

In [None]:
# Attach the matching daily total to every hourly row, then express each hour
# as a share of its day's total (rounded to four decimal places).
df_hsum["date"] = df_hsum["timestamp_utc"].dt.date

# Both frames carry a "vehicles_count" column; the suffixes tell them apart.
df_merged = df_hsum.merge(
    df_dsum,
    how="inner",
    on="date",
    suffixes=("_hourly", "_daily"),
)

hourly_share = df_merged["vehicles_count_hourly"].div(df_merged["vehicles_count_daily"])
df_merged["daily_fraction"] = hourly_share.round(4)
df_merged.head()

In [None]:
# Final table: one row per hourly timestamp with its two relative measures.
# Copied so later changes to df_output cannot affect df_merged.
output_columns = ["timestamp_utc", "daily_fraction", "yearly_mean_fraction"]
df_output = df_merged.loc[:, output_columns].copy()
df_output.head()

In [None]:
# Persist the result as CSV; the row index is not written.
df_output.to_csv(path_or_buf="processed_traffic.csv", index=False)