# 04 — Feature Engineering & KPIs

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

ROOT = Path.cwd().parents[0]                    # repo root (adjust if needed)
INTERIM = ROOT / "data" / "interim"
AN      = ROOT / "analytics" / "looker_studio_datasources"
AN.mkdir(parents=True, exist_ok=True)

print("ROOT:", ROOT)
assert (ROOT/".git").exists(), "Not at repo root—adjust ROOT so ROOT/'.git' exists."


ROOT: /Users/poojithraj/Documents/melbourne-foot-traffic-marketing


In [3]:
df = pd.read_csv(INTERIM / "counts_weather_hourly.csv", parse_dates=["date_time"])
need = {"sensor_id","hourly_counts","temperature_2m","precipitation"}
missing = need - set(df.columns)
assert not missing, f"Missing columns: {missing}"
print(df.shape, df.date_time.min(), df.date_time.max())


(64040, 7) 2025-03-01 00:00:00+11:00 2025-03-31 23:00:00+11:00


In [None]:
print('TODO')

In [4]:
df["date"]       = df["date_time"].dt.date
df["hour"]       = df["date_time"].dt.hour
df["dow"]        = df["date_time"].dt.dayofweek            # 0=Mon
df["dow_name"]   = df["date_time"].dt.day_name()
df["is_weekend"] = df["dow"].isin([5,6])
df["month"]      = df["date_time"].dt.month

df["rain_mm"]    = pd.to_numeric(df["precipitation"], errors="coerce").fillna(0)
df["rain_flag"]  = (df["rain_mm"] > 0).astype(int)

df["temp_c"]     = pd.to_numeric(df["temperature_2m"], errors="coerce")
bins   = [-100, 10, 18, 24, 30, 100]
labels = ["<10°C", "10–18°C", "18–24°C", "24–30°C", ">30°C"]
df["temp_bin"]   = pd.cut(df["temp_c"], bins=bins, labels=labels)

print("rows:", len(df), "| dates:", df.date_time.min(), "→", df.date_time.max())


rows: 64040 | dates: 2025-03-01 00:00:00+11:00 → 2025-03-31 23:00:00+11:00


In [5]:
daily = (df.groupby(["sensor_id","date"], as_index=False)
           .agg(hourly_total=("hourly_counts","sum")))
daily.head()
daily.to_csv(AN/"daily_totals.csv", index=False)


In [6]:
heatmap = (df.groupby(["sensor_id","dow_name","hour"], as_index=False)
             .agg(median_count=("hourly_counts","median"),
                  mean_count=("hourly_counts","mean"),
                  samples=("hourly_counts","size")))
heatmap.head()
heatmap.to_csv(AN/"heatmap_weekday_hour.csv", index=False)


In [7]:
power = (heatmap.sort_values(["sensor_id","median_count"], ascending=[True,False])
                 .groupby("sensor_id")
                 .head(3)
                 .assign(rank=lambda d: d.groupby("sensor_id").cumcount()+1))
power.head()
power.to_csv(AN/"power_hours_top3.csv", index=False)


In [8]:
rain = (df.groupby(["sensor_id","rain_flag"], as_index=False)
          .agg(med=("hourly_counts","median")))
pivot = (rain.pivot(index="sensor_id", columns="rain_flag", values="med")
             .rename(columns={0:"med_no_rain",1:"med_rain"})
             .reset_index())
pivot["rain_delta_pct"] = (pivot["med_rain"] - pivot["med_no_rain"]) / pivot["med_no_rain"] * 100
pivot.head()
pivot.to_csv(AN/"rain_uplift_by_sensor.csv", index=False)


In [9]:
hourly = df[["sensor_id","date_time","hourly_counts","rain_flag","temp_c","temp_bin"]].copy()
hourly.head()
hourly.to_csv(AN/"counts_by_hour.csv", index=False)


In [10]:
sensors = pd.read_csv(INTERIM/"sensor_locations_clean.csv")
look = sensors[["sensor_id","sensor_name"]].drop_duplicates()
look.to_csv(AN/"sensors_lookup.csv", index=False)


In [11]:
!git -C "{ROOT}" status -s

!git -C "{ROOT}" add \
    notebooks/04_features_kpis.ipynb \
    "{AN}/daily_totals.csv" \
    "{AN}/heatmap_weekday_hour.csv" \
    "{AN}/power_hours_top3.csv" \
    "{AN}/rain_uplift_by_sensor.csv" \
    "{AN}/temp_uplift_by_sensor.csv" \
    "{AN}/counts_by_hour.csv" \
    "{AN}/sensors_lookup.csv"

!git -C "{ROOT}" commit -m "04: features & KPIs — heatmap, power hours, rain/temp effects; exports for Looker Studio"
!git -C "{ROOT}" push origin main


 [31mM[m notebooks/02_clean_join_geo.ipynb
 [31mM[m notebooks/03_weather_merge.ipynb
 [31mM[m notebooks/04_features_kpis.ipynb
[31m??[m Miniforge3-MacOSX-arm64.sh
[31m??[m analytics/looker_studio_datasources/counts_by_hour.csv
[31m??[m analytics/looker_studio_datasources/daily_totals.csv
[31m??[m analytics/looker_studio_datasources/heatmap_weekday_hour.csv
[31m??[m analytics/looker_studio_datasources/power_hours_top3.csv
[31m??[m analytics/looker_studio_datasources/rain_uplift_by_sensor.csv
[31m??[m analytics/looker_studio_datasources/sensors_lookup.csv
fatal: pathspec '/Users/poojithraj/Documents/melbourne-foot-traffic-marketing/analytics/looker_studio_datasources/temp_uplift_by_sensor.csv' did not match any files
On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   notebooks/0

In [12]:
git add analytics/looker_studio_datasources/*.csv \
        notebooks/02_clean_join_geo.ipynb \
        notebooks/03_weather_merge.ipynb \
        notebooks/04_features_kpis.ipynb

git commit -m "feat: KPIs & exports for dashboard (daily_totals, heatmap_weekday_hour, power_hours_top3, rain_uplift_by_sensor, counts_by_hour, sensors_lookup)"
git push origin main


SyntaxError: invalid decimal literal (593271782.py, line 2)

In [13]:
%%bash
set -euo pipefail

# 1) See what's untracked/modified
git status -s

# 2) Stage ONLY the files that actually exist
git add analytics/looker_studio_datasources/daily_totals.csv \
        analytics/looker_studio_datasources/heatmap_weekday_hour.csv \
        analytics/looker_studio_datasources/power_hours_top3.csv \
        analytics/looker_studio_datasources/rain_uplift_by_sensor.csv \
        analytics/looker_studio_datasources/counts_by_hour.csv \
        analytics/looker_studio_datasources/sensors_lookup.csv \
        notebooks/02_clean_join_geo.ipynb \
        notebooks/03_weather_merge.ipynb \
        notebooks/04_features_kpis.ipynb

# 3) Commit
git commit -m "feat: KPIs & exports for dashboard (daily_totals, heatmap_weekday_hour, power_hours_top3, rain_uplift_by_sensor, counts_by_hour, sensors_lookup)"

# 4) Push
git push origin main


 M 02_clean_join_geo.ipynb
 M 03_weather_merge.ipynb
 M 04_features_kpis.ipynb
?? ../Miniforge3-MacOSX-arm64.sh
?? ../analytics/looker_studio_datasources/counts_by_hour.csv
?? ../analytics/looker_studio_datasources/daily_totals.csv
?? ../analytics/looker_studio_datasources/heatmap_weekday_hour.csv
?? ../analytics/looker_studio_datasources/power_hours_top3.csv
?? ../analytics/looker_studio_datasources/rain_uplift_by_sensor.csv
?? ../analytics/looker_studio_datasources/sensors_lookup.csv


fatal: pathspec 'analytics/looker_studio_datasources/daily_totals.csv' did not match any files


CalledProcessError: Command 'b'set -euo pipefail\n\n# 1) See what\'s untracked/modified\ngit status -s\n\n# 2) Stage ONLY the files that actually exist\ngit add analytics/looker_studio_datasources/daily_totals.csv \\\n        analytics/looker_studio_datasources/heatmap_weekday_hour.csv \\\n        analytics/looker_studio_datasources/power_hours_top3.csv \\\n        analytics/looker_studio_datasources/rain_uplift_by_sensor.csv \\\n        analytics/looker_studio_datasources/counts_by_hour.csv \\\n        analytics/looker_studio_datasources/sensors_lookup.csv \\\n        notebooks/02_clean_join_geo.ipynb \\\n        notebooks/03_weather_merge.ipynb \\\n        notebooks/04_features_kpis.ipynb\n\n# 3) Commit\ngit commit -m "feat: KPIs & exports for dashboard (daily_totals, heatmap_weekday_hour, power_hours_top3, rain_uplift_by_sensor, counts_by_hour, sensors_lookup)"\n\n# 4) Push\ngit push origin main\n'' returned non-zero exit status 128.

In [14]:
!git add analytics/looker_studio_datasources/daily_totals.csv
!git add analytics/looker_studio_datasources/heatmap_weekday_hour.csv
!git add analytics/looker_studio_datasources/power_hours_top3.csv
!git add analytics/looker_studio_datasources/rain_uplift_by_sensor.csv
!git add analytics/looker_studio_datasources/counts_by_hour.csv
!git add analytics/looker_studio_datasources/sensors_lookup.csv
!git add notebooks/02_clean_join_geo.ipynb notebooks/03_weather_merge.ipynb notebooks/04_features_kpis.ipynb
!git commit -m "feat: KPIs & exports for dashboard (daily_totals, heatmap_weekday_hour, power_hours_top3, rain_uplift_by_sensor, counts_by_hour, sensors_lookup)"
!git push origin main


fatal: pathspec 'analytics/looker_studio_datasources/daily_totals.csv' did not match any files
fatal: pathspec 'analytics/looker_studio_datasources/heatmap_weekday_hour.csv' did not match any files
fatal: pathspec 'analytics/looker_studio_datasources/power_hours_top3.csv' did not match any files
fatal: pathspec 'analytics/looker_studio_datasources/rain_uplift_by_sensor.csv' did not match any files
fatal: pathspec 'analytics/looker_studio_datasources/counts_by_hour.csv' did not match any files
fatal: pathspec 'analytics/looker_studio_datasources/sensors_lookup.csv' did not match any files
fatal: pathspec 'notebooks/02_clean_join_geo.ipynb' did not match any files
On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   02_clean_join_geo.ipynb[m
	[31mmodified:   03_weather_merge.ipynb[m
	[31mmodified

In [15]:
%%bash
set -euo pipefail

# 1) Go to the repo root (no matter where the notebook lives)
cd "$(git rev-parse --show-toplevel)"

# 2) Stage the CSV exports + the three notebooks
git add \
  analytics/looker_studio_datasources/daily_totals.csv \
  analytics/looker_studio_datasources/heatmap_weekday_hour.csv \
  analytics/looker_studio_datasources/power_hours_top3.csv \
  analytics/looker_studio_datasources/rain_uplift_by_sensor.csv \
  analytics/looker_studio_datasources/counts_by_hour.csv \
  analytics/looker_studio_datasources/sensors_lookup.csv \
  notebooks/02_clean_join_geo.ipynb \
  notebooks/03_weather_merge.ipynb \
  notebooks/04_features_kpis.ipynb

# 3) Commit (OK if it says "nothing to commit")
git commit -m "feat: KPIs & exports for dashboard (daily_totals, heatmap_weekday_hour, power_hours_top3, rain_uplift_by_sensor, counts_by_hour, sensors_lookup)"

# 4) Push to GitHub
git push origin main


[main c9badb9] feat: KPIs & exports for dashboard (daily_totals, heatmap_weekday_hour, power_hours_top3, rain_uplift_by_sensor, counts_by_hour, sensors_lookup)
 9 files changed, 83818 insertions(+), 3 deletions(-)
 create mode 100644 analytics/looker_studio_datasources/counts_by_hour.csv
 create mode 100644 analytics/looker_studio_datasources/daily_totals.csv
 create mode 100644 analytics/looker_studio_datasources/heatmap_weekday_hour.csv
 create mode 100644 analytics/looker_studio_datasources/power_hours_top3.csv
 create mode 100644 analytics/looker_studio_datasources/rain_uplift_by_sensor.csv
 create mode 100644 analytics/looker_studio_datasources/sensors_lookup.csv


To https://github.com/RajPoo7/melbourne-foot-traffic-marketing.git
   864509a..c9badb9  main -> main


In [16]:
cd "$(git rev-parse --show-toplevel)"   # jump to your repo root
git status -s                           # quick check of changes
git add analytics/looker_studio_datasources/*.csv notebooks/*.ipynb README.md
git commit -m "checkpoint: end of day – data exports + notebooks saved"
git push origin main


SyntaxError: invalid syntax (2863332803.py, line 1)