In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Locate the project root: when the kernel's working directory is the
# "notebooks" folder the root is its parent, otherwise it is the working
# directory itself.
working_dir = Path.cwd()
PROJECT_ROOT = working_dir.parent if working_dir.name == "notebooks" else working_dir

# Raw weekly venue data, addressed relative to the project root.
DATA_PATH = PROJECT_ROOT.joinpath("data", "raw", "venue_weekly_data.csv")

# Select matplotlib's built-in "default" style sheet for the figures below.
plt.style.use("default")


ModuleNotFoundError: No module named 'pandas'

In [None]:
# Load the raw weekly venue data; the "date" column is parsed to datetimes
# while reading so later cells can sort and plot on it directly.
df = pd.read_csv(
    DATA_PATH,
    parse_dates=["date"],
)

# Preview the first rows.
df.head()


In [None]:
# Column dtypes and non-null counts; info() prints its report directly.
df.info()

# Summary statistics for the numeric columns only. DataFrame.describe() does
# not accept a `numeric_only` keyword (passing it raises TypeError); the
# supported way to restrict the summary to numeric dtypes is `include`.
df.describe(include="number")


In [None]:
# Chronologically ordered copy of the raw data.
df_sorted = df.sort_values("date")

# The frame carries a "venue_id" column, so a single date can appear on several
# rows. Plotting the raw rows would draw a zig-zag through every venue's value;
# summing per date gives the combined series the title promises.
# groupby() sorts its keys, so the result is already in date order.
revenue_by_date = df_sorted.groupby("date")["revenue"].sum()

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(revenue_by_date.index, revenue_by_date.to_numpy())
ax.set_title("Weekly revenue – all venues combined")
ax.set_xlabel("Date")
ax.set_ylabel("Revenue")
fig.tight_layout()
plt.show()


In [None]:
# One revenue line per venue on a shared axis.
fig, ax = plt.subplots(figsize=(10, 6))

# groupby() keeps the row order inside each group, so every venue's rows stay
# in the order they have in df_sorted.
for venue_id, group in df_sorted.groupby("venue_id"):
    ax.plot(group["date"], group["revenue"], label=venue_id)

ax.set_title("Weekly revenue by venue")
ax.set_xlabel("Date")
ax.set_ylabel("Revenue")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Average revenue split by the promotion flag and by the local-event flag.
promo_means = df.groupby("promotion")["revenue"].mean()
event_means = df.groupby("local_event")["revenue"].mean()

# Only the last expression of a cell is displayed automatically, so both
# summaries are printed explicitly instead of being computed and discarded.
print(promo_means, event_means, sep="\n\n")

# Label the bars from the actual index values rather than by position, so the
# tick text stays correct if a flag value is missing from the data. Values not
# present in the mapping keep their original label.
promo_labels = {0: "No promotion", 1: "Promotion"}
promo_means_labelled = promo_means.rename(index=promo_labels)

fig, ax = plt.subplots(figsize=(5, 4))
promo_means_labelled.plot(kind="bar", ax=ax, rot=0)
ax.set_ylabel("Average revenue")
ax.set_title("Average revenue with/without promotion")
fig.tight_layout()
plt.show()


In [None]:
# Test-set predictions together with the staffing columns, read from the
# processed data folder; "date" is parsed to datetimes while reading.
PROCESSED_PATH = PROJECT_ROOT.joinpath(
    "data", "processed", "test_predictions_with_staffing.csv"
)
preds = pd.read_csv(
    PROCESSED_PATH,
    parse_dates=["date"],
)

# Preview the first rows.
preds.head()


In [None]:
# Use the first venue id that appears in the predictions as the worked example.
venue_ids = preds["venue_id"].unique()
venue_example = venue_ids[0]
venue_example


In [None]:
# Rows for the example venue only, in chronological order.
is_example_venue = preds["venue_id"] == venue_example
venue_df = preds.loc[is_example_venue].sort_values("date")

fig, ax = plt.subplots(figsize=(10, 5))

# Draw the actual series first, then the prediction, on the same axis.
series_to_plot = [
    ("revenue", "Actual revenue"),
    ("predicted_revenue", "Predicted revenue"),
]
for column, series_label in series_to_plot:
    ax.plot(venue_df["date"], venue_df[column], label=series_label)

ax.set_title(f"Actual vs predicted weekly revenue – {venue_example}")
ax.set_xlabel("Date")
ax.set_ylabel("Revenue")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Scatter of predicted against actual revenue across every venue.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(preds["revenue"], preds["predicted_revenue"], alpha=0.6)

# Reference diagonal (predicted == actual) from the origin to the largest
# value found in either column.
max_val = max(preds["revenue"].max(), preds["predicted_revenue"].max())
diagonal = [0, max_val]
ax.plot(diagonal, diagonal)

ax.set_xlabel("Actual revenue")
ax.set_ylabel("Predicted revenue")
ax.set_title("Actual vs predicted revenue (all venues)")
fig.tight_layout()
plt.show()


In [None]:
# Columns for the staffing table: identifiers, revenue (actual and predicted),
# actual staff hours, then the three suggested-hours bounds.
cols_to_show = [
    "date",
    "venue_id",
    "revenue",
    "predicted_revenue",
    "staff_hours",
] + [f"suggested_staff_hours_{bound}" for bound in ("min", "centre", "max")]

# Example venue's rows, most recent week first.
example_rows = preds.loc[preds["venue_id"] == venue_example]
venue_staff = example_rows.sort_values("date", ascending=False)

# Show the ten latest weeks.
venue_staff.loc[:, cols_to_show].head(10)


In [None]:
# Size of the trailing window. Defined once so the row selection and the plot
# title cannot drift apart.
# NOTE(review): treats one row as one week for the venue — confirm against the data.
N_RECENT_WEEKS = 12

# Latest N rows for the example venue, back in chronological order for plotting.
recent = venue_staff.sort_values("date").tail(N_RECENT_WEEKS)

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(recent["date"], recent["staff_hours"], label="Actual staff hours")
ax.plot(recent["date"], recent["suggested_staff_hours_centre"], label="Suggested (centre)")

# Shade the band between the minimum and maximum suggested hours.
ax.fill_between(
    recent["date"],
    recent["suggested_staff_hours_min"],
    recent["suggested_staff_hours_max"],
    alpha=0.2,
    label="Suggested range",
)

ax.set_title(
    f"Actual vs suggested staff hours – last {N_RECENT_WEEKS} weeks – {venue_example}"
)
ax.set_xlabel("Date")
ax.set_ylabel("Staff hours")
ax.legend()
fig.tight_layout()
plt.show()
