# Hourly demand anomalies on national public holidays (hard-coded)

### NY Day

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Step 1: Ensure datetime index ---
# The hourly resample and the date-based .loc slices below need a datetime index.
demand.index = pd.to_datetime(demand.index)

# --- Step 2: Choose substation, focus date, and years ---
station = "BLAKE"
focus_date = "01-01"   # New Year's Day, written as "MM-DD"
years = [2014, 2015]   # years to average

# --- Step 3: Resample to hourly demand ---
hourly = demand[[station]].resample("h").mean()

# --- Step 4: Collect ±30-day windows for each year ---
windows = []
for year in years:
    ref_date = pd.Timestamp(f"{year}-{focus_date}")
    start = ref_date - pd.Timedelta(days=30)
    end   = ref_date + pd.Timedelta(days=30)
    window = hourly.loc[start:end].copy()
    # Drop the year from the labels so the same calendar hour lines up across years.
    window["mdh"] = window.index.strftime("%m-%d %H:%M")
    window = window.set_index("mdh")
    windows.append(window)

# --- Step 5: Combine all windows (one column per year, aligned on "MM-DD HH:MM") ---
combined = pd.concat(windows, axis=1)

# --- Step 6: Compute baseline (mean demand across all years' ±30-day windows) ---
baseline_mean = combined.mean(axis=1).mean()

# --- Step 7: Compute anomalies for each year relative to baseline ---
anomalies = combined.subtract(baseline_mean, axis=0)

# --- Step 8: Average anomalies across all chosen years ---
avg_anomaly = anomalies.mean(axis=1)

# --- Step 9: Extract only the chosen date (its labels start with "MM-DD") ---
daily_anomaly = avg_anomaly.loc[avg_anomaly.index.str.startswith(focus_date)]

# --- Step 10: Plot hourly anomaly curve for chosen date ---
# Read the hour out of each "MM-DD HH:MM" label instead of assuming exactly
# 24 rows, so a day with missing hours still plots at the right x positions.
hours_of_day = daily_anomaly.index.str[6:8].astype(int)
plt.figure(figsize=(12,5))
plt.plot(hours_of_day, daily_anomaly.values, color="red", linewidth=2,
         marker="o", label=f"Average Anomaly ({years[0]}–{years[-1]})")

# Baseline at 0
plt.axhline(0, color="black", linewidth=1)

# Grid and ticks
plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
plt.xticks(np.arange(0, 24, 1), [f"{h:02d}:00" for h in range(24)], rotation=45)

# Title with full substation name
full_name = info.loc[station, "Name"]
plt.title(f"{full_name} Average Demand Anomaly on {focus_date} ({years[0]}–{years[-1]})", fontsize=14)
plt.xlabel("Hour of Day")
plt.ylabel("Electricity Demand Anomaly")
plt.legend()
plt.tight_layout()
plt.close()

NameError: name 'demand' is not defined

### Aus Day

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Step 1: Make sure the index is datetime-like ---
# Hourly resampling and the date-based slices below depend on it.
demand.index = pd.to_datetime(demand.index)

# --- Step 2: Analysis settings ---
station = "BLAKE"
focus_date = "01-26"   # Australia Day, written as "MM-DD"
years = [2014, 2015]   # years whose anomalies are averaged

# --- Step 3: Hourly mean demand for the chosen substation ---
hourly = demand[[station]].resample("h").mean()

# --- Step 4: One ±30-day window per year ---
half_window = pd.Timedelta(days=30)
windows = []
for year in years:
    ref_date = pd.Timestamp(f"{year}-{focus_date}")
    window = hourly.loc[ref_date - half_window:ref_date + half_window].copy()
    # Label rows "MM-DD HH:MM" (index name "mdh") so the years align by calendar hour.
    window.index = window.index.strftime("%m-%d %H:%M").rename("mdh")
    windows.append(window)

# --- Step 5: Side-by-side table, one column per year ---
combined = pd.concat(windows, axis=1)

# --- Step 6: Baseline = mean of the per-hour cross-year means ---
baseline_mean = combined.mean(axis=1).mean()

# --- Step 7: Per-year anomalies relative to the baseline ---
anomalies = combined - baseline_mean

# --- Step 8: Average the anomalies across the chosen years ---
avg_anomaly = anomalies.mean(axis=1)

# --- Step 9: Keep only the rows whose label starts with the focus date ---
on_focus_date = avg_anomaly.index.str.startswith(focus_date)
daily_anomaly = avg_anomaly.loc[on_focus_date]

# --- Step 10: Plot the hourly anomaly curve ---
plt.figure(figsize=(12,5))
plt.plot(
    range(24),
    daily_anomaly.values,
    color="red",
    linewidth=2,
    marker="o",
    label=f"Average Anomaly ({years[0]}–{years[-1]})",
)

# Zero line marks the baseline
plt.axhline(0, color="black", linewidth=1)

# Horizontal grid and one tick per hour
plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
hour_labels = [f"{h:02d}:00" for h in range(24)]
plt.xticks(np.arange(0, 24, 1), hour_labels, rotation=45)

# The title uses the full substation name from the info table
full_name = info.loc[station, "Name"]
plt.title(f"{full_name} Average Demand Anomaly on {focus_date} ({years[0]}–{years[-1]})", fontsize=14)
plt.xlabel("Hour of Day")
plt.ylabel("Electricity Demand Anomaly")
plt.legend()
plt.tight_layout()
plt.close()

NameError: name 'demand' is not defined

#### Consecutive Year Plots for all substations

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Step 1: Make sure the index is datetime-like ---
demand.index = pd.to_datetime(demand.index)

# --- Step 2: Focus date and the years to pair up ---
focus_date = "01-26"   # Australia Day
year_range = range(2004, 2019)  # 2004 through 2018

# --- Step 3: Hourly mean demand for every substation column ---
hourly = demand.resample("h").mean()

# Values reused by every plot
half_window = pd.Timedelta(days=30)
hour_ticks = np.arange(0, 24, 1)
hour_labels = [f"{h:02d}:00" for h in range(24)]

# --- Step 4: One set of plots per substation ---
for station in hourly.columns:
    full_name = info.loc[station, "Name"]

    # --- Step 5: Each pair of consecutive years (y1, y1 + 1) ---
    for y1, y2 in zip(year_range[:-1], year_range[1:]):
        year_pair = [y1, y2]

        # ±30-day window per year, relabelled "MM-DD HH:MM" so both years align
        windows = []
        for year in year_pair:
            ref_date = pd.Timestamp(f"{year}-{focus_date}")
            window = hourly.loc[ref_date - half_window:ref_date + half_window, [station]].copy()
            window.index = window.index.strftime("%m-%d %H:%M").rename("mdh")
            windows.append(window)

        # One column per year
        combined = pd.concat(windows, axis=1)

        # Baseline: mean of the per-hour cross-year means
        baseline_mean = combined.mean(axis=1).mean()

        # Anomaly per year, then averaged across the two years
        anomalies = combined - baseline_mean
        avg_anomaly = anomalies.mean(axis=1)

        # Pull the focus day out once per year. The date is fixed, so both
        # passes select the same 24 rows of avg_anomaly.
        ad_hours = []
        for year in year_pair:
            day_str = pd.Timestamp(f"{year}-{focus_date}").strftime("%m-%d")
            on_day = avg_anomaly.index.str.startswith(day_str)
            daily = avg_anomaly.loc[on_day]
            daily.index = range(24)  # hour of day instead of "MM-DD HH:MM"
            ad_hours.append(daily)

        # 24 values: mean of the per-year extractions
        ad_daily = pd.concat(ad_hours, axis=1).mean(axis=1)

        # --- Plot ---
        plt.figure(figsize=(12,5))
        plt.plot(
            ad_daily.index,
            ad_daily.values,
            color="red",
            linewidth=2,
            marker="o",
            label=f"Average Anomaly ({y1}–{y2})",
        )

        plt.axhline(0, color="black", linewidth=1)
        plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
        plt.xticks(hour_ticks, hour_labels, rotation=45)

        plt.title(f"{full_name} Average Demand Anomaly on Australia Day ({y1}–{y2})", fontsize=14)
        plt.xlabel("Hour of Day")
        plt.ylabel("Electricity Demand Anomaly")
        plt.legend()
        plt.tight_layout()
        plt.close()

NameError: name 'demand' is not defined

#### Consecutive Year Plots for one substation (BLAKE)

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Step 1: Make sure the index is datetime-like ---
demand.index = pd.to_datetime(demand.index)

# --- Step 2: Substation, focus date and the years to pair up ---
station = "BLAKE"
focus_date = "01-26"   # Australia Day
year_range = range(2004, 2019)  # 2004 through 2018

# --- Step 3: Hourly mean demand for the chosen substation ---
hourly = demand[[station]].resample("h").mean()

# Values reused by every plot
half_window = pd.Timedelta(days=30)
hour_ticks = np.arange(0, 24, 1)
hour_labels = [f"{h:02d}:00" for h in range(24)]

# --- Step 4: Each pair of consecutive years (y1, y1 + 1) ---
for y1, y2 in zip(year_range[:-1], year_range[1:]):
    year_pair = [y1, y2]

    # ±30-day window per year, relabelled "MM-DD HH:MM" so both years align
    windows = []
    for year in year_pair:
        ref_date = pd.Timestamp(f"{year}-{focus_date}")
        window = hourly.loc[ref_date - half_window:ref_date + half_window].copy()
        window.index = window.index.strftime("%m-%d %H:%M").rename("mdh")
        windows.append(window)

    # One column per year
    combined = pd.concat(windows, axis=1)

    # Baseline: mean of the per-hour cross-year means
    baseline_mean = combined.mean(axis=1).mean()

    # Anomaly per year, then averaged across the two years
    anomalies = combined - baseline_mean
    avg_anomaly = anomalies.mean(axis=1)

    # Pull the focus day out once per year. The date is fixed, so both
    # passes select the same 24 rows of avg_anomaly.
    ad_hours = []
    for year in year_pair:
        day_str = pd.Timestamp(f"{year}-{focus_date}").strftime("%m-%d")
        on_day = avg_anomaly.index.str.startswith(day_str)
        daily = avg_anomaly.loc[on_day]
        daily.index = range(24)  # hour of day instead of "MM-DD HH:MM"
        ad_hours.append(daily)

    # 24 values: mean of the per-year extractions
    ad_daily = pd.concat(ad_hours, axis=1).mean(axis=1)

    # --- Plot ---
    plt.figure(figsize=(12,5))
    plt.plot(
        ad_daily.index,
        ad_daily.values,
        color="red",
        linewidth=2,
        marker="o",
        label=f"Average Anomaly ({y1}–{y2})",
    )

    plt.axhline(0, color="black", linewidth=1)
    plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
    plt.xticks(hour_ticks, hour_labels, rotation=45)

    # The title uses the full substation name from the info table
    full_name = info.loc[station, "Name"]
    plt.title(f"{full_name} Average Demand Anomaly on Australia Day ({y1}–{y2})", fontsize=14)
    plt.xlabel("Hour of Day")
    plt.ylabel("Electricity Demand Anomaly")
    plt.legend()
    plt.tight_layout()
    plt.close()

NameError: name 'demand' is not defined

### Good Friday (moving)

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from dateutil.easter import easter

# --- Step 1: Ensure datetime index ---
# The hourly resample and the date-based .loc slices below need a datetime index.
demand.index = pd.to_datetime(demand.index)

# --- Step 2: Choose substation and years ---
station = "BLAKE"
years = [2014, 2015]   # extend as needed

# --- Step 3: Resample to hourly demand ---
hourly = demand[[station]].resample("h").mean()

# --- Step 4: Collect ±30-day windows for each year ---
# Good Friday is two days before Easter Sunday. easter() returns a
# datetime.date, so convert it to a Timestamp once and reuse it in Step 9.
good_fridays = [pd.Timestamp(easter(year)) - pd.Timedelta(days=2) for year in years]

windows = []
for good_friday in good_fridays:
    start = good_friday - pd.Timedelta(days=30)
    end   = good_friday + pd.Timedelta(days=30)
    window = hourly.loc[start:end].copy()
    window["mdh"] = window.index.strftime("%m-%d %H:%M")  # align by calendar date
    window = window.set_index("mdh")
    windows.append(window)

# --- Step 5: Combine all windows ---
combined = pd.concat(windows, axis=1)

# --- Step 6: Compute baseline (mean demand across all years' ±30-day windows) ---
baseline_mean = combined.mean(axis=1).mean()

# --- Step 7: Compute anomalies for each year relative to baseline ---
anomalies = combined.subtract(baseline_mean, axis=0)

# --- Step 8: Average anomalies across all chosen years ---
# Calendar-aligned average, kept for inspection only. It is NOT used for the
# Good Friday curve: the holiday falls on a different calendar date each year,
# so a calendar-aligned row mixes one year's Good Friday with an ordinary day
# of the other year.
avg_anomaly = anomalies.mean(axis=1)

# --- Step 9: Extract Good Friday (24 hours) from each year's own data ---
gf_hours = []
for good_friday in good_fridays:
    daily = hourly.loc[good_friday:good_friday + pd.Timedelta(hours=23), station] - baseline_mean
    # index by hour of day (0–23) so the years can be averaged hour by hour
    daily.index = daily.index.hour
    gf_hours.append(daily)

# --- Step 10: Stack and average across years (now 24 values total) ---
gf_daily = pd.concat(gf_hours, axis=1).mean(axis=1)

# --- Step 11: Plot hourly anomaly curve for Good Friday ---
plt.figure(figsize=(12,5))
plt.plot(gf_daily.index, gf_daily.values, color="red", linewidth=2, marker="o",
         label=f"Average Anomaly ({years[0]}–{years[-1]})")

# Baseline at 0
plt.axhline(0, color="black", linewidth=1)

# Grid and ticks
plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
plt.xticks(np.arange(0, 24, 1), [f"{h:02d}:00" for h in range(24)], rotation=45)

# Title with full substation name
full_name = info.loc[station, "Name"]
plt.title(f"{full_name} Average Demand Anomaly on Good Friday ({years[0]}–{years[-1]})", fontsize=14)
plt.xlabel("Hour of Day")
plt.ylabel("Electricity Demand Anomaly")
plt.legend()
plt.tight_layout()
plt.close()

NameError: name 'demand' is not defined

### Easter Monday (Moving)

In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from dateutil.easter import easter

# --- Step 1: Ensure datetime index ---
# The hourly resample and the date-based .loc slices below need a datetime index.
demand.index = pd.to_datetime(demand.index)

# --- Step 2: Choose substation and years ---
station = "BLAKE"
years = [2014, 2015]   # extend as needed

# --- Step 3: Resample to hourly demand ---
hourly = demand[[station]].resample("h").mean()

# --- Step 4: Collect ±30-day windows for each year ---
# Easter Monday is the day after Easter Sunday. easter() returns a
# datetime.date, so convert it to a Timestamp once and reuse it in Step 9.
easter_mondays = [pd.Timestamp(easter(year)) + pd.Timedelta(days=1) for year in years]

windows = []
for easter_monday in easter_mondays:
    start = easter_monday - pd.Timedelta(days=30)
    end   = easter_monday + pd.Timedelta(days=30)
    window = hourly.loc[start:end].copy()
    window["mdh"] = window.index.strftime("%m-%d %H:%M")  # align by calendar date
    window = window.set_index("mdh")
    windows.append(window)

# --- Step 5: Combine all windows ---
combined = pd.concat(windows, axis=1)

# --- Step 6: Compute baseline (mean demand across all years' ±30-day windows) ---
baseline_mean = combined.mean(axis=1).mean()

# --- Step 7: Compute anomalies for each year relative to baseline ---
anomalies = combined.subtract(baseline_mean, axis=0)

# --- Step 8: Average anomalies across all chosen years ---
# Calendar-aligned average, kept for inspection only. It is NOT used for the
# Easter Monday curve: the holiday falls on a different calendar date each
# year, so a calendar-aligned row mixes one year's Easter Monday with an
# ordinary day of the other year.
avg_anomaly = anomalies.mean(axis=1)

# --- Step 9: Extract Easter Monday (24 hours) from each year's own data ---
em_hours = []
for easter_monday in easter_mondays:
    daily = hourly.loc[easter_monday:easter_monday + pd.Timedelta(hours=23), station] - baseline_mean
    # index by hour of day (0–23) so the years can be averaged hour by hour
    daily.index = daily.index.hour
    em_hours.append(daily)

# --- Step 10: Stack and average across years (now 24 values total) ---
em_daily = pd.concat(em_hours, axis=1).mean(axis=1)

# --- Step 11: Plot hourly anomaly curve for Easter Monday ---
plt.figure(figsize=(12,5))
plt.plot(em_daily.index, em_daily.values, color="red", linewidth=2, marker="o",
         label=f"Average Anomaly ({years[0]}–{years[-1]})")

plt.axhline(0, color="black", linewidth=1)
plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
plt.xticks(np.arange(0, 24, 1), [f"{h:02d}:00" for h in range(24)], rotation=45)

# Title with full substation name
full_name = info.loc[station, "Name"]
plt.title(f"{full_name} Average Demand Anomaly on Easter Monday ({years[0]}–{years[-1]})", fontsize=14)
plt.xlabel("Hour of Day")
plt.ylabel("Electricity Demand Anomaly")
plt.legend()
plt.tight_layout()
plt.close()

NameError: name 'demand' is not defined

### ANZAC Day

In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Step 1: Ensure datetime index ---
# The hourly resample and the date-based .loc slices below need a datetime index.
demand.index = pd.to_datetime(demand.index)

# --- Step 2: Choose substation, focus date, and years ---
station = "BLAKE"
focus_date = "04-25"   # ANZAC Day, written as "MM-DD"
years = [2007, 2008]   # years to average

# --- Step 3: Resample to hourly demand ---
hourly = demand[[station]].resample("h").mean()

# --- Step 4: Collect ±30-day windows for each year ---
windows = []
for year in years:
    ref_date = pd.Timestamp(f"{year}-{focus_date}")
    start = ref_date - pd.Timedelta(days=30)
    end   = ref_date + pd.Timedelta(days=30)
    window = hourly.loc[start:end].copy()
    # Drop the year from the labels so the same calendar hour lines up across years.
    window["mdh"] = window.index.strftime("%m-%d %H:%M")
    window = window.set_index("mdh")
    windows.append(window)

# --- Step 5: Combine all windows (one column per year, aligned on "MM-DD HH:MM") ---
combined = pd.concat(windows, axis=1)

# --- Step 6: Compute baseline (mean demand across all years' ±30-day windows) ---
baseline_mean = combined.mean(axis=1).mean()

# --- Step 7: Compute anomalies for each year relative to baseline ---
anomalies = combined.subtract(baseline_mean, axis=0)

# --- Step 8: Average anomalies across all chosen years ---
avg_anomaly = anomalies.mean(axis=1)

# --- Step 9: Extract only the chosen date (its labels start with "MM-DD") ---
daily_anomaly = avg_anomaly.loc[avg_anomaly.index.str.startswith(focus_date)]

# --- Step 10: Plot hourly anomaly curve for chosen date ---
# Read the hour out of each "MM-DD HH:MM" label instead of assuming exactly
# 24 rows, so a day with missing hours still plots at the right x positions.
hours_of_day = daily_anomaly.index.str[6:8].astype(int)
plt.figure(figsize=(12,5))
plt.plot(hours_of_day, daily_anomaly.values, color="red", linewidth=2,
         marker="o", label=f"Average Anomaly ({years[0]}–{years[-1]})")

# Baseline at 0
plt.axhline(0, color="black", linewidth=1)

# Grid and ticks
plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
plt.xticks(np.arange(0, 24, 1), [f"{h:02d}:00" for h in range(24)], rotation=45)

# Title with full substation name
full_name = info.loc[station, "Name"]
plt.title(f"{full_name} Average Demand Anomaly on {focus_date} ({years[0]}–{years[-1]})", fontsize=14)
plt.xlabel("Hour of Day")
plt.ylabel("Electricity Demand Anomaly")
plt.legend()
plt.tight_layout()
plt.close()

NameError: name 'demand' is not defined

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Step 1: Make sure the index is datetime-like ---
# Hourly resampling and the date-based slices below depend on it.
demand.index = pd.to_datetime(demand.index)

# --- Step 2: Analysis settings ---
station = "BLAKE"
focus_date = "12-25"   # 25 December, written as "MM-DD"
years = [2004, 2005]   # years whose anomalies are averaged

# --- Step 3: Hourly mean demand for the chosen substation ---
hourly = demand[[station]].resample("h").mean()

# --- Step 4: One ±30-day window per year ---
half_window = pd.Timedelta(days=30)
windows = []
for year in years:
    ref_date = pd.Timestamp(f"{year}-{focus_date}")
    window = hourly.loc[ref_date - half_window:ref_date + half_window].copy()
    # Label rows "MM-DD HH:MM" (index name "mdh") so the years align by calendar hour.
    window.index = window.index.strftime("%m-%d %H:%M").rename("mdh")
    windows.append(window)

# --- Step 5: Side-by-side table, one column per year ---
combined = pd.concat(windows, axis=1)

# --- Step 6: Baseline = mean of the per-hour cross-year means ---
baseline_mean = combined.mean(axis=1).mean()

# --- Step 7: Per-year anomalies relative to the baseline ---
anomalies = combined - baseline_mean

# --- Step 8: Average the anomalies across the chosen years ---
avg_anomaly = anomalies.mean(axis=1)

# --- Step 9: Keep only the rows whose label starts with the focus date ---
on_focus_date = avg_anomaly.index.str.startswith(focus_date)
daily_anomaly = avg_anomaly.loc[on_focus_date]

# --- Step 10: Plot the hourly anomaly curve ---
plt.figure(figsize=(12,5))
plt.plot(
    range(24),
    daily_anomaly.values,
    color="red",
    linewidth=2,
    marker="o",
    label=f"Average Anomaly ({years[0]}–{years[-1]})",
)

# Zero line marks the baseline
plt.axhline(0, color="black", linewidth=1)

# Horizontal grid and one tick per hour
plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
hour_labels = [f"{h:02d}:00" for h in range(24)]
plt.xticks(np.arange(0, 24, 1), hour_labels, rotation=45)

# The title uses the full substation name from the info table
full_name = info.loc[station, "Name"]
plt.title(f"{full_name} Average Demand Anomaly on {focus_date} ({years[0]}–{years[-1]})", fontsize=14)
plt.xlabel("Hour of Day")
plt.ylabel("Electricity Demand Anomaly")
plt.legend()
plt.tight_layout()
plt.close()

NameError: name 'demand' is not defined

### Boxing Day

In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Step 1: Make sure the index is datetime-like ---
# Hourly resampling and the date-based slices below depend on it.
demand.index = pd.to_datetime(demand.index)

# --- Step 2: Analysis settings ---
station = "BLAKE"
focus_date = "12-26"   # Boxing Day, written as "MM-DD"
years = [2006, 2007]   # years whose anomalies are averaged

# --- Step 3: Hourly mean demand for the chosen substation ---
hourly = demand[[station]].resample("h").mean()

# --- Step 4: One ±30-day window per year ---
half_window = pd.Timedelta(days=30)
windows = []
for year in years:
    ref_date = pd.Timestamp(f"{year}-{focus_date}")
    window = hourly.loc[ref_date - half_window:ref_date + half_window].copy()
    # Label rows "MM-DD HH:MM" (index name "mdh") so the years align by calendar hour.
    window.index = window.index.strftime("%m-%d %H:%M").rename("mdh")
    windows.append(window)

# --- Step 5: Side-by-side table, one column per year ---
combined = pd.concat(windows, axis=1)

# --- Step 6: Baseline = mean of the per-hour cross-year means ---
baseline_mean = combined.mean(axis=1).mean()

# --- Step 7: Per-year anomalies relative to the baseline ---
anomalies = combined - baseline_mean

# --- Step 8: Average the anomalies across the chosen years ---
avg_anomaly = anomalies.mean(axis=1)

# --- Step 9: Keep only the rows whose label starts with the focus date ---
on_focus_date = avg_anomaly.index.str.startswith(focus_date)
daily_anomaly = avg_anomaly.loc[on_focus_date]

# --- Step 10: Plot the hourly anomaly curve ---
plt.figure(figsize=(12,5))
plt.plot(
    range(24),
    daily_anomaly.values,
    color="red",
    linewidth=2,
    marker="o",
    label=f"Average Anomaly ({years[0]}–{years[-1]})",
)

# Zero line marks the baseline
plt.axhline(0, color="black", linewidth=1)

# Horizontal grid and one tick per hour
plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
hour_labels = [f"{h:02d}:00" for h in range(24)]
plt.xticks(np.arange(0, 24, 1), hour_labels, rotation=45)

# The title uses the full substation name from the info table
full_name = info.loc[station, "Name"]
plt.title(f"{full_name} Average Demand Anomaly on {focus_date} ({years[0]}–{years[-1]})", fontsize=14)
plt.xlabel("Hour of Day")
plt.ylabel("Electricity Demand Anomaly")
plt.legend()
plt.tight_layout()
plt.close()

NameError: name 'demand' is not defined

## Comparing Public Holidays against Weekend Anomalies (of the same month)

In [10]:
import calendar

def compare_holiday_vs_month_weekends(demand, info, station, years, holiday_func, holiday_name, month=None):
    """Plot a holiday's hourly demand anomaly against weekend anomalies of one month.

    The baseline is computed from the ±30-day windows around the holiday in
    every requested year. Both curves are hourly demand minus that baseline,
    averaged by hour of day. The figure is built and then closed.

    Parameters
    ----------
    demand : DataFrame
        Demand time series with one column per substation. Its index is
        converted to datetime in place.
    info : DataFrame
        Lookup table; ``info.loc[station, "Name"]`` supplies the name used in
        the plot title.
    station : str
        Column of ``demand`` to analyse (e.g. "BLAKE").
    years : list
        Years to include (e.g. [2004, 2005]).
    holiday_func : callable
        ``holiday_func(year)`` returns the holiday date for that year; a
        ``pd.Timestamp`` or a ``datetime.date`` both work.
    holiday_name : str
        Holiday name used in the legend and title.
    month : int, optional
        Month number (1–12) whose weekends are compared. When omitted, each
        year uses the month its holiday falls in, and the legend/title show
        the first year's holiday month.
    """
    demand.index = pd.to_datetime(demand.index)
    hourly = demand[[station]].resample("h").mean()

    # Normalise to Timestamps once; holiday_func may return a plain date.
    holiday_dates = [pd.Timestamp(holiday_func(year)) for year in years]
    half_window = pd.Timedelta(days=30)
    last_hour = pd.Timedelta(hours=23)

    # Baseline from the ±30-day windows, aligned by "MM-DD HH:MM" across years
    windows = []
    for ref_date in holiday_dates:
        window = hourly.loc[ref_date - half_window:ref_date + half_window].copy()
        window["mdh"] = window.index.strftime("%m-%d %H:%M")
        window = window.set_index("mdh")
        windows.append(window)

    combined = pd.concat(windows, axis=1)
    baseline_mean = combined.mean(axis=1).mean()

    # Holiday anomalies (24 hours), taken from each year's OWN holiday date.
    # Reading them from a calendar-aligned average would mix a moving holiday
    # (e.g. Good Friday) with an ordinary day of another year.
    holiday_hours = []
    for ref_date in holiday_dates:
        day_start = ref_date.normalize()
        daily = hourly.loc[day_start:day_start + last_hour, station] - baseline_mean
        daily.index = daily.index.hour
        holiday_hours.append(daily)
    holiday_profile = pd.concat(holiday_hours, axis=1).mean(axis=1)

    # Weekend anomalies for the comparison month
    weekend_parts = []
    for year, ref_date in zip(years, holiday_dates):
        target_month = ref_date.month if month is None else month
        month_start = pd.Timestamp(f"{year}-{target_month:02d}-01")
        # MonthEnd(0) lands on 00:00 of the last day; extend to 23:00 so a
        # weekend on the final day of the month is not cut down to one hour.
        month_end = month_start + pd.offsets.MonthEnd(0) + last_hour
        month_anomaly = hourly.loc[month_start:month_end, station] - baseline_mean
        # Sat=5, Sun=6
        weekend_parts.append(month_anomaly[month_anomaly.index.dayofweek >= 5])
    weekend_all = pd.concat(weekend_parts)
    # Mean per hour of day over every weekend day of every year
    weekend_profile = weekend_all.groupby(weekend_all.index.hour).mean()

    # Get month name for legend
    month_name = calendar.month_name[holiday_dates[0].month if month is None else month]

    # Plot
    plt.figure(figsize=(12,5))
    plt.plot(holiday_profile.index, holiday_profile.values, color="red", linewidth=2, marker="o",
             label=f"{holiday_name} Anomaly")
    plt.plot(weekend_profile.index, weekend_profile.values, color="blue", linewidth=2, marker="s",
             label=f"{month_name} Weekends Anomaly")

    plt.axhline(0, color="black", linewidth=1)
    plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
    plt.xticks(np.arange(0, 24, 1), [f"{h:02d}:00" for h in range(24)], rotation=45)

    full_name = info.loc[station, "Name"]
    plt.title(f"{full_name} Demand Anomaly: {holiday_name} vs. {month_name} Weekends ({years[0]}–{years[-1]})", fontsize=14)
    plt.xlabel("Hour of Day")
    plt.ylabel("Electricity Demand Anomaly")
    plt.legend()
    plt.tight_layout()
    plt.close()

# Soft coding anomalies

## Australia Day vs. January weekends over consecutive year intervals (soft-coded)
- Uses a ±30-day window around Australia Day
- The public holiday is fixed (Australia Day only)
- The year interval and the substation can be changed

In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def compare_australia_day_vs_jan_weekends(demand, info, station, years):
    """
    Plot Australia Day's hourly demand anomaly next to the January-weekend anomaly.

    The baseline comes from the ±30-day windows around 26 January of every
    year in ``years``; both curves are demand minus that baseline, by hour of
    day. ``demand``'s index is converted to datetime in place, ``station``
    selects the column, and ``info.loc[station, "Name"]`` supplies the title
    name. The figure is built and then closed.
    """
    demand.index = pd.to_datetime(demand.index)
    hourly = demand[[station]].resample("h").mean()

    # ±30-day window per year, relabelled "MM-DD HH:MM" so the years align
    half_window = pd.Timedelta(days=30)
    aligned = []
    for year in years:
        centre = pd.Timestamp(f"{year}-01-26")
        piece = hourly.loc[centre - half_window:centre + half_window].copy()
        piece.index = piece.index.strftime("%m-%d %H:%M").rename("mdh")
        aligned.append(piece)

    combined = pd.concat(aligned, axis=1)
    baseline_mean = combined.mean(axis=1).mean()
    avg_anomaly = (combined - baseline_mean).mean(axis=1)

    # Australia Day: the 24 rows labelled "01-26", extracted once per year
    per_year_day = []
    for year in years:
        day_str = pd.Timestamp(f"{year}-01-26").strftime("%m-%d")
        picked = avg_anomaly.loc[avg_anomaly.index.str.startswith(day_str)]
        picked.index = range(24)
        per_year_day.append(picked)
    australia_day = pd.concat(per_year_day, axis=1).mean(axis=1)

    # January weekends: one hour-indexed series per Saturday/Sunday
    weekend_days = []
    for year in years:
        jan_anomaly = hourly.loc[f"{year}-01-01":f"{year}-01-31"] - baseline_mean
        for day in pd.date_range(f"{year}-01-01", f"{year}-01-31"):
            if day.weekday() < 5:  # Mon–Fri: skip
                continue
            first = day.replace(hour=0, minute=0)
            last = day.replace(hour=23, minute=59)
            one_day = jan_anomaly.loc[first:last, station]
            if one_day.empty:
                continue
            one_day.index = one_day.index.hour
            weekend_days.append(one_day)
    weekends = pd.concat(weekend_days, axis=1).mean(axis=1)

    # Plot both curves on a shared hour-of-day axis
    plt.figure(figsize=(12,5))
    plt.plot(
        australia_day.index,
        australia_day.values,
        color="red",
        linewidth=2,
        marker="o",
        label="Australia Day Anomaly",
    )
    plt.plot(
        weekends.index,
        weekends.values,
        color="blue",
        linewidth=2,
        marker="s",
        label="January Weekends Anomaly",
    )

    plt.axhline(0, color="black", linewidth=1)
    plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
    hour_labels = [f"{h:02d}:00" for h in range(24)]
    plt.xticks(np.arange(0, 24, 1), hour_labels, rotation=45)

    full_name = info.loc[station, "Name"]
    plt.title(f"{full_name} Demand Anomaly: Australia Day vs. January Weekends ({years[0]}–{years[-1]})", fontsize=14)
    plt.xlabel("Hour of Day")
    plt.ylabel("Electricity Demand Anomaly")
    plt.legend()
    plt.tight_layout()
    plt.close()


# --- Run the comparison once for every pair of consecutive years ---
station = "BLAKE"
year_range = range(2004, 2019)  # 2004 through 2018

# zip stops at the shorter input, so this yields (2004, 2005) ... (2017, 2018)
for y1, y2 in zip(year_range, year_range[1:]):
    compare_australia_day_vs_jan_weekends(demand, info, station, [y1, y2])

NameError: name 'demand' is not defined

## Flexible Function: holiday vs month weekend
- using the function above

In [12]:
# Example calls showing how to use compare_holiday_vs_month_weekends

station = "BLAKE"

# Australia Day vs January weekends
australia_day_years = [2004, 2005]
compare_holiday_vs_month_weekends(
    demand=demand, info=info, station=station, years=australia_day_years,
    holiday_func=lambda y: pd.Timestamp(f"{y}-01-26"),
    holiday_name="Australia Day",
    month=1,
)

# ANZAC Day vs April weekends
anzac_day_years = [2007, 2008]
compare_holiday_vs_month_weekends(
    demand=demand, info=info, station=station, years=anzac_day_years,
    holiday_func=lambda y: pd.Timestamp(f"{y}-04-25"),
    holiday_name="ANZAC Day",
    month=4,
)

# Christmas Day vs December weekends
christmas_years = [2014, 2015]
compare_holiday_vs_month_weekends(
    demand=demand, info=info, station=station, years=christmas_years,
    holiday_func=lambda y: pd.Timestamp(f"{y}-12-25"),
    holiday_name="Christmas Day",
    month=12,
)

# Good Friday vs weekends of the month that holds Easter Sunday 2017
# (change the month argument if another month should be compared)
good_friday_years = [2017, 2018]
compare_holiday_vs_month_weekends(
    demand=demand, info=info, station=station, years=good_friday_years,
    holiday_func=lambda y: easter(y) - pd.Timedelta(days=2),
    holiday_name="Good Friday",
    month=(easter(2017).month),
)

NameError: name 'demand' is not defined

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import calendar
from dateutil.easter import easter

def compare_holiday_vs_month_weekends(demand, info, station, years, holiday_func, holiday_name, month):
    """
    Compare demand anomalies on a holiday vs. weekends of a chosen month.

    The baseline is computed from the ±30-day windows around the holiday in
    every requested year. Both curves are hourly demand minus that baseline,
    averaged by hour of day, and the figure is shown with ``plt.show()``.

    Parameters
    ----------
    demand : DataFrame
        Time series demand data with substation columns. Its index is
        converted to datetime in place.
    info : DataFrame
        Metadata table with substation codes as index and 'Name' column for full names.
    station : str
        Substation code (e.g. "BLAKE").
    years : list
        List of years to include (e.g. [2004, 2005]).
    holiday_func : function(year) -> pd.Timestamp
        Function that returns the holiday date for a given year; a plain
        ``datetime.date`` is accepted as well.
    holiday_name : str
        Name of the holiday (for plot titles).
    month : int
        Month number (1–12) whose weekends are used for comparison.
    """

    demand.index = pd.to_datetime(demand.index)
    hourly = demand[[station]].resample("h").mean()

    # Normalise to Timestamps once; holiday_func may return a plain date.
    holiday_dates = [pd.Timestamp(holiday_func(year)) for year in years]
    half_window = pd.Timedelta(days=30)
    last_hour = pd.Timedelta(hours=23)

    # Baseline from the ±30-day windows, aligned by "MM-DD HH:MM" across years
    windows = []
    for ref_date in holiday_dates:
        window = hourly.loc[ref_date - half_window:ref_date + half_window].copy()
        window["mdh"] = window.index.strftime("%m-%d %H:%M")
        window = window.set_index("mdh")
        windows.append(window)

    combined = pd.concat(windows, axis=1)
    baseline_mean = combined.mean(axis=1).mean()

    # Holiday anomalies (24 hours), taken from each year's OWN holiday date.
    # Reading them from a calendar-aligned average would mix a moving holiday
    # (e.g. Good Friday) with an ordinary day of another year.
    holiday_hours = []
    for ref_date in holiday_dates:
        day_start = ref_date.normalize()
        daily = hourly.loc[day_start:day_start + last_hour, station] - baseline_mean
        daily.index = daily.index.hour
        holiday_hours.append(daily)
    holiday_profile = pd.concat(holiday_hours, axis=1).mean(axis=1)

    # Weekend anomalies for chosen month
    weekend_parts = []
    for year in years:
        month_start = pd.Timestamp(f"{year}-{month:02d}-01")
        # MonthEnd(0) lands on 00:00 of the last day; extend to 23:00 so a
        # weekend on the final day of the month is not cut down to one hour.
        month_end = month_start + pd.offsets.MonthEnd(0) + last_hour
        month_anomaly = hourly.loc[month_start:month_end, station] - baseline_mean
        # Sat=5, Sun=6
        weekend_parts.append(month_anomaly[month_anomaly.index.dayofweek >= 5])
    weekend_all = pd.concat(weekend_parts)
    # Mean per hour of day over every weekend day of every year
    weekend_profile = weekend_all.groupby(weekend_all.index.hour).mean()

    # Month name for legend/title
    month_name = calendar.month_name[month]

    # Plot
    plt.figure(figsize=(12,5))
    plt.plot(holiday_profile.index, holiday_profile.values, color="red", linewidth=2, marker="o",
             label=f"{holiday_name} Anomaly")
    plt.plot(weekend_profile.index, weekend_profile.values, color="blue", linewidth=2, marker="s",
             label=f"{month_name} Weekends Anomaly")

    plt.axhline(0, color="black", linewidth=1)
    plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
    plt.xticks(np.arange(0, 24, 1), [f"{h:02d}:00" for h in range(24)], rotation=45)

    full_name = info.loc[station, "Name"]
    plt.title(f"{full_name} Demand Anomaly: {holiday_name} vs. {month_name} Weekends ({years[0]}–{years[-1]})", fontsize=14)
    plt.xlabel("Hour of Day")
    plt.ylabel("Electricity Demand Anomaly")
    plt.legend()
    plt.tight_layout()
    plt.show()

## Comparing weekends, weekdays and the holiday within the same ±30-day window (rather than the same month)
- Soft-codes the station, the year intervals and the public holiday

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from dateutil.easter import easter
import os

def compare_holiday_weekends_weekdays_window(
    demand, info, station, years, holiday_func, holiday_name, output_dir="plots",
    window_days=30,
):
    """
    Compare demand anomalies on a holiday vs. weekends and weekdays
    within the same ±``window_days`` window around the holiday.
    Saves the plot as a PNG file instead of showing it.

    Parameters
    ----------
    demand : DataFrame with one column per station; its index must be
        convertible with ``pd.to_datetime``. The caller's frame is not modified.
    info : DataFrame indexed by station, with a "Name" column (used for the title).
    station : column of ``demand`` / row of ``info`` to analyse.
    years : sequence of years; the profiles are averaged across them.
    holiday_func : callable mapping a year to the holiday's date
        (``pd.Timestamp``, ``datetime.date`` or anything ``pd.Timestamp`` accepts).
    holiday_name : label used in the legend, title and file name.
    output_dir : directory the PNG is written to (created if missing).
    window_days : half-width of the window around the holiday, in days.

    Returns
    -------
    Path of the saved PNG file.

    Raises
    ------
    ValueError
        If ``years`` is empty, or no demand data exists for the holiday
        in any of the requested years.

    Notes
    -----
    The holiday itself is also counted in its weekday/weekend group, and the
    last day of each window contributes only its 00:00 hour because the window
    ends at midnight.
    """
    years = list(years)
    if not years:
        raise ValueError("years must contain at least one year")

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Convert the index on a copy so the caller's DataFrame is left untouched
    station_demand = demand[[station]].copy()
    station_demand.index = pd.to_datetime(station_demand.index)
    hourly = station_demand.resample("h").mean()

    half_window = pd.Timedelta(days=window_days)
    # holiday_func may return datetime.date (e.g. dateutil's easter); normalise
    # everything to midnight Timestamps so the arithmetic below is uniform
    holiday_dates = [pd.Timestamp(holiday_func(year)).normalize() for year in years]

    # One window of hourly demand per year, centred on that year's holiday
    year_windows = [
        hourly.loc[ref_date - half_window:ref_date + half_window]
        for ref_date in holiday_dates
    ]
    baseline_mean = _window_baseline(year_windows)

    # Holiday anomalies: take each year's ACTUAL holiday date. Reading them from
    # a cross-year average aligned on "MM-DD" would blend a moving holiday
    # (Easter) with an ordinary day that shares its calendar date in another year.
    holiday_days = [
        _day_anomaly(hourly[station], ref_date, baseline_mean)
        for ref_date in holiday_dates
    ]
    holiday_profile = _mean_hourly_profile(holiday_days)
    if holiday_profile.empty:
        raise ValueError(
            f"No demand data for {holiday_name} at {station} "
            f"in {years[0]}–{years[-1]}"
        )

    # Weekend and weekday anomalies within the same window
    weekend_days = []
    weekday_days = []
    for ref_date, window in zip(holiday_dates, year_windows):
        window_series = window[station]
        for day in pd.date_range(ref_date - half_window, ref_date + half_window):
            daily = _day_anomaly(window_series, day, baseline_mean)
            if daily.empty:
                continue
            if day.weekday() >= 5:  # Sat=5, Sun=6
                weekend_days.append(daily)
            else:
                weekday_days.append(daily)
    weekend_profile = _mean_hourly_profile(weekend_days)
    weekday_profile = _mean_hourly_profile(weekday_days)

    filename = f"{station}_{holiday_name}_{years[0]}-{years[-1]}.png"
    filepath = os.path.join(output_dir, filename)

    # --- Plot ---
    fig = plt.figure(figsize=(12, 5))
    try:
        plt.plot(holiday_profile.index, holiday_profile.values, color="red", linewidth=2, marker="o",
                 label=f"{holiday_name} (±{window_days}-Day Window)")
        if not weekend_profile.empty:
            plt.plot(weekend_profile.index, weekend_profile.values, color="blue", linewidth=2, marker="s",
                     label=f"Weekends (±{window_days}-Day Window)")
        if not weekday_profile.empty:
            plt.plot(weekday_profile.index, weekday_profile.values, color="green", linewidth=2, marker="^",
                     label=f"Weekdays (±{window_days}-Day Window)")

        plt.axhline(0, color="black", linewidth=1)
        plt.grid(axis='y', linestyle='-', linewidth=0.5, color='gray', alpha=0.3)
        plt.xticks(np.arange(0, 24, 1), [f"{h:02d}:00" for h in range(24)], rotation=45)

        full_name = info.loc[station, "Name"]
        plt.title(
            f"{full_name} Demand Anomaly: {holiday_name} vs. Weekends & Weekdays "
            f"(±{window_days}-Day Window, {years[0]}–{years[-1]})",
            fontsize=14
        )
        plt.xlabel("Hour of Day")
        plt.ylabel("Electricity Demand Anomaly")
        plt.legend()
        plt.tight_layout()

        # Save instead of show
        plt.savefig(filepath, dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)

    return filepath


def _window_baseline(year_windows):
    """Return the scalar baseline: mean over calendar hours of the cross-year mean."""
    aligned = []
    for window in year_windows:
        by_calendar_hour = window.copy()
        # Align years on "MM-DD HH:MM" so the same calendar hour shares a row
        by_calendar_hour.index = window.index.strftime("%m-%d %H:%M")
        aligned.append(by_calendar_hour)
    combined = pd.concat(aligned, axis=1)
    return combined.mean(axis=1).mean()


def _day_anomaly(series, day, baseline_mean):
    """Return one day's values minus the baseline, indexed by hour of day."""
    day_start = pd.Timestamp(day).normalize()
    day_end = day_start + pd.Timedelta(hours=23, minutes=59)
    daily = series.loc[day_start:day_end] - baseline_mean
    daily.index = daily.index.hour
    return daily


def _mean_hourly_profile(daily_series):
    """Average hour-indexed daily series into one 0-23 profile (empty if no data)."""
    # Days that are empty or entirely NaN carry no information for the mean
    usable = [daily for daily in daily_series if daily.notna().any()]
    if not usable:
        return pd.Series(dtype=float)
    # Reindex so hours missing from every day show up as gaps, not shifted points
    return pd.concat(usable, axis=1).mean(axis=1).reindex(range(24))

In [5]:
# Station whose demand is analysed
station = "BLAKE"

# Year intervals to loop through; each pair is averaged into one plot
intervals = [
    [2004, 2005],
    [2007, 2008],
    [2014, 2015],
    [2017, 2018],  # Christmas and Boxing Day do not have data for 2017-2018
]

# All public holidays, keyed by display name. Each callable maps a year to a
# pd.Timestamp. dateutil's easter() returns a datetime.date, so the moving
# Easter holidays are wrapped in pd.Timestamp to match the fixed-date entries.
holidays = {
    "New Year's Day": lambda y: pd.Timestamp(f"{y}-01-01"),
    "Australia Day": lambda y: pd.Timestamp(f"{y}-01-26"),
    "Good Friday": lambda y: pd.Timestamp(easter(y)) - pd.Timedelta(days=2),
    "Easter Saturday": lambda y: pd.Timestamp(easter(y)) - pd.Timedelta(days=1),
    "Easter Sunday": lambda y: pd.Timestamp(easter(y)),
    "Easter Monday": lambda y: pd.Timestamp(easter(y)) + pd.Timedelta(days=1),
    "ANZAC Day": lambda y: pd.Timestamp(f"{y}-04-25"),
    "Christmas Day": lambda y: pd.Timestamp(f"{y}-12-25"),
    "Boxing Day": lambda y: pd.Timestamp(f"{y}-12-26"),
}

# Loop through intervals and holidays, saving one plot per combination
for years in intervals:
    for holiday_name, holiday_func in holidays.items():
        try:
            compare_holiday_weekends_weekdays_window(
                demand, info, station, years,
                holiday_func=holiday_func,
                holiday_name=holiday_name
            )
        except ValueError as err:
            # A holiday with no data in this interval should not abort
            # the remaining plots; report it and carry on
            print(f"Skipping {holiday_name} ({years[0]}–{years[-1]}): {err}")

NameError: name 'demand' is not defined