In [1]:
import polars as pl
import json

%load_ext autoreload
%autoreload 2

# Base Load Analysis Demo with Polars

This notebook demonstrates analyzing base load (standby power consumption) using the BaseloadAnalyzer class. Base load represents the minimum continuous power draw in a system.

## Key Metrics
1. Base load value in WATTS - Consistent minimum power draw
2. Energy consumption in kWh - Power used over time
3. Base load percentage - Portion of total consumption that is baseline

## Data Requirements
Input data (LazyFrame):
- timestamp: datetime with timezone
- total: energy readings in kWh (15-minute intervals)


> ⚠️ **Note:** Several example files are available.  
> *energy_use_big* comes from a very large building with an exceptionally high base load.  
> *energy_use_test1* comes from a regular family residence.  
> *gapped_series* (JSON) is the series loaded in the cells directly below.


In [2]:
from openenergyid.baseload.analysis import BaseloadAnalyzer

# Schema used when reading the ndjson example files:
# timezone-aware timestamps and float energy totals.
schema = {"timestamp": pl.Datetime(time_zone="Europe/Brussels"), "total": pl.Float64}

# NOTE: "data/PP/gapped_series.json" is intentionally not read here. Its values are
# nested under data -> index / data, so it is parsed (and `energy_data` is defined)
# in the next cell; a flat read with the schema above was always overwritten there.
# To analyze a flat ndjson example instead, use the line below and skip the next cell:
# energy_data = pl.scan_ndjson("data/PP/energy_use_test1.ndjson", schema=schema)

In [3]:
# Load the custom JSON structure: timestamps live under data -> index,
# readings under data -> data. JSON text is UTF-8, so do not rely on the
# platform default encoding.
with open("data/PP/gapped_series.json", encoding="utf-8") as f:
    json_data = json.load(f)

# Convert to a LazyFrame with properly typed, timezone-aware timestamps
energy_data = (
    pl.DataFrame(
        {
            "timestamp": pl.Series(json_data["data"]["index"]),
            "total": pl.Series(json_data["data"]["data"]),
        }
    )
    .with_columns(
        # Parse the timestamp strings including their UTC offset (%z)
        pl.col("timestamp").str.to_datetime("%Y-%m-%dT%H:%M:%S%z").alias("timestamp")
    )
    .lazy()
)

# Collect the row count once and reuse it for both the message and the guard
row_count = energy_data.select(pl.len()).collect().item()

print("After proper loading:")
print(f"energy_data length: {row_count}")
if row_count > 0:
    print("Sample energy_data:")
    print(energy_data.limit(5).collect())

After proper loading:
energy_data length: 32736
Sample energy_data:
shape: (5, 2)
┌─────────────────────────┬───────┐
│ timestamp               ┆ total │
│ ---                     ┆ ---   │
│ datetime[μs, UTC]       ┆ f64   │
╞═════════════════════════╪═══════╡
│ 2024-12-31 23:00:00 UTC ┆ 0.041 │
│ 2024-12-31 23:15:00 UTC ┆ 0.031 │
│ 2024-12-31 23:30:00 UTC ┆ 0.03  │
│ 2024-12-31 23:45:00 UTC ┆ 0.019 │
│ 2025-01-01 00:00:00 UTC ┆ 0.032 │
└─────────────────────────┴───────┘


## Initialize Analyzer
Set up analyzer with timezone and quantile settings

In [4]:
# Create analyzer (15% quantile = ~216 min, i.e. ~3.6 h, of lowest daily values)
analyzer = BaseloadAnalyzer(timezone="Europe/Brussels", quantile=0.15)

In [5]:
# Convert energy readings to power series.
# Input is the LazyFrame with "timestamp" / "total" columns built above;
# presumably kWh per 15-minute interval is converted to watts — confirm in BaseloadAnalyzer.
power_data = analyzer.prepare_power_series(energy_data)

## Analyze at Different Time Scales
Demonstrate flexibility in analysis periods

In [6]:
# Analyze at monthly granularity ("1mo")
result = analyzer.analyze(power_data, "1mo")

# Collect the lazy monthly results once; reused for the shape and the summary below
monthly = result.results.collect()

print(f"Global median baseload: {result.global_median_baseload} W")
print(f"\nMonthly results shape: {monthly.shape}")
print(f"Monthly medians shape: {result.monthly_median_baseloads.collect().shape}")

# Show monthly summary
print("\nMonthly Base Load Analysis:")
print(monthly.select(["timestamp", "average_daily_baseload_in_watt", "baseload_ratio"]).head())

Global median baseload: 100.0 W

Monthly results shape: (12, 8)
Monthly medians shape: (12, 2)

Monthly Base Load Analysis:
shape: (5, 3)
┌───────────────────────────────┬────────────────────────────────┬────────────────┐
│ timestamp                     ┆ average_daily_baseload_in_watt ┆ baseload_ratio │
│ ---                           ┆ ---                            ┆ ---            │
│ datetime[μs, Europe/Brussels] ┆ f64                            ┆ f64            │
╞═══════════════════════════════╪════════════════════════════════╪════════════════╡
│ 2025-01-01 00:00:00 CET       ┆ 114.967742                     ┆ 1.0            │
│ 2025-02-01 00:00:00 CET       ┆ 116.142857                     ┆ 1.0            │
│ 2025-03-01 00:00:00 CET       ┆ 105.95424                      ┆ 1.0            │
│ 2025-04-01 00:00:00 CEST      ┆ 90.533333                      ┆ 1.0            │
│ 2025-05-01 00:00:00 CEST      ┆ 119.741935                     ┆ 1.0            │
└─────────────────────

In [7]:
# Seasonal view: materialize the per-month median baseload table and print it under a heading
monthly_medians = result.monthly_median_baseloads.collect()
print("Monthly Median Baseloads:", monthly_medians, sep="\n")

Monthly Median Baseloads:
shape: (12, 2)
┌───────────────────────────────┬─────────────────────────────────┐
│ timestamp                     ┆ monthly_median_baseload_in_wat… │
│ ---                           ┆ ---                             │
│ datetime[μs, Europe/Brussels] ┆ f64                             │
╞═══════════════════════════════╪═════════════════════════════════╡
│ 2025-01-01 00:00:00 CET       ┆ 116.0                           │
│ 2025-02-01 00:00:00 CET       ┆ 114.0                           │
│ 2025-03-01 00:00:00 CET       ┆ 104.0                           │
│ 2025-04-01 00:00:00 CEST      ┆ 88.0                            │
│ 2025-05-01 00:00:00 CEST      ┆ 88.0                            │
│ …                             ┆ …                               │
│ 2025-08-01 00:00:00 CEST      ┆ 88.0                            │
│ 2025-09-01 00:00:00 CEST      ┆ 100.0                           │
│ 2025-10-01 00:00:00 CEST      ┆ 96.0                            │
│ 2025-

## Visualization Example
Plot daily base load vs total consumption

In [15]:
import plotly.graph_objects as go

# Run the analysis at daily resolution and materialize both result tables
daily_result = analyzer.analyze(power_data, "1d")
daily = daily_result.results.collect()
monthly_medians = daily_result.monthly_median_baseloads.collect()

# Traces are listed in drawing order: background first, overlays last
fig = go.Figure(
    data=[
        # Average daily power as a thin gray background line
        go.Scatter(
            x=daily["timestamp"],
            y=daily["average_power_in_watt"],
            name="Daily Avg Power",
            line={"color": "lightgray", "width": 1},
        ),
        # Baseload per day
        go.Scatter(
            x=daily["timestamp"],
            y=daily["average_daily_baseload_in_watt"],
            name="Daily Baseload",
            line={"color": "steelblue", "width": 1.5},
        ),
        # Median baseload per month, drawn as a step function
        go.Scatter(
            x=monthly_medians["timestamp"],
            y=monthly_medians["monthly_median_baseload_in_watt"],
            name="Monthly Median Baseload",
            line={"color": "red", "width": 2, "shape": "hv"},
            mode="lines",
        ),
    ]
)

# Horizontal dashed reference line at the global median baseload
fig.add_hline(
    y=daily_result.global_median_baseload,
    line_dash="dash",
    line_color="darkred",
    annotation_text=f"Global Median: {daily_result.global_median_baseload:.0f} W",
)

fig.update_layout(
    title="Baseload Analysis: Daily vs Monthly Median",
    xaxis_title="Date",
    yaxis_title="Power (W)",
    height=450,
    legend={"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 0.01},
)
fig.show()

In [16]:
# --- Baseload vs Total Usage: Clear Overlay with Stepped Median (using energy_use_test1.ndjson) ---
import plotly.graph_objects as go
import polars as pl


def watt_to_kwh_per_day(watts):
    """Scale an average power in W to the approximate energy of a 24-hour day in kWh."""
    return watts * 24 / 1000


# Read the residential example file and hand it over as a LazyFrame
schema = {"timestamp": pl.Datetime(time_zone="Europe/Brussels"), "total": pl.Float64}
energy_data = pl.read_ndjson("data/PP/energy_use_test1.ndjson", schema=schema).lazy()

# Fresh analyzer and power series for this data set
analyzer = BaseloadAnalyzer(timezone="Europe/Brussels", quantile=0.15)
power_data = analyzer.prepare_power_series(energy_data)

# Daily analysis plus the per-month medians derived from it
daily_result = analyzer.analyze(power_data, "1d")
daily = daily_result.results.collect()
monthly_medians = daily_result.monthly_median_baseloads.collect()

# Express the daily power figures as approximate daily energy (kWh per day)
baseload_usage = watt_to_kwh_per_day(daily["average_daily_baseload_in_watt"])
total_usage = watt_to_kwh_per_day(daily["average_power_in_watt"])
monthly_median_kwh = watt_to_kwh_per_day(monthly_medians["monthly_median_baseload_in_watt"])

# Traces are listed in drawing order: total area, baseload area, stepped median
fig = go.Figure(
    data=[
        # Total usage as a light filled area in the background
        go.Scatter(
            x=daily["timestamp"],
            y=total_usage,
            name="Total Daily Usage (kWh)",
            fill="tozeroy",
            fillcolor="rgba(200,200,200,0.25)",
            line={"color": "gray", "width": 1},
            mode="lines",
        ),
        # Baseload usage as a second filled area drawn over the total
        go.Scatter(
            x=daily["timestamp"],
            y=baseload_usage,
            name="Baseload Usage (kWh)",
            fill="tozeroy",
            fillcolor="rgba(70,130,180,0.35)",
            line={"color": "steelblue", "width": 2},
            mode="lines",
        ),
        # Monthly median baseload in kWh/day as a stepped line
        go.Scatter(
            x=monthly_medians["timestamp"],
            y=monthly_median_kwh,
            name="Monthly Median Baseload (kWh/day)",
            line={"color": "red", "width": 3, "shape": "hv"},
            mode="lines",
        ),
    ]
)

fig.update_layout(
    title="Baseload Usage as Subgraph of Total Usage with Stepped Monthly Median",
    xaxis_title="Date",
    yaxis_title="Energy (kWh/day)",
    height=500,
    legend={"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 0.01},
)
fig.show()

# Full single-cell test with visualisation for known data

In [9]:
# Every name this single-cell test relies on is imported here, so the cell also
# runs on its own after a kernel restart (it uses `go` and `BaseloadAnalyzer`).
import plotly.graph_objects as go
import polars as pl
from plotly.subplots import make_subplots

from openenergyid.baseload.analysis import BaseloadAnalyzer

# Load data with schema, scanned lazily and ordered by timestamp
schema = {"timestamp": pl.Datetime(time_zone="Europe/Brussels"), "total": pl.Float64}
energy_data = pl.scan_ndjson("data/PP/energy_use_test1.ndjson", schema=schema).sort("timestamp")


# Analyze with different quantiles
def analyze_quantile(q: float):
    """Run a daily ("1d") baseload analysis of the notebook-level ``energy_data``.

    Args:
        q: Quantile passed to ``BaseloadAnalyzer``.

    Returns:
        A tuple ``(daily_results, global_median)``: the collected results frame
        and the analysis' ``global_median_baseload`` value.
    """
    quantile_analyzer = BaseloadAnalyzer(timezone="Europe/Brussels", quantile=q)
    daily_analysis = quantile_analyzer.analyze(
        quantile_analyzer.prepare_power_series(energy_data), "1d"
    )
    return daily_analysis.results.collect(), daily_analysis.global_median_baseload


# Run the daily analysis once per quantile
q05, median_05 = analyze_quantile(0.05)
q10, median_10 = analyze_quantile(0.10)
q15, median_15 = analyze_quantile(0.15)

# One entry per quantile: (legend label, daily results, global median)
quantile_runs = [
    ("5%", q05, median_05),
    ("10%", q10, median_10),
    ("15%", q15, median_15),
]
colors = {"5%": "blue", "10%": "red", "15%": "green"}

# Two stacked panels: energy on top, power below
fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=("Energy Consumption (kWh)", "Daily Baseload Power (W)"),
    vertical_spacing=0.12,
)

# --- Top panel: baseload energy per quantile, then total consumption ---
for label, frame, _ in quantile_runs:
    baseload_energy_trace = go.Scatter(
        x=frame["timestamp"],
        y=frame["consumption_due_to_baseload_in_kilowatthour"],
        name=f"Baseload Energy (q={label})",
        mode="lines",
        line={"color": colors[label]},
    )
    fig.add_trace(baseload_energy_trace, row=1, col=1)

total_trace = go.Scatter(
    x=q05["timestamp"],
    y=q05["total_consumption_in_kilowatthour"],
    name="Total Consumption",
    mode="lines",
    line={"color": "gray", "dash": "dot"},
)
fig.add_trace(total_trace, row=1, col=1)

# --- Bottom panel: daily baseload power and its global median per quantile ---
for label, frame, global_median in quantile_runs:
    baseload_power_trace = go.Scatter(
        x=frame["timestamp"],
        y=frame["average_daily_baseload_in_watt"],
        name=f"Daily Baseload (q={label})",
        mode="lines",
        line={"color": colors[label]},
    )
    fig.add_trace(baseload_power_trace, row=2, col=1)

    # Flat dashed line spanning the full date range at the median level
    median_trace = go.Scatter(
        x=[frame["timestamp"].min(), frame["timestamp"].max()],
        y=[global_median, global_median],
        name=f"Median (q={label}): {global_median:.0f} W",
        mode="lines",
        line={"color": colors[label], "dash": "dash"},
    )
    fig.add_trace(median_trace, row=2, col=1)

fig.update_layout(
    title="Base Load Comparison - Different Quantiles",
    height=700,
    legend={"x": 1.02, "y": 1},
)
fig.update_xaxes(title_text="Date", row=2, col=1)
fig.update_yaxes(title_text="Energy (kWh)", row=1, col=1)
fig.update_yaxes(title_text="Power (W)", row=2, col=1)
fig.show()

In [10]:
# Display the monthly results frame collected in the "1mo" analysis cell above
monthly
# daily  # alternative: show the daily results frame from the most recent "1d" analysis

timestamp,consumption_due_to_baseload_in_kilowatthour,total_consumption_in_kilowatthour,average_daily_baseload_in_watt,average_power_in_watt,consumption_due_to_median_baseload_in_kilowatthour,consumption_not_due_to_baseload_in_kilowatthour,baseload_ratio
"datetime[μs, Europe/Brussels]",f64,f64,f64,f64,f64,f64,f64
2025-01-01 00:00:00 CET,36.695,36.695,114.967742,49.321237,74.4,0.0,1.0
2025-02-01 00:00:00 CET,35.727,35.727,116.142857,53.165179,67.2,0.0,1.0
2025-03-01 00:00:00 CET,32.562,32.562,105.95424,43.825034,74.3,0.0,1.0
2025-04-01 00:00:00 CEST,27.663,27.663,90.533333,38.420833,72.0,0.0,1.0
2025-05-01 00:00:00 CEST,40.401,40.401,119.741935,54.302419,74.4,0.0,1.0
…,…,…,…,…,…,…,…
2025-08-01 00:00:00 CEST,29.253,29.253,92.258065,39.318548,74.4,0.0,1.0
2025-09-01 00:00:00 CEST,31.728,31.728,99.866667,44.066667,72.0,0.0,1.0
2025-10-01 00:00:00 CEST,32.294,32.294,99.087248,43.347651,74.5,0.0,1.0
2025-11-01 00:00:00 CET,55.788,55.788,214.533333,77.483333,72.0,0.0,1.0


## Key Insights
- Base load typically accounts for 20-40% of total consumption
- Higher ratios may indicate energy saving opportunities
- Analysis maintains timezone awareness throughout