In [16]:
import polars as pl
import json

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Base Load Analysis Demo with Polars

This notebook demonstrates analyzing base load (standby power consumption) using the BaseloadAnalyzer class. Base load represents the minimum continuous power draw in a system.

## Key Metrics
1. Base load value in WATTS - Consistent minimum power draw
2. Energy consumption in kWh - Power used over time
3. Base load percentage - Portion of total consumption that is baseline

## Data Requirements
Input data (LazyFrame):
- timestamp: datetime with timezone
- total: energy readings in kWh (15-minute intervals)


> ⚠️**Note:** we have several example files available,  
> *energy_use_big* is from a giant building with incredible base load.  
> *energy_use_test1* is from a regular family residence.


In [17]:
from openenergyid.baseload.analysis import BaseloadAnalyzer

# Define schema for data loading
schema = {"timestamp": pl.Datetime(time_zone="Europe/Brussels"), "total": pl.Float64}

# Load example data with schema
# energy_data = pl.scan_ndjson("data/PP/energy_use_test1.ndjson", schema=schema)
energy_data = pl.read_json("data/PP/gapped_series.json", schema=schema)
energy_data = energy_data.lazy()

In [18]:
# Load the custom JSON structure
with open("data/PP/gapped_series.json") as f:
    json_data = json.load(f)

# Convert to DataFrame - handle timezones properly
energy_data = (
    pl.DataFrame(
        {
            "timestamp": pl.Series(json_data["data"]["index"]),
            "total": pl.Series(json_data["data"]["data"]),
        }
    )
    .with_columns(
        # Parse timestamps with explicit timezone
        pl.col("timestamp").str.to_datetime("%Y-%m-%dT%H:%M:%S%z").alias("timestamp")
    )
    .lazy()
)

print("After proper loading:")
print(f"energy_data length: {energy_data.select(pl.len()).collect().item()}")
if energy_data.select(pl.len()).collect().item() > 0:
    print("Sample energy_data:")
    print(energy_data.limit(5).collect())

After proper loading:
energy_data length: 32736
Sample energy_data:
shape: (5, 2)
┌─────────────────────────┬───────┐
│ timestamp               ┆ total │
│ ---                     ┆ ---   │
│ datetime[μs, UTC]       ┆ f64   │
╞═════════════════════════╪═══════╡
│ 2024-12-31 23:00:00 UTC ┆ 0.041 │
│ 2024-12-31 23:15:00 UTC ┆ 0.031 │
│ 2024-12-31 23:30:00 UTC ┆ 0.03  │
│ 2024-12-31 23:45:00 UTC ┆ 0.019 │
│ 2025-01-01 00:00:00 UTC ┆ 0.032 │
└─────────────────────────┴───────┘


## Initialize Analyzer
Set up analyzer with timezone and quantile settings

In [19]:
# Create analyzer (5% quantile = ~72 min of lowest daily values)
analyzer = BaseloadAnalyzer(timezone="Europe/Brussels", quantile=0.15)

In [20]:
# Convert energy readings to power series
power_data = analyzer.prepare_power_series(energy_data)

## Analyze at Different Time Scales
Demonstrate flexibility in analysis periods

In [21]:
# Analyze at different granularities
print("Checking power_data...")
print(f"power_data length: {power_data.select(pl.len()).collect().item()}")
print(f"power_data timezone: {power_data.schema['timestamp']}")
if power_data.select(pl.len()).collect().item() > 0:
    print("Sample power_data:")
    print(power_data.limit(5).collect())

(hourly, hourly_baseload) = analyzer.analyze(power_data, "1h")
print(f"hourly_baseload: {hourly_baseload}")
hourly = hourly.collect()
print(f"hourly shape: {hourly.shape}")
if hourly.height > 0:
    print("Sample hourly:")
    print(hourly.limit(3))

(daily, daily_baseload) = analyzer.analyze(power_data, "1d")
print(f"daily_baseload: {daily_baseload}")
daily = daily.collect()
print(f"daily shape: {daily.shape}")
if daily.height > 0:
    print("Sample daily:")
    print(daily.limit(3))

(monthly, monthly_baseload) = analyzer.analyze(power_data, "1mo")
print(f"monthly_baseload: {monthly_baseload}")
monthly = monthly.collect()
print(f"monthly shape: {monthly.shape}")

# Show monthly summary
print("Monthly Base Load Analysis:")
print(monthly.select(["timestamp", "average_daily_baseload_in_watt", "baseload_ratio"]).head())

Checking power_data...
power_data length: 32736
power_data timezone: Datetime(time_unit='us', time_zone='Europe/Brussels')
Sample power_data:
shape: (5, 2)
┌───────────────────────────────┬───────┐
│ timestamp                     ┆ power │
│ ---                           ┆ ---   │
│ datetime[μs, Europe/Brussels] ┆ f64   │
╞═══════════════════════════════╪═══════╡
│ 2025-01-01 00:00:00 CET       ┆ 164.0 │
│ 2025-01-01 00:15:00 CET       ┆ 124.0 │
│ 2025-01-01 00:30:00 CET       ┆ 120.0 │
│ 2025-01-01 00:45:00 CET       ┆ 76.0  │
│ 2025-01-01 01:00:00 CET       ┆ 128.0 │
└───────────────────────────────┴───────┘
hourly_baseload: 100.0
hourly shape: (8184, 8)
Sample hourly:
shape: (3, 8)
┌────────────┬────────────┬────────────┬───────────┬───────────┬───────────┬───────────┬───────────┐
│ timestamp  ┆ consumptio ┆ total_cons ┆ average_d ┆ average_p ┆ consumpti ┆ consumpti ┆ baseload_ │
│ ---        ┆ n_due_to_b ┆ umption_in ┆ aily_base ┆ ower_in_w ┆ on_due_to ┆ on_not_du ┆ ratio     │
│ d





In [22]:
# Check one day in your power_data
one_day = power_data.filter(pl.col("timestamp").dt.date() == pl.date(2025, 1, 1))
one_day_power = one_day.select("power").collect()
print("One day power stats:")
print(f"Min: {one_day_power.min()}")
print(f"5th percentile: {one_day_power.quantile(0.05)}")
print(f"Mean: {one_day_power.mean()}")

One day power stats:
Min: shape: (1, 1)
┌───────┐
│ power │
│ ---   │
│ f64   │
╞═══════╡
│ 0.0   │
└───────┘
5th percentile: shape: (1, 1)
┌───────┐
│ power │
│ ---   │
│ f64   │
╞═══════╡
│ 0.0   │
└───────┘
Mean: shape: (1, 1)
┌────────┐
│ power  │
│ ---    │
│ f64    │
╞════════╡
│ 34.625 │
└────────┘


## Visualization Example
Plot daily base load vs total consumption

In [23]:
import plotly.express as px
import plotly.graph_objects as go

# Convert to pandas for plotting
daily_pd = daily.to_pandas()

# Create figure with basic lines
fig = px.line(
    daily_pd,
    x="timestamp",
    y=["consumption_due_to_baseload_in_kilowatthour", "total_consumption_in_kilowatthour"],
    title="Daily Base Load vs Total Consumption",
    labels={"value": "Energy (kWh)", "variable": "Type"},
)

# Add average lines
fig.add_hline(
    y=daily_pd["consumption_due_to_baseload_in_kilowatthour"].mean(),
    line_dash="dash",
    line_color="blue",
    annotation_text="Average Base Load",
)

fig.add_hline(
    y=daily_pd["total_consumption_in_kilowatthour"].mean(),
    line_dash="dash",
    line_color="red",
    annotation_text="Average Total Consumption",
)

# Update colors and legend
fig.update_traces(
    name="Base Load",
    line_color="blue",
    selector=dict(name="consumption_due_to_baseload_in_kilowatthour"),
)
fig.update_traces(
    name="Total Consumption",
    line_color="red",
    selector=dict(name="total_consumption_in_kilowatthour"),
)

fig.show()

# full 1 cell test with visualisation for known data

In [28]:
import polars as pl
from plotly.subplots import make_subplots

# Load data with schema
schema = {"timestamp": pl.Datetime(time_zone="Europe/Brussels"), "total": pl.Float64}
energy_data = pl.scan_ndjson("data/PP/energy_use_test1.ndjson", schema=schema).sort("timestamp")


# Analyze with different quantiles
def analyze_quantile(q: float):
    analyzer = BaseloadAnalyzer(timezone="Europe/Brussels", quantile=q)
    power_data = analyzer.prepare_power_series(energy_data)
    result, median = analyzer.analyze(power_data, "1d")
    return result.collect(), median


# Get results and medians for different quantiles
q05, median_05 = analyze_quantile(0.05)
q10, median_10 = analyze_quantile(0.10)
q15, median_15 = analyze_quantile(0.15)

# Create subplots
fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=("Energy Consumption (kWh)", "Daily Baseload Power (W)"),
    vertical_spacing=0.12,
)

colors = {"5%": "blue", "10%": "red", "15%": "green"}

# --- Plot 1: Energy (kWh) ---
for data, q in [(q05, "5%"), (q10, "10%"), (q15, "15%")]:
    fig.add_trace(
        go.Scatter(
            x=data["timestamp"],
            y=data["consumption_due_to_baseload_in_kilowatthour"],
            name=f"Baseload Energy (q={q})",
            mode="lines",
            line=dict(color=colors[q]),
        ),
        row=1,
        col=1,
    )

fig.add_trace(
    go.Scatter(
        x=q05["timestamp"],
        y=q05["total_consumption_in_kilowatthour"],
        name="Total Consumption",
        mode="lines",
        line=dict(color="gray", dash="dot"),
    ),
    row=1,
    col=1,
)

# --- Plot 2: Power (W) with medians ---
for data, q, median in [(q05, "5%", median_05), (q10, "10%", median_10), (q15, "15%", median_15)]:
    # Daily baseload power
    fig.add_trace(
        go.Scatter(
            x=data["timestamp"],
            y=data["average_daily_baseload_in_watt"],
            name=f"Daily Baseload (q={q})",
            mode="lines",
            line=dict(color=colors[q]),
        ),
        row=2,
        col=1,
    )
    # Median horizontal line
    fig.add_trace(
        go.Scatter(
            x=[data["timestamp"].min(), data["timestamp"].max()],
            y=[median, median],
            name=f"Median (q={q}): {median:.0f} W",
            mode="lines",
            line=dict(color=colors[q], dash="dash"),
        ),
        row=2,
        col=1,
    )

fig.update_layout(
    title="Base Load Comparison - Different Quantiles",
    height=700,
    legend=dict(x=1.02, y=1),
)
fig.update_xaxes(title_text="Date", row=2, col=1)
fig.update_yaxes(title_text="Energy (kWh)", row=1, col=1)
fig.update_yaxes(title_text="Power (W)", row=2, col=1)

fig.show()

In [7]:
monthly
# daily

timestamp,consumption_due_to_baseload_in_kilowatthour,total_consumption_in_kilowatthour,average_daily_baseload_in_watt,average_power_in_watt,consumption_not_due_to_baseload_in_kilowatthour,baseload_ratio
"datetime[μs, Europe/Brussels]",f64,f64,f64,f64,f64,f64
2023-01-01 00:00:00 CET,70.340128,335.064964,94.670428,450.962266,264.724836,0.20993
2023-02-01 00:00:00 CET,66.816,310.068,99.428571,461.410714,243.252,0.215488
2023-03-01 00:00:00 CET,78.016,348.736,105.142857,469.994609,270.72,0.223711
2023-04-01 00:00:00 CEST,71.52,259.145,99.333333,359.923611,187.625,0.275984
2023-05-01 00:00:00 CEST,77.856,299.53,104.645161,402.594086,221.674,0.259927
…,…,…,…,…,…,…
2023-09-01 00:00:00 CEST,84.192,317.123,116.933333,440.448611,232.931,0.265487
2023-10-01 00:00:00 CEST,75.28,372.593,100.911528,499.454424,297.313,0.202044
2023-11-01 00:00:00 CET,70.56,342.402,98.0,475.558333,271.842,0.206074
2023-12-01 00:00:00 CET,91.2,386.054,122.580645,518.889785,294.854,0.236236


## Key Insights
- Base load typically accounts for 20-40% of total consumption
- Higher ratios may indicate energy saving opportunities
- Analysis maintains timezone awareness throughout