In [1]:
import polars as pl

%load_ext autoreload
%autoreload 2

# Base Load Analysis Demo with Polars

This notebook demonstrates analyzing base load (standby power consumption) using the BaseloadAnalyzer class. Base load represents the minimum continuous power draw in a system.

## Key Metrics
1. Base load value in WATTS - Consistent minimum power draw
2. Energy consumption in kWh - Power used over time
3. Base load percentage - Portion of total consumption that is baseline

## Data Requirements
Input data (LazyFrame):
- timestamp: datetime with timezone
- total: energy readings in kWh (15-minute intervals)


> ⚠️ **Note:** several example files are available:  
> *energy_use_big* comes from a very large building with an exceptionally high base load.  
> *energy_use_test1* comes from a regular family residence.


In [2]:
from openenergyid.baseload.analysis import BaseloadAnalyzer

# Column dtypes for the NDJSON reader: tz-aware timestamps and float readings
schema = dict(
    timestamp=pl.Datetime(time_zone="Europe/Brussels"),
    total=pl.Float64,
)

# Scan the example file with that schema applied
energy_data = pl.scan_ndjson(
    "data/PP/energy_use_test1.ndjson",
    schema=schema,
)

## Initialize Analyzer
Set up analyzer with timezone and quantile settings

In [3]:
# Create analyzer (15% quantile = ~216 min of lowest daily values)
analyzer = BaseloadAnalyzer(timezone="Europe/Brussels", quantile=0.15)

# Convert energy readings to power series
power_data = analyzer.prepare_power_seriespolars(energy_data)

## Analyze at Different Time Scales
Demonstrate flexibility in analysis periods

In [4]:
# Run the same analysis at hourly, daily and monthly resolution
hourly, daily, monthly = (
    analyzer.analyze(power_data, every).collect() for every in ("1h", "1d", "1mo")
)

# Preview the first monthly rows, restricted to the headline columns
summary_columns = ["timestamp", "average_daily_baseload_in_watt", "baseload_ratio"]
print("Monthly Base Load Analysis:")
print(monthly.select(summary_columns).head())

Monthly Base Load Analysis:
shape: (5, 3)
┌───────────────────────────────┬────────────────────────────────┬────────────────┐
│ timestamp                     ┆ average_daily_baseload_in_watt ┆ baseload_ratio │
│ ---                           ┆ ---                            ┆ ---            │
│ datetime[μs, Europe/Brussels] ┆ f64                            ┆ f64            │
╞═══════════════════════════════╪════════════════════════════════╪════════════════╡
│ 2023-01-01 00:00:00 CET       ┆ 94.670428                      ┆ 0.20993        │
│ 2023-02-01 00:00:00 CET       ┆ 99.428571                      ┆ 0.215488       │
│ 2023-03-01 00:00:00 CET       ┆ 105.142857                     ┆ 0.223711       │
│ 2023-04-01 00:00:00 CEST      ┆ 99.333333                      ┆ 0.275984       │
│ 2023-05-01 00:00:00 CEST      ┆ 104.645161                     ┆ 0.259927       │
└───────────────────────────────┴────────────────────────────────┴────────────────┘


## Visualization Example
Plot daily base load vs total consumption

In [5]:
import plotly.express as px
import plotly.graph_objects as go

# plotly express is fed a pandas copy of the daily results
daily_pd = daily.to_pandas()

baseload_col = "consumption_due_to_baseload_in_kilowatthour"
total_col = "total_consumption_in_kilowatthour"

# Both energy series as plain lines over time
fig = px.line(
    daily_pd,
    x="timestamp",
    y=[baseload_col, total_col],
    title="Daily Base Load vs Total Consumption",
    labels={"value": "Energy (kWh)", "variable": "Type"},
)

# Per series: a dashed line at its mean, plus a readable legend name and a
# colour shared by the trace and its mean line
for column, legend_name, colour, mean_label in (
    (baseload_col, "Base Load", "blue", "Average Base Load"),
    (total_col, "Total Consumption", "red", "Average Total Consumption"),
):
    fig.add_hline(
        y=daily_pd[column].mean(),
        line_dash="dash",
        line_color=colour,
        annotation_text=mean_label,
    )
    fig.update_traces(
        name=legend_name,
        line_color=colour,
        selector={"name": column},
    )

fig.show()

In [6]:
import polars as pl
import plotly.express as px

# Re-scan the example file, this time ordered by timestamp
schema = {
    "timestamp": pl.Datetime(time_zone="Europe/Brussels"),
    "total": pl.Float64,
}
energy_data = (
    pl.scan_ndjson("data/PP/energy_use_test1.ndjson", schema=schema)
    .sort("timestamp")
)


# Analyze with different quantiles
def analyze_quantile(q: float, every: str = "1d") -> pl.DataFrame:
    """Run the base load analysis on the notebook's ``energy_data`` for one quantile.

    A fresh ``BaseloadAnalyzer`` is created on every call, so results for
    different quantiles do not share analyzer state.

    Args:
        q: Quantile passed to ``BaseloadAnalyzer`` (e.g. ``0.05`` for 5%).
        every: Aggregation period forwarded to ``analyzer.analyze``, such as
            ``"1h"``, ``"1d"`` or ``"1mo"``. Defaults to daily, which is what
            the cells in this notebook expect.

    Returns:
        The collected analysis result for the requested quantile and period.
    """
    analyzer = BaseloadAnalyzer(timezone="Europe/Brussels", quantile=q)
    # Reads the module-level ``energy_data`` defined earlier in this cell.
    power_data = analyzer.prepare_power_seriespolars(energy_data)
    return analyzer.analyze(power_data, every).collect()


# Daily results for each quantile under comparison (q05 and q10 are reused by later cells)
q05, q10, q15 = (analyze_quantile(level) for level in (0.05, 0.10, 0.15))
quantile_results = (("5%", q05), ("10%", q10), ("15%", q15))
baseload_column = "consumption_due_to_baseload_in_kilowatthour"

# One base load line per quantile
fig = go.Figure()
for label, frame in quantile_results:
    fig.add_trace(
        go.Scatter(
            x=frame["timestamp"],
            y=frame[baseload_column],
            name=f"Base Load (q={label})",
            mode="lines",
        )
    )

# Total consumption as a dotted grey reference line, taken from the 5% result
fig.add_trace(
    go.Scatter(
        x=q05["timestamp"],
        y=q05["total_consumption_in_kilowatthour"],
        name="Total Consumption",
        mode="lines",
        line={"color": "gray", "dash": "dot"},
    )
)

fig.update_layout(
    title="Base Load Comparison - Different Quantiles",
    xaxis_title="Date",
    yaxis_title="Energy (kWh)",
)

fig.show()

# Mean daily base load energy per quantile
print("\nAverage Base Load (kWh):")
for label, frame in quantile_results:
    print(f"{label} quantile: {frame[baseload_column].mean():.3f}")


Average Base Load (kWh):
5% quantile: 1.907
10% quantile: 2.266
15% quantile: 2.447


In [7]:
# Display the monthly results table (use `daily` instead to inspect the daily results)
monthly

timestamp,consumption_due_to_baseload_in_kilowatthour,total_consumption_in_kilowatthour,average_daily_baseload_in_watt,average_power_in_watt,consumption_not_due_to_baseload_in_kilowatthour,baseload_ratio
"datetime[μs, Europe/Brussels]",f64,f64,f64,f64,f64,f64
2023-01-01 00:00:00 CET,70.340128,335.064964,94.670428,450.962266,264.724836,0.20993
2023-02-01 00:00:00 CET,66.816,310.068,99.428571,461.410714,243.252,0.215488
2023-03-01 00:00:00 CET,78.016,348.736,105.142857,469.994609,270.72,0.223711
2023-04-01 00:00:00 CEST,71.52,259.145,99.333333,359.923611,187.625,0.275984
2023-05-01 00:00:00 CEST,77.856,299.53,104.645161,402.594086,221.674,0.259927
…,…,…,…,…,…,…
2023-09-01 00:00:00 CEST,84.192,317.123,116.933333,440.448611,232.931,0.265487
2023-10-01 00:00:00 CEST,75.28,372.593,100.911528,499.454424,297.313,0.202044
2023-11-01 00:00:00 CET,70.56,342.402,98.0,475.558333,271.842,0.206074
2023-12-01 00:00:00 CET,91.2,386.054,122.580645,518.889785,294.854,0.236236


## Key Insights
- Base load typically accounts for 20-40% of total consumption
- Higher ratios may indicate energy saving opportunities
- Analysis maintains timezone awareness throughout

In [8]:
import polars as pl
import plotly.graph_objects as go

# Requires `q05`, the daily 5% quantile results produced by the quantile
# comparison cell (q05 = analyze_quantile(0.05)).

# --- Data Preparation ---
# Keep the 2023 rows only, then roll the daily energy up into monthly totals.
in_2023 = pl.col("timestamp").dt.year() == 2023
monthly_agg = (
    q05.filter(in_2023)
    .group_by_dynamic("timestamp", every="1mo", closed="left")
    .agg(
        baseload_kwh=pl.col("consumption_due_to_baseload_in_kilowatthour").sum(),
        variable_kwh=pl.col("consumption_not_due_to_baseload_in_kilowatthour").sum(),
    )
    .sort("timestamp")
)


# --- Create the Plotly Figure ---
# Two bar series on the same monthly x-axis: baseload first (dark blue),
# variable consumption second (lighter blue).
fig = go.Figure(
    data=[
        go.Bar(
            x=monthly_agg["timestamp"],
            y=monthly_agg["baseload_kwh"],
            name="Baseload Consumption",
            marker_color="#1f77b4",
        ),
        go.Bar(
            x=monthly_agg["timestamp"],
            y=monthly_agg["variable_kwh"],
            name="Variable Consumption",
            marker_color="#aec7e8",
        ),
    ]
)

# --- Customize the Layout ---
fig.update_layout(
    barmode="stack",  # stack the two series instead of placing them side by side
    title_text="Monthly Energy Consumption: Baseload vs. Variable (5% Quantile)",
    xaxis_title="Month",
    yaxis_title="Energy Consumption (kWh)",
    xaxis={
        "tickformat": "%b %Y",  # tick labels such as 'Jan 2023'
        "dtick": "M1",  # one tick per month
    },
    legend_title_text="Consumption Type",
    plot_bgcolor="white",
)

# Render the chart
fig.show()

In [11]:
import polars as pl
import plotly.graph_objects as go

# Requires `q10`, the daily 10% quantile results produced by the quantile
# comparison cell (q10 = analyze_quantile(0.10)).
# NOTE: this chart is built from the 10% quantile results, whereas the
# neighbouring charts use the 5% results (q05).


# --- Data Preparation ---
# Daily rows for 2023, ordered by timestamp so the lines connect chronologically.
daily_power_data = q10.filter(pl.col("timestamp").dt.year() == 2023).sort("timestamp")
timestamps = daily_power_data["timestamp"]


# --- Create the Plotly Figure ---
# Trace 1: total average power as a plain blue line.
total_power_trace = go.Scatter(
    x=timestamps,
    y=daily_power_data["average_power_in_watt"],
    mode="lines",
    line={"width": 2, "color": "#1f77b4"},
    name="Total Average Power",
)

# Trace 2: baseload power as an orange area; fill="tozeroy" shades the region
# between the line and y=0.
baseload_trace = go.Scatter(
    x=timestamps,
    y=daily_power_data["average_daily_baseload_in_watt"],
    mode="lines",
    fill="tozeroy",
    line={"width": 0.5, "color": "#ff7f0e"},
    name="Baseload Power",
)

fig = go.Figure(data=[total_power_trace, baseload_trace])


# --- Customize the Layout ---
fig.update_layout(
    title_text="Daily Average Power: Baseload vs. Total Usage (2023)",
    xaxis_title="Date",
    yaxis_title="Average Power (Watts)",
    legend_title_text="Power Component",
    plot_bgcolor="white",
    hovermode="x unified",  # one hover box listing both traces
    yaxis_rangemode="tozero",  # keep y=0 in view
)

# Render the chart
fig.show()

In [15]:
import polars as pl
import plotly.graph_objects as go

# --- Data Preparation ---
# Restrict to 2023 so the totals match the "(2023)" in the chart title and the
# year filter used by the other 2023 charts in this notebook, then sum the
# baseload and variable energy over that year.
energy_2023 = q05.filter(pl.col("timestamp").dt.year() == 2023)
total_baseload_kwh = energy_2023["consumption_due_to_baseload_in_kilowatthour"].sum()
total_variable_kwh = energy_2023["consumption_not_due_to_baseload_in_kilowatthour"].sum()

labels = ["Baseload Consumption", "Variable Consumption"]
values = [total_baseload_kwh, total_variable_kwh]

# --- Create the Plotly Figure ---
fig = go.Figure(
    data=[
        go.Pie(
            labels=labels,
            values=values,
            pull=[0.1, 0],  # "pull" out the baseload slice for emphasis
            marker_colors=["#ff7f0e", "#1f77b4"],  # Orange for baseload, Blue for variable
            hole=0.3,  # Creates a donut chart, which is often easier to read
        )
    ]
)

# --- Customize the Layout ---
fig.update_layout(title_text="Total Energy Breakdown (2023)", legend_title_text="Consumption Type")

fig.show()

In [None]:
import polars as pl
import plotly.graph_objects as go
import statsmodels.api as sm

# --- Data Preparation for LOESS ---
# We use the daily data from 2023, ordered by timestamp
daily_power_data = q05.filter(pl.col("timestamp").dt.year() == 2023).sort("timestamp")

# LOESS requires numerical x-values, so we convert timestamps to fractional days
# since the first timestamp. Whole-day truncation (dt.total_days()) is avoided:
# the timestamps are timezone-aware (Europe/Brussels), so across a daylight-saving
# change the elapsed time is an hour short of / past a whole number of days, and
# truncating would yield one duplicated and one skipped x-value.
elapsed = daily_power_data["timestamp"] - daily_power_data["timestamp"].min()
x_vals = elapsed.dt.total_seconds().to_numpy() / 86_400
y_vals = daily_power_data["average_power_in_watt"].to_numpy()

# Calculate the LOESS smoothed values
# The 'frac' parameter controls smoothness (0.2 means use 20% of data for each point).
# return_sorted=False returns the fitted values as a 1-D array with the same
# length and order as the input, so they stay aligned with the timestamps even
# if some observations are missing (NaN) and would otherwise be dropped.
smoothed_vals = sm.nonparametric.lowess(y_vals, x_vals, frac=0.2, return_sorted=False)

# --- Create the Plotly Figure ---
fig = go.Figure()

# Add the raw daily data as a faint scatter plot
fig.add_trace(
    go.Scatter(
        x=daily_power_data["timestamp"],
        y=daily_power_data["average_power_in_watt"],
        mode="markers",
        marker=dict(color="lightblue", size=5, opacity=0.5),
        name="Daily Average Power",
    )
)

# Add the LOESS trendline on top
fig.add_trace(
    go.Scatter(
        x=daily_power_data["timestamp"],
        y=smoothed_vals,  # fitted values, one per input row
        mode="lines",
        line=dict(color="darkred", width=3),
        name="Underlying Trend (LOESS)",
    )
)

# --- Customize the Layout ---
fig.update_layout(
    title_text="Underlying Trend of Daily Power Consumption (2023)",
    xaxis_title="Date",
    yaxis_title="Average Power (Watts)",
    legend_title_text="Data Series",
    plot_bgcolor="white",
)
fig.show()

In [13]:
import polars as pl
import plotly.graph_objects as go

# Daily 5% quantile results restricted to 2023
daily_power_data = q05.filter(pl.col("timestamp").dt.year() == 2023)

# Mean daily baseload, drawn below as a reference marker on the power axis
avg_baseload = daily_power_data["average_daily_baseload_in_watt"].mean()

# --- Create the Plotly Figure ---
# Histogram of the daily average power values (green bars)
fig = go.Figure(
    data=[
        go.Histogram(
            x=daily_power_data["average_power_in_watt"],
            name="Power Distribution",
            marker_color="#2ca02c",
        )
    ]
)

# Dashed vertical line at the average baseload, labelled in whole watts
fig.add_vline(
    x=avg_baseload,
    line_width=3,
    line_dash="dash",
    line_color="black",
    annotation_text=f"Avg. Baseload: {avg_baseload:.0f} W",
    annotation_position="top right",
)

# --- Customize the Layout ---
fig.update_layout(
    title_text="Distribution of Daily Average Power Levels (2023)",
    xaxis_title="Average Power (Watts)",
    yaxis_title="Number of Days",
    plot_bgcolor="white",
)
fig.show()

In [14]:
import polars as pl
import plotly.graph_objects as go

# --- Data Preparation ---
# Extract time-based features needed for the calendar.
#
# The column index is a Monday-based week number counted within the calendar
# year: days before the year's first Monday are week 0. The ISO week number
# (dt.week()) is deliberately not used: ISO assigns Sunday 2023-01-01 to week 52
# of the previous ISO year, which would put it in the same heatmap cell as
# Sunday 2023-12-31 (also week 52).
ordinal_day = pl.col("timestamp").dt.ordinal_day().cast(pl.Int32)  # 1..366
weekday_number = pl.col("timestamp").dt.weekday().cast(pl.Int32)  # Monday=1, Sunday=7

calendar_data = q05.filter(pl.col("timestamp").dt.year() == 2023).with_columns(
    weekday=pl.col("timestamp").dt.weekday(),  # Monday=1, Sunday=7
    # ordinal_day - weekday + 7 is always >= 1, so the floor division is safe
    week_of_year=(ordinal_day - weekday_number + 7) // 7,
    hover_text=pl.concat_str(
        [
            pl.col("timestamp").dt.strftime("%A, %b %d"),
            pl.lit("<br>Power: "),
            pl.col("average_power_in_watt").round(0).cast(pl.Utf8),
            pl.lit(" W"),
        ]
    ),
)

# --- Create the Plotly Figure ---
# One cell per day: x = week index, y = weekday, colour = daily average power
fig = go.Figure(
    data=go.Heatmap(
        x=calendar_data["week_of_year"],
        y=calendar_data["weekday"],
        z=calendar_data["average_power_in_watt"],
        text=calendar_data["hover_text"],
        hoverinfo="text",
        colorscale="Viridis",
        showscale=True,
        colorbar={"title": "Avg. Power (W)"},
    )
)

# --- Customize the Layout ---
fig.update_layout(
    title="Daily Average Power Calendar Heatmap (2023)",
    yaxis=dict(
        tickvals=[1, 2, 3, 4, 5, 6, 7],
        ticktext=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
        title="Day of Week",
    ),
    xaxis_title="Week of the Year",
    plot_bgcolor="white",
)
fig.show()