# Elhub API data - Gridloss - Summerproject 2025

## Visualization

Bjørn Eirik Rognskog Nordbak

### Importing data from Elhub API
https://api.elhub.no/energy-data-api#/grid-areas

In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

oslo = ZoneInfo("Europe/Oslo")

def fetch_window(start_dt, end_dt, timeout=30):
    """Fetch the hourly grid-loss records for one time window from the Elhub API.

    Requests the ``LOSS_PER_MGA_HOUR`` dataset and flattens the
    ``attributes.lossPerMgaHour`` records of every returned entry into one row
    each, tagged with that entry's ``eic``, ``name`` and ``status``.

    Args:
        start_dt: Window start; sent as ``startDate`` via ``isoformat()``.
        end_dt: Window end; sent as ``endDate`` via ``isoformat()``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        A DataFrame with one row per loss record. An empty DataFrame is
        returned (and a message printed) when the server answers with an error
        status or the payload carries no ``data``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    params = {
        "dataset":   "LOSS_PER_MGA_HOUR",
        "startDate": start_dt.isoformat(),
        "endDate":   end_dt.isoformat(),
    }
    url = "https://api.elhub.no/energy-data/v0/grid-areas"
    resp = requests.get(url, params=params, timeout=timeout)

    # An error reply is not guaranteed to be JSON, so check the status before
    # parsing and keep the same "skip this window" behaviour as for missing data.
    if not resp.ok:
        print(f"  → HTTP {resp.status_code} for {start_dt.date()} → {end_dt.date()}, skipping")
        return pd.DataFrame()

    # Safeguard: a missing or empty "data" entry yields an empty frame.
    raw = resp.json().get("data")
    if not raw:
        print(f"  → no 'data' for {start_dt.date()} → {end_dt.date()}, skipping")
        return pd.DataFrame()

    # One row per loss record; errors="ignore" tolerates entries that lack
    # one of the meta keys instead of raising.
    df = pd.json_normalize(
        raw,
        record_path=["attributes", "lossPerMgaHour"],
        meta=[
            ["attributes", "eic"],
            ["attributes", "name"],
            ["attributes", "status"],
        ],
        errors="ignore",
    ).rename(columns={
        "attributes.eic":    "eic",
        "attributes.name":   "name",
        "attributes.status": "status",
    })
    return df

# Walk the whole span in 7-day windows and collect one DataFrame per window
span_start = datetime(2023, 1, 1, tzinfo=oslo)
span_end = datetime(2025, 7, 1, tzinfo=oslo)
window = timedelta(days=7)

all_chunks = []
cur = span_start
while cur < span_end:
    # The last window is shortened so it never runs past span_end
    nxt = cur + window
    if nxt > span_end:
        nxt = span_end

    print(f"Fetching {cur.date()} → {nxt.date()}")
    dfc = fetch_window(cur, nxt)
    all_chunks.append(dfc)

    cur = nxt

# Stack all windows into a single frame with a fresh 0..n-1 index
big_df = pd.concat(all_chunks, ignore_index=True)

### Save the data to a CSV file (optional)

In [None]:
import pandas as pd

# Write big_df to big_df.csv in the working directory. index=False leaves the
# row index out of the file, so no extra column appears when it is read back.
big_df.to_csv('big_df.csv', index=False)

### Read the CSV file (if needed)

In [None]:
import pandas as pd

# Reload the data from big_df.csv. A CSV file stores no dtypes, so pandas
# re-infers them on read and they may differ from the frame that was saved.
big_df = pd.read_csv('big_df.csv')

# Bare expression: the notebook renders the frame as the cell output
big_df

## Data quality

### Data quality report

In [None]:
import pandas as pd

def humanized_dq_report_no(df):
    """Build a per-column data-quality summary with Norwegian column headers.

    Args:
        df: The DataFrame to summarise.

    Returns:
        A DataFrame with one row per column of ``df`` and the columns
        ``Kolonne`` (column name), ``Datatype`` (coarse type label),
        ``Tilstede`` (non-null count), ``Mangler`` (null count),
        ``% Mangler`` (null share in percent, one decimal) and
        ``Unike verdier`` (distinct values, nulls counted as a value).
    """
    def _type_label(dtype_name):
        # Map the dtype's string name onto one of three coarse labels
        is_numeric = "float" in dtype_name or "int" in dtype_name
        if is_numeric:
            return "Numerisk"
        return "Dato/Tid" if "datetime" in dtype_name else "Tekst"

    # Basic counts, one entry per column
    n_rows = len(df)
    filled_counts = df.notnull().sum()
    null_counts = df.isnull().sum()
    null_share = (null_counts / n_rows * 100).round(1)
    distinct_counts = df.nunique(dropna=False)
    type_labels = [_type_label(name) for name in df.dtypes.astype(str)]

    # Assemble the report, then select the columns in their display order
    summary = pd.DataFrame({
        "Kolonne":       filled_counts.index,
        "Datatype":      type_labels,
        "Tilstede":      filled_counts.values,
        "Mangler":       null_counts.values,
        "% Mangler":     null_share.values,
        "Unike verdier": distinct_counts.values,
    })
    column_order = [
        "Kolonne", "Datatype", "Tilstede",
        "Mangler", "% Mangler", "Unike verdier",
    ]
    return summary[column_order]

# Build the quality report for the full data set
dq = humanized_dq_report_no(big_df)

# Replace the row labels with empty strings so no index numbers are shown
dq_display = dq.copy()
dq_display.index = [""] * len(dq_display)

# Display formats: thousands separators for counts, one decimal for the share
fmt = dict.fromkeys(["Tilstede", "Mangler", "Unike verdier"], "{:,}")
fmt["% Mangler"] = "{:.1f}%"

caption_text = f"Datakvalitetssammendrag ({len(big_df):,} rader × {big_df.shape[1]} kolonner)"

# CSS for the caption: below the table, italic, left-aligned
caption_css = {
    "selector": "caption",
    "props": [
        ("caption-side", "bottom"),
        ("font-style", "italic"),
        ("text-align", "left"),
    ],
}

# Last expression of the cell, so the notebook renders the styled table
(
    dq_display.style
    .format(fmt)
    .set_caption(caption_text)
    .set_table_styles([caption_css])
)


### Save the data quality report to a .tex table

In [None]:
import os

# 1) Format a copy of the report as strings: thousands separators for the
#    counts and an escaped percent sign, since "%" starts a comment in LaTeX
dq_tex = dq.copy()
dq_tex["Tilstede"]      = dq_tex["Tilstede"].map("{:,}".format)
dq_tex["Mangler"]       = dq_tex["Mangler"].map("{:,}".format)
dq_tex["% Mangler"]     = dq_tex["% Mangler"].map("{:.1f}\\%".format)
dq_tex["Unike verdier"] = dq_tex["Unike verdier"].map("{:,}".format)

# 2) Escape the percent sign in the column header as well
dq_tex = dq_tex.rename(columns={"% Mangler": r"\% Mangler"})

# 3) Export the table only: no caption and no label are requested
tabular_str = dq_tex.to_latex(
    index=False,
    longtable=False,
    caption=None,
    label=None,
    escape=False,     # keep the LaTeX markup in the cells as written
)

# 4) Write the .tex file. The folder is derived from the output path itself so
#    the directory that gets created is always the one that is written to.
ut_path = "tables/datakvalitet_gridarea_tabular.tex"
os.makedirs(os.path.dirname(ut_path), exist_ok=True)
with open(ut_path, "w", encoding="utf-8") as f:
    f.write(tabular_str)

print(f"Skrev tabell-fil til {ut_path}")


### Checking for temporal consistency

In [None]:
import pandas as pd
from IPython.display import display

# ── 1) Parse startTime into timezone-aware UTC datetimes (updates big_df) ────
big_df['startTime'] = pd.to_datetime(big_df['startTime'], utc=True)

# ── 2) Order the rows chronologically within each gridArea ──────────────────
df = big_df.sort_values(['gridArea', 'startTime']).reset_index(drop=True)

# ── 3) Time elapsed since the previous row of the same gridArea ─────────────
df['delta'] = df.groupby('gridArea')['startTime'].diff()

# ── 4) Keep every step that is not exactly one hour ─────────────────────────
# The first row of each gridArea has no predecessor (delta is NaT) and is
# excluded rather than counted as irregular.
expected = pd.Timedelta(hours=1)
has_predecessor = df['delta'].notna()
off_step = df['delta'] != expected
irregular = df[has_predecessor & off_step]

# ── 5) Report the findings ──────────────────────────────────────────────────
n_irregular = len(irregular)
n_areas = irregular['gridArea'].nunique()
print(f"🔎 Found {n_irregular} irregular intervals across "
      f"{n_areas} gridAreas\n")

# First 20 offending rows
display(irregular[['gridArea', 'startTime', 'delta']].head(20))

# Per-gridArea statistics of the irregular steps
delta_by_area = irregular.groupby('gridArea')['delta']
summary = delta_by_area.agg(
    count='count', total='sum', average='mean', maximum='max'
).reset_index()
print("— Summary by gridArea —")
display(summary)

# Write both tables to CSV for closer inspection
irregular.to_csv('irregular_intervals.csv', index=False)
summary.to_csv('irregular_summary.csv', index=False)
print("\n▶️  Detailed rows → irregular_intervals.csv")
print("▶️  Summary stats → irregular_summary.csv")


In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

areas = ['AEN1 RN', 'AEN2 DN']  # ← replace with the gridAreas to inspect

for area in areas:
    # Rows of this gridArea in chronological order, re-indexed from 0
    in_area = df['gridArea'] == area
    sub = df[in_area].sort_values('startTime').reset_index(drop=True).copy()

    # Step to the previous timestamp in hours, plus that previous timestamp
    sub['delta_h'] = sub['startTime'].diff().dt.total_seconds() / 3600
    sub['prev_time'] = sub['startTime'].shift(1)

    # Steps that exist (not the first row) and are not exactly one hour
    gaps = sub[sub['delta_h'].notna() & (sub['delta_h'] != 1)]

    # 1) List every gap with the timestamps on either side of it
    print(f"\n⚠️  Gaps in {area}:")
    if not gaps.empty:
        for prev_time, start_time, delta_h in zip(
            gaps['prev_time'], gaps['startTime'], gaps['delta_h']
        ):
            print(f"   • from {prev_time} → {start_time}  (Δ = {delta_h:.1f} h)")
    else:
        print("   (none!)")

    # 2) Plot the whole step series and mark the gaps in red
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(sub['startTime'], sub['delta_h'], label='hourly Δ')
    ax.scatter(gaps['startTime'], gaps['delta_h'], color='red', zorder=5, label='gaps')
    for x in gaps['startTime']:
        ax.axvline(x=x, color='red', alpha=0.3)

    # One x tick every second month keeps the axis readable
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    plt.xticks(rotation=45)

    ax.set_title(f"Hourly Δ between startTimes — {area}")
    ax.set_ylabel("Δ (hours)")
    ax.set_xlabel("Date")
    ax.legend(loc='upper right')
    plt.tight_layout()
    plt.show()


In [None]:
import numpy as np

# Express the Timedelta column 'delta' as a float number of hours
hrs = irregular['delta'].dt.total_seconds() / 3600

# 1) Which distinct gap sizes (in hours) occur across all gridAreas?
global_hours = np.sort(hrs.unique())
print("🔍 All distinct irregular Δ (hours):", global_hours)

# With no irregular rows there are zero gap sizes; report that explicitly
# instead of falling through to the "multiple gap sizes" message.
if len(global_hours) == 0:
    print("✅ No irregular intervals found — there are no gap sizes to compare.")
elif len(global_hours) == 1:
    print(f"✅ Yes — every gap is exactly {global_hours[0]:.1f} hours.")
else:
    print("⚠️ No — there are multiple gap sizes in the data.")

# 2) Per gridArea: number of distinct gap sizes and their smallest/largest value
per_area = (
    irregular
    .assign(hours=hrs)
    .groupby('gridArea')['hours']
    .agg(n_unique='nunique', min_h='min', max_h='max')
    .reset_index()
)

# gridAreas that show more than one gap size
bad = per_area[per_area['n_unique'] > 1]
if bad.empty:
    print("✅ Every gridArea has a single, consistent gap size.")
else:
    print(f"⚠️ {len(bad)} gridAreas have >1 gap size (they’d show up below):")
    display(bad)


## Exploring the Elhub API gridloss data

### Unique EIC codes

In [None]:
# Distinct EIC codes: how many there are (nulls not counted) and which ones
eic_column = big_df['eic']
unique_eic_count = eic_column.nunique()
unique_eics = eic_column.unique()

print(f"Number of unique EIC codes: {unique_eic_count}")
print(unique_eics)


### Unique name entries

In [None]:
# Distinct name entries: how many there are (nulls not counted) and which ones
name_column = big_df['name']
unique_name_count = name_column.nunique()
unique_names = name_column.unique()

print(f"Number of unique names: {unique_name_count}")
print(unique_names)


### Unique gridArea entries

In [None]:
# Distinct gridArea codes: how many there are (nulls not counted) and which ones
gridarea_column = big_df['gridArea']
unique_gridarea_count = gridarea_column.nunique()
unique_gridareas = gridarea_column.unique()

print(f"Number of unique grid areas: {unique_gridarea_count}")
print(unique_gridareas)


### Unique priceArea entries

In [None]:
# Distinct priceArea codes: how many there are (nulls not counted) and which ones
pricearea_column = big_df['priceArea']
unique_pricearea_count = pricearea_column.nunique()
unique_priceareas = pricearea_column.unique()

print(f"Number of unique price areas: {unique_pricearea_count}")
print(unique_priceareas)


### Average hourly grid loss per price area

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# ─── FONT SETTINGS ────────────────────────────────────────────────────────────
TITLE_FS        = 24
AXIS_TITLE_FS   = 20
TICK_FS         = 18
BAR_TEXT_FS     = 18
FONT_FAMILY     = "Roboto"  # Elhub font (requires it to be installed locally)

# Elhub brand colours (primary + secondary)
elhub_colors = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]

# ─── LOAD & PREPARE DATA ──────────────────────────────────────────────────────
# big_df must already exist in the notebook; work on a copy of it.
df = big_df.copy()

# Parse startTime as UTC, then express it in Europe/Oslo local time
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# First and last local calendar date, shown in the title
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# ─── AGGREGATION: AVERAGE HOURLY LOSS PER PRICE AREA ──────────────────────────
# Mean of calculatedLossQuantityKwh over all rows of each priceArea.
# NOTE(review): this averages individual rows, whereas the daily and monthly
# loss charts average per-period sums over each priceArea — confirm that this
# difference is intended.
avg_hourly_area = (
    df
    .groupby('priceArea')['calculatedLossQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'calculatedLossQuantityKwh':'avgHourlyLossKWh'})
)

# ─── BUILD & STYLE THE BAR CHART ──────────────────────────────────────────────
# Plotly renders a "\n" in a title as a space; "<br>" gives the line break.
title_text = (
    f"Average Hourly Loss per Price Area<br>"
    f"({span_start} to {span_end})"
)

fig = px.bar(
    avg_hourly_area,
    x='priceArea',
    y='avgHourlyLossKWh',
    color='priceArea',
    text='avgHourlyLossKWh',
    title=title_text,
    labels={'avgHourlyLossKWh': 'Avg Hourly Loss (kWh)'},
    color_discrete_sequence=elhub_colors
)

# Value labels inside the bars, in white
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='inside',
    textfont=dict(color='white', size=BAR_TEXT_FS),
    hovertemplate='%{x}: %{y:,.0f} kWh'
)

fig.update_layout(
    title_font=dict(size=TITLE_FS, family=FONT_FAMILY),
    title_x=0.5,
    font=dict(family=FONT_FAMILY),
    xaxis=dict(
        title='',                # no x-axis title
        tickfont=dict(size=TICK_FS)
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    showlegend=False,
    margin=dict(t=100, b=80),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

# Show the interactive chart
fig.show()

# ─── EXPORT TO PDF ────────────────────────────────────────────────────────────
output_path = "average_hourly_loss_per_price_area_bar.pdf"
fig.write_image(
    output_path,
    format="pdf",
    width=1000,
    height=700,
    scale=1
)
print(f"✅ Saved PDF to: {output_path}")


### Average daily grid loss per price area

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# ─── FONT SETTINGS ────────────────────────────────────────────────────────────
TITLE_FS        = 24
AXIS_TITLE_FS   = 20
TICK_FS         = 18
BAR_TEXT_FS     = 18
FONT_FAMILY     = "Roboto"  # Elhub font (requires it to be installed locally)

# Elhub brand colours (primary + secondary)
elhub_colors = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]

# Work on a copy; parse startTime as UTC, then express it in Oslo local time
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# First and last local calendar date, shown in the title
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# Daily aggregation: sum the loss per local date and priceArea, then average
# those daily sums per priceArea
df['date'] = df['startTime'].dt.date.astype(str)
daily_totals = (
    df.groupby(['date', 'priceArea'])['calculatedLossQuantityKwh']
      .sum().reset_index()
)
avg_daily = (
    daily_totals
    .groupby('priceArea')['calculatedLossQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'calculatedLossQuantityKwh':'avgDailyLossKWh'})
)

# Plotly renders a "\n" in a title as a space; "<br>" gives the line break.
title_text = (
    f"Average Daily Loss per Price Area<br>"
    f"({span_start} to {span_end})"
)

# Bar chart, one bar per priceArea
fig = px.bar(
    avg_daily,
    x='priceArea',
    y='avgDailyLossKWh',
    color='priceArea',
    text='avgDailyLossKWh',
    title=title_text,
    labels={'avgDailyLossKWh': 'Avg Daily Loss (kWh)'},
    color_discrete_sequence=elhub_colors
)

# Value labels inside the bars, in white
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='inside',
    textfont=dict(color='white', size=BAR_TEXT_FS),
    cliponaxis=False
)

fig.update_layout(
    title_font=dict(size=TITLE_FS, family=FONT_FAMILY),
    title_x=0.5,
    font=dict(family=FONT_FAMILY),
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    showlegend=False,
    margin=dict(t=100, b=80),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

fig.show()

# Export to PDF
output_path = "average_daily_loss_per_price_area_bar.pdf"
fig.write_image(
    output_path,
    format="pdf",
    width=1000,
    height=700,
    scale=1
)
print(f"✅ Saved PDF to: {output_path}")


### Average monthly grid loss per price area

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# ─── FONT SETTINGS ────────────────────────────────────────────────────────────
TITLE_FS        = 24
AXIS_TITLE_FS   = 20
TICK_FS         = 18
BAR_TEXT_FS     = 18
FONT_FAMILY     = "Roboto"  # Elhub font (requires it to be installed locally)

# Elhub brand colours (primary + secondary)
elhub_colors = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]

# Work on a copy; parse startTime as UTC, then express it in Oslo local time
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# Title span: the first day of the first and of the last month in the data
span_start = df['startTime'].min().to_period('M').to_timestamp().date().isoformat()
span_end   = df['startTime'].max().to_period('M').to_timestamp().date().isoformat()

# Monthly aggregation: sum the loss per month and priceArea, then average
# those monthly sums per priceArea
df['month'] = df['startTime'].dt.to_period('M').astype(str)  # e.g. "2025-06"
monthly_totals = (
    df.groupby(['month', 'priceArea'])['calculatedLossQuantityKwh']
      .sum()
      .reset_index()
)
avg_monthly = (
    monthly_totals
    .groupby('priceArea')['calculatedLossQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'calculatedLossQuantityKwh':'avgMonthlyLossKWh'})
)

# Plotly renders a "\n" in a title as a space; "<br>" gives the line break.
title_text = (
    f"Average Monthly Loss per Price Area<br>"
    f"({span_start} to {span_end})"
)

# Bar chart, one bar per priceArea
fig = px.bar(
    avg_monthly,
    x='priceArea',
    y='avgMonthlyLossKWh',
    color='priceArea',
    text='avgMonthlyLossKWh',
    title=title_text,
    labels={'avgMonthlyLossKWh': 'Avg Monthly Loss (kWh)'},
    color_discrete_sequence=elhub_colors
)

# Value labels inside the bars, in white
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='inside',
    textfont=dict(color='white', size=BAR_TEXT_FS),
    cliponaxis=False
)

fig.update_layout(
    title_font=dict(size=TITLE_FS, family=FONT_FAMILY),
    title_x=0.5,
    font=dict(family=FONT_FAMILY),
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    showlegend=False,
    margin=dict(t=100, b=80),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

fig.show()

# Export to PDF
output_path = "average_monthly_loss_per_price_area_bar.pdf"
fig.write_image(
    output_path,
    format="pdf",
    width=1000,
    height=700,
    scale=1
)
print(f"✅ Saved PDF to: {output_path}")


### Diurnal Profile of Grid Loss - sorted by price area

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# ─── FONT SETTINGS ────────────────────────────────────────────────────────────
TITLE_FS      = 24
AXIS_TITLE_FS = 20
TICK_FS       = 18
MARKER_SIZE   = 8
FONT_FAMILY   = "Roboto"

# Elhub brand colours (primary + secondary)
elhub_colors = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]

# 1) Work on a copy; parse startTime as UTC, then express it in Oslo local time
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) First and last local calendar date, shown in the title
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# 3) Local hour of day (0-23)
df['hour'] = df['startTime'].dt.hour

# 4) Mean loss over all rows for each (hour of day, priceArea) pair
avg_hourly = (
    df
    .groupby(['hour', 'priceArea'], observed=True)['calculatedLossQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'calculatedLossQuantityKwh':'avgHourlyLossKWh'})
)

# 5) Each priceArea's mean as a fraction of the sum of all priceArea means
#    for the same hour of day
hourly_totals = avg_hourly.groupby('hour')['avgHourlyLossKWh'].transform('sum')
avg_hourly['pctOfHour'] = avg_hourly['avgHourlyLossKWh'] / hourly_totals

# 6) Title. Plotly renders a "\n" in a title as a space; "<br>" gives the
#    line break.
title_text = (
    f"Average Hourly Loss per Price Area<br>"
    f"({span_start} to {span_end})"
)

# 7) Line chart; priceArea and pctOfHour travel along as custom data so the
#    hover text can show them
fig = px.line(
    avg_hourly,
    x='hour',
    y='avgHourlyLossKWh',
    color='priceArea',
    custom_data=['priceArea','pctOfHour'],
    markers=True,
    title=title_text,
    labels={
        'hour': 'Hour of Day',
        'avgHourlyLossKWh': 'Avg Hourly Loss (kWh)'
    },
    color_discrete_sequence=elhub_colors
)

# 8) Trace styling and hover text
fig.update_traces(
    marker=dict(size=MARKER_SIZE),
    line=dict(width=3),
    hovertemplate=(
        "%{y:,.0f} kWh<br>"
        "Hour: %{x}<br>"
        "Area: %{customdata[0]}<br>"
        "Share: %{customdata[1]:.1%}"
        "<extra></extra>"
    )
)

# 9) Layout: one tick per hour, horizontal legend centred above the plot
fig.update_layout(
    title_font=dict(size=TITLE_FS, family=FONT_FAMILY),
    title_x=0.5,
    font=dict(family=FONT_FAMILY),
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS),
        dtick=1
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    legend_title_text='Price Area',
    legend=dict(
        title_font_size=AXIS_TITLE_FS,
        font_size=TICK_FS,
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='center',
        x=0.5
    ),
    margin=dict(t=100, b=80),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

# 10) Show and export
fig.show()

output_path = "diurnal_profile_loss_per_price_area.pdf"
fig.write_image(
    output_path,
    format="pdf",
    width=1000,
    height=700,
    scale=1
)
print(f"✅ Saved PDF to: {output_path}")


### Diurnal Profile of Grid Loss - sorted by season

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
from pandas.api.types import CategoricalDtype

# ─── FONT SETTINGS ────────────────────────────────────────────────────────────
TITLE_FS      = 24
AXIS_TITLE_FS = 20
TICK_FS       = 18
MARKER_SIZE   = 8
FONT_FAMILY   = "Roboto"

# Elhub brand colours, one per season in the order winter, spring, summer, autumn
season_colors = [
    "#212148",  # winter (dark purple)
    "#7fb48a",  # spring (green)
    "#5369b2",  # summer (blue)
    "#05677d",  # autumn (blue-green)
]

# 1) Work on a copy; parse startTime as UTC, then express it in Oslo local time
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) First and last local calendar date, shown in the title
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# 3) Local month (1-12) and hour of day (0-23)
df['month'] = df['startTime'].dt.month
df['hour']  = df['startTime'].dt.hour

# 4) Map month → season (Dec-Feb winter, Mar-May spring, Jun-Aug summer,
#    Sep-Nov autumn)
season_map = {
    **dict.fromkeys([12, 1, 2],   'winter'),
    **dict.fromkeys([3, 4, 5],    'spring'),
    **dict.fromkeys([6, 7, 8],    'summer'),
    **dict.fromkeys([9, 10, 11],  'autumn'),
}
df['season'] = df['month'].map(season_map)

# 5) Ordered categorical so the seasons group and plot in calendar order
#    rather than alphabetically
season_cat = CategoricalDtype(['winter','spring','summer','autumn'], ordered=True)
df['season'] = df['season'].astype(season_cat)

# 6) Mean loss over all rows for each (season, hour of day) pair
avg_season_hourly = (
    df
    .groupby(['season', 'hour'], observed=True)['calculatedLossQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'calculatedLossQuantityKwh': 'avgHourlyLossKWh'})
)

# 7) Each season's mean as a fraction of the sum of all season means for the
#    same hour of day
hourly_totals = avg_season_hourly.groupby('hour')['avgHourlyLossKWh'].transform('sum')
avg_season_hourly['pctOfHour'] = avg_season_hourly['avgHourlyLossKWh'] / hourly_totals

# 8) Title. Plotly renders a "\n" in a title as a space; "<br>" gives the
#    line break.
title_text = (
    f"Average Hourly Loss by Season<br>"
    f"({span_start} to {span_end})"
)

# 9) Line chart; season and pctOfHour travel along as custom data so the
#    hover text can show them
fig = px.line(
    avg_season_hourly,
    x='hour',
    y='avgHourlyLossKWh',
    color='season',
    custom_data=['season', 'pctOfHour'],
    markers=True,
    title=title_text,
    labels={
        'hour': 'Hour of Day',
        'avgHourlyLossKWh': 'Avg Hourly Loss (kWh)',
        'season': 'Season'
    },
    color_discrete_sequence=season_colors
)

# 10) Trace styling and hover text
fig.update_traces(
    marker=dict(size=MARKER_SIZE),
    line=dict(width=3),
    hovertemplate=(
        "%{y:,.0f} kWh<br>"
        "Hour: %{x}<br>"
        "Season: %{customdata[0]}<br>"
        "Share: %{customdata[1]:.1%}"
        "<extra></extra>"
    )
)

# 11) Layout: one tick per hour, horizontal legend centred above the plot
fig.update_layout(
    title_font=dict(size=TITLE_FS, family=FONT_FAMILY),
    title_x=0.5,
    font=dict(family=FONT_FAMILY),
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS),
        dtick=1
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    legend_title_text='Season',
    legend=dict(
        title_font_size=AXIS_TITLE_FS,
        font_size=TICK_FS,
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='center',
        x=0.5
    ),
    margin=dict(t=100, b=80),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

# 12) Show and export
fig.show()

output_path = "diurnal_profile_loss_by_season.pdf"
fig.write_image(
    output_path,
    format="pdf",
    width=1000,
    height=700,
    scale=1
)
print(f"✅ Saved PDF to: {output_path}")


### Average hourly net infeed in kWh per price area

In [None]:
import pandas as pd
import plotly.express as px

# ─── FONT SIZE & FAMILY SETTINGS ──────────────────────────────────────────────
TITLE_FS        = 24
AXIS_TITLE_FS   = 20
TICK_FS         = 18
BAR_TEXT_FS     = 18
FONT_FAMILY     = "Roboto"

# Elhub brand colours
elhub_colors = [
    "#212148", "#7fb48a", "#5369b2",
    "#05677d", "#886599", "#d58000",
]

# 1) Work on a copy; parse startTime as UTC, then express it in Oslo local time
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) Title span at hour resolution. The format string already truncates to the
#    hour, so no floor() call (and no re-localisation) is needed.
span_start = df['startTime'].min().strftime('%Y-%m-%d %H:00')
span_end   = df['startTime'].max().strftime('%Y-%m-%d %H:00')

# 3) Hour key used for grouping. Flooring is done in UTC and the result is
#    converted back: flooring Europe/Oslo timestamps directly re-localises the
#    wall-clock time and raises AmbiguousTimeError for the 02:00 hour that
#    occurs twice when the clocks go back in autumn. Oslo's UTC offset is a
#    whole number of hours, so hour boundaries coincide in both zones.
df['hour'] = (
    df['startTime']
      .dt.tz_convert('UTC')
      .dt.floor('h')
      .dt.tz_convert('Europe/Oslo')
      .astype(str)
)

# 4) Sum net infeed per hour and priceArea
hourly_totals = (
    df
    .groupby(['hour', 'priceArea'])['netInfeedQuantityKwh']
    .sum()
    .reset_index()
)

# 5) Average those hourly sums per priceArea
avg_hourly_infeed = (
    hourly_totals
    .groupby('priceArea')['netInfeedQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'netInfeedQuantityKwh':'avgHourlyInfeedKWh'})
)

# 6) Title. Plotly renders a "\n" in a title as a space; "<br>" gives the
#    line break.
title_text = (
    f"Average Hourly Net Infeed per Price Area<br>"
    f"({span_start} to {span_end})"
)

# 7) Bar chart, one bar per priceArea, in Elhub colours
fig = px.bar(
    avg_hourly_infeed,
    x='priceArea',
    y='avgHourlyInfeedKWh',
    color='priceArea',
    text='avgHourlyInfeedKWh',
    title=title_text,
    labels={'avgHourlyInfeedKWh':'Avg Hourly Net Infeed (kWh)'},
    color_discrete_sequence=elhub_colors
)

# 8) Value labels inside the bars, in white for contrast
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='inside',
    textfont=dict(color='white', size=BAR_TEXT_FS),
    cliponaxis=False
)

# 9) Layout: centred title, Roboto font, white background
fig.update_layout(
    title_font=dict(size=TITLE_FS, family=FONT_FAMILY),
    title_x=0.5,
    font=dict(family=FONT_FAMILY),
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    showlegend=False,
    margin=dict(t=100, b=80),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

# 10) Render
fig.show()

# 11) Export to PDF
output_path = "average_hourly_net_infeed_per_price_area.pdf"
fig.write_image(
    output_path,
    format="pdf",
    width=1000,
    height=700,
    scale=1
)
print(f"✅ Saved PDF to: {output_path}")


### Average daily net infeed in kWh per price area

In [None]:
import pandas as pd
import plotly.express as px

# ─── FONT SIZE & FAMILY SETTINGS ──────────────────────────────────────────────
TITLE_FS        = 24   # plot title
AXIS_TITLE_FS   = 20   # x/y axis titles
TICK_FS         = 18   # x/y tick labels
BAR_TEXT_FS     = 18   # number labels inside bars
FONT_FAMILY     = "Roboto"

# Elhub brand colours
elhub_colors = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]

# 1) Work on a copy; parse startTime as UTC, then express it in Oslo local time
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) First and last local calendar date, shown in the title
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# 3) Local calendar date as a string, used as the grouping key
df['date'] = df['startTime'].dt.date.astype(str)

# 4) Sum net infeed per date and priceArea
daily_totals = (
    df
    .groupby(['date', 'priceArea'])['netInfeedQuantityKwh']
    .sum()
    .reset_index()
)

# 5) Average those daily sums per priceArea
avg_daily_infeed = (
    daily_totals
    .groupby('priceArea')['netInfeedQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'netInfeedQuantityKwh':'avgDailyInfeedKWh'})
)

# 6) Title. Plotly renders a "\n" in a title as a space; "<br>" gives the
#    line break.
title_text = (
    f"Average Daily Net Infeed per Price Area<br>"
    f"({span_start} to {span_end})"
)

# 7) Bar chart, one bar per priceArea, in Elhub colours
fig = px.bar(
    avg_daily_infeed,
    x='priceArea',
    y='avgDailyInfeedKWh',
    color='priceArea',
    text='avgDailyInfeedKWh',
    title=title_text,
    labels={'avgDailyInfeedKWh':'Avg Daily Net Infeed (kWh)'},
    color_discrete_sequence=elhub_colors
)

# 8) Value labels inside the bars, in white for contrast
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='inside',
    textfont=dict(color='white', size=BAR_TEXT_FS),
    cliponaxis=False
)

# 9) Layout: centred title, Roboto font, white background
fig.update_layout(
    title_font=dict(size=TITLE_FS, family=FONT_FAMILY),
    title_x=0.5,
    font=dict(family=FONT_FAMILY),
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    showlegend=False,
    margin=dict(t=100, b=80),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

# 10) Render
fig.show()

# 11) Export to PDF
output_path = "average_daily_net_infeed_per_price_area.pdf"
fig.write_image(
    output_path,
    format="pdf",
    width=1000,
    height=700,
    scale=1
)
print(f"✅ Saved PDF to: {output_path}")


### Average monthly net infeed in kWh per price area

In [None]:
import pandas as pd
import plotly.express as px

# ─── FONT SIZE & FAMILY SETTINGS ──────────────────────────────────────────────
TITLE_FS        = 24
AXIS_TITLE_FS   = 20
TICK_FS         = 18
BAR_TEXT_FS     = 18
FONT_FAMILY     = "Roboto"
# ──────────────────────────────────────────────────────────────────────────────

# Elhub brand colours used for the bars
elhub_colors = [
    "#212148", "#7fb48a", "#5369b2",
    "#05677d", "#886599", "#d58000",
]

# 1) Parse startTime as UTC, then convert to Oslo time so that month
#    boundaries follow the local calendar
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) Determine span for title (first and last local date present in the data)
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# ─── Monthly aggregation ───────────────────────────────────────────────────────

# 3) Extract year-month label ("YYYY-MM").
#    strftime produces the same labels as to_period('M').astype(str) but does
#    not trigger pandas' "will drop timezone information" UserWarning on
#    tz-aware timestamps.
df['month'] = df['startTime'].dt.strftime('%Y-%m')

# 4) Sum hourly infeed into **monthly** totals per priceArea
monthly_totals = (
    df
    .groupby(['month', 'priceArea'])['netInfeedQuantityKwh']
    .sum()
    .reset_index()
)

# 5) Compute the average of those monthly totals for each priceArea
avg_monthly_infeed = (
    monthly_totals
    .groupby('priceArea')['netInfeedQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'netInfeedQuantityKwh':'avgMonthlyInfeedKWh'})
)

# 6) Build dynamic title.
#    Plotly renders titles as pseudo-HTML: a literal "\n" is collapsed to a
#    space, so the line break has to be written as <br>.
title_text = (
    "Average Monthly Net Infeed per Price Area<br>"
    f"({span_start} to {span_end})"
)

# ─── Bar chart of avg_monthly_infeed: one bar per price area ───────────────────

# update_traces / update_layout return the figure itself, so the whole figure
# is built in one chained expression.
fig = (
    px.bar(
        avg_monthly_infeed,
        x='priceArea',
        y='avgMonthlyInfeedKWh',
        color='priceArea',
        text='avgMonthlyInfeedKWh',
        title=title_text,
        labels={'avgMonthlyInfeedKWh':'Avg Monthly Net Infeed (kWh)'},
        color_discrete_sequence=elhub_colors,
    )
    # Bar labels: thousands-separated integers, white, inside the bars
    .update_traces(
        texttemplate='%{text:,.0f}',
        textposition='inside',
        textfont={'color': 'white', 'size': BAR_TEXT_FS},
        cliponaxis=False,
    )
    # Centered title, shared font family, white background, no legend
    .update_layout(
        title_font={'size': TITLE_FS, 'family': FONT_FAMILY},
        title_x=0.5,
        font={'family': FONT_FAMILY},
        xaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
        },
        yaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
        },
        showlegend=False,
        margin={'t': 100, 'b': 80},
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
)

fig.show()

# 7) Export to PDF
output_path = "average_monthly_net_infeed_per_price_area.pdf"
fig.write_image(output_path, format="pdf", width=1000, height=700, scale=1)
print(f"✅ Saved PDF to: {output_path}")

### Diurnal profile of grid net infeed - grouped by season

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
from pandas.api.types import CategoricalDtype

# ─── FONT SIZE SETTINGS ───────────────────────────────────────────────────────
TITLE_FS      = 24
AXIS_TITLE_FS = 20
TICK_FS       = 18
MARKER_SIZE   = 8
FONT_FAMILY   = "Roboto"
# ──────────────────────────────────────────────────────────────────────────────

# Elhub brand colors (the first four are used for the four seasons)
season_colors = [
    "#212148",  # winter (dark purple)
    "#7fb48a",  # spring (green)
    "#5369b2",  # summer (blue)
    "#05677d",  # autumn (blue-green)
]

# 1) Copy & convert timestamps to Oslo time so that hour-of-day and month
#    follow the local clock
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) Build dynamic title span (first and last local date present in the data)
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# 3) Extract month & hour
df['month'] = df['startTime'].dt.month
df['hour']  = df['startTime'].dt.hour

# 4) Map month → season (meteorological seasons)
season_map = {
    **dict.fromkeys([12, 1, 2],   'winter'),
    **dict.fromkeys([3, 4, 5],    'spring'),
    **dict.fromkeys([6, 7, 8],    'summer'),
    **dict.fromkeys([9, 10, 11],  'autumn'),
}
df['season'] = df['month'].map(season_map)

# 5) Enforce season ordering: the ordered categorical makes the groupby below
#    emit winter → spring → summer → autumn, which is the order in which the
#    entries of season_colors are meant to be used
season_cat = CategoricalDtype(['winter','spring','summer','autumn'], ordered=True)
df['season'] = df['season'].astype(season_cat)

# 6) Compute avg infeed per hour & season
# NOTE(review): the mean is taken over the individual rows of big_df. If
# big_df holds one row per grid area and hour (as the LOSS_PER_MGA_HOUR dataset
# name suggests), this is the average infeed *per grid area*, not the average
# of the summed infeed — confirm that this is the intended quantity.
avg_season_hourly = (
    df
    .groupby(['season','hour'], observed=True)['netInfeedQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'netInfeedQuantityKwh':'avgHourlyInfeedKWh'})
)

# 7) Compute each season’s share of the sum of the four seasonal averages
#    at the same hour
hourly_totals = avg_season_hourly.groupby('hour')['avgHourlyInfeedKWh'].transform('sum')
avg_season_hourly['pctOfHour'] = avg_season_hourly['avgHourlyInfeedKWh'] / hourly_totals

# 8) Title text.
#    Plotly renders titles as pseudo-HTML: a literal "\n" is collapsed to a
#    space, so the line break has to be written as <br>.
title_text = (
    "Average Hourly Infeed by Season<br>"
    f"({span_start} to {span_end})"
)

# 9–11) Build the line chart (one line per season), then style traces and
#        layout in one chained expression; season & pctOfHour travel in
#        custom_data so the hover box can show them.
fig = (
    px.line(
        avg_season_hourly,
        x='hour',
        y='avgHourlyInfeedKWh',
        color='season',
        custom_data=['season','pctOfHour'],
        markers=True,
        title=title_text,
        labels={
            'hour': 'Hour of Day',
            'avgHourlyInfeedKWh': 'Avg Hourly Infeed (kWh)',
            'season': 'Season'
        },
        color_discrete_sequence=season_colors,
    )
    .update_traces(
        marker={'size': MARKER_SIZE},
        line={'width': 3},
        # Hover box: value, hour, season, share; <extra></extra> hides the
        # secondary box with the trace name
        hovertemplate="<br>".join([
            "%{y:,.0f} kWh",
            "Hour: %{x}",
            "Season: %{customdata[0]}",
            "Share: %{customdata[1]:.1%}",
        ]) + "<extra></extra>",
    )
    .update_layout(
        title_font={'size': TITLE_FS, 'family': FONT_FAMILY},
        title_x=0.5,
        font={'family': FONT_FAMILY},
        xaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
            'dtick': 1,   # one tick per hour
        },
        yaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
        },
        legend_title_text='Season',
        # Horizontal legend, centered just above the plot area
        legend={
            'title_font_size': AXIS_TITLE_FS,
            'font_size': TICK_FS,
            'orientation': 'h',
            'yanchor': 'bottom',
            'y': 1.02,
            'xanchor': 'center',
            'x': 0.5,
        },
        margin={'t': 100, 'b': 80},
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
)

# 12) Show & export
fig.show()

output_path = "diurnal_profile_infeed_by_season.pdf"
fig.write_image(output_path, format="pdf", width=1000, height=700, scale=1)
print(f"✅ Saved PDF to: {output_path}")


### Diurnal Profile of grid net infeed - sorted by price area

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# ─── ELHUB DESIGN MANUAL ──────────────────────────────────────────────────────
TITLE_FS        = 24        # Title font size
AXIS_TITLE_FS   = 20        # Axis title font size
TICK_FS         = 18        # Tick label font size
MARKER_SIZE     = 8         # Marker diameter
LINE_WIDTH      = 3         # Line width
FONT_FAMILY     = "Roboto"  # Global font

# Elhub palette (six colours), used in order for the price areas:
ELHUB_PALETTE = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]
# ──────────────────────────────────────────────────────────────────────────────

# 1) Copy & convert timestamps to Oslo time so that hour-of-day follows the
#    local clock
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) Build dynamic title span (first and last local date present in the data)
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# 3) Extract hour of day
df['hour'] = df['startTime'].dt.hour

# 4) Compute avg infeed per hour & price area
# NOTE(review): the mean is taken over the individual rows of big_df. If
# big_df holds one row per grid area and hour (as the LOSS_PER_MGA_HOUR dataset
# name suggests), this is the average infeed *per grid area* within the price
# area, not the average of the price area's total infeed — confirm that this
# is the intended quantity (the daily/monthly charts sum per price area first).
avg_price_hourly = (
    df
    .groupby(['priceArea', 'hour'], observed=True)['netInfeedQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'netInfeedQuantityKwh':'avgHourlyInfeedKWh'})
)

# 5) Compute each area’s share of the sum of all areas' averages at that hour
hourly_totals = avg_price_hourly.groupby('hour')['avgHourlyInfeedKWh'].transform('sum')
avg_price_hourly['pctOfHour'] = avg_price_hourly['avgHourlyInfeedKWh'] / hourly_totals

# 6) Title text.
#    Plotly renders titles as pseudo-HTML: a literal "\n" is collapsed to a
#    space, so the line break has to be written as <br>.
title_text = (
    "Average Hourly Infeed by Price Area<br>"
    f"({span_start} to {span_end})"
)

# 7–9) Build the line chart (one line per price area), then style traces and
#       layout in one chained expression; priceArea & pctOfHour travel in
#       custom_data so the hover box can show them.
fig = (
    px.line(
        avg_price_hourly,
        x='hour',
        y='avgHourlyInfeedKWh',
        color='priceArea',
        custom_data=['priceArea', 'pctOfHour'],
        markers=True,
        title=title_text,
        labels={
            'hour': 'Hour of Day',
            'avgHourlyInfeedKWh': 'Avg Hourly Infeed (kWh)',
            'priceArea': 'Price Area'
        },
        color_discrete_sequence=ELHUB_PALETTE,
    )
    .update_traces(
        marker={'size': MARKER_SIZE},
        line={'width': LINE_WIDTH},
        # Hover box: value, hour, area, share; <extra></extra> hides the
        # secondary box with the trace name
        hovertemplate="<br>".join([
            "%{y:,.0f} kWh",
            "Hour: %{x}",
            "Area: %{customdata[0]}",
            "Share: %{customdata[1]:.1%}",
        ]) + "<extra></extra>",
    )
    .update_layout(
        title_font={'size': TITLE_FS, 'family': FONT_FAMILY},
        title_x=0.5,
        font={'family': FONT_FAMILY},
        xaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
            'dtick': 1,   # one tick per hour
        },
        yaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
        },
        legend_title_text='Price Area',
        # Horizontal legend, centered just above the plot area
        legend={
            'title_font_size': AXIS_TITLE_FS,
            'font_size': TICK_FS,
            'orientation': 'h',
            'yanchor': 'bottom',
            'y': 1.02,
            'xanchor': 'center',
            'x': 0.5,
        },
        margin={'t': 100, 'b': 80},
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
)

# 10) Show & export
fig.show()

output_path = "diurnal_profile_infeed_by_price_area.pdf"
fig.write_image(output_path, format="pdf", width=1000, height=700, scale=1)
print(f"✅ Saved PDF to: {output_path}")


### Daily Grid Loss by Price Area - one bar per day (probably most useful in an interactive dashboard, e.g. Streamlit)

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# ─── ELHUB DESIGN MANUAL ──────────────────────────────────────────────────────
TITLE_FS      = 24        # Title font size
AXIS_TITLE_FS = 20        # Axis title font size
TICK_FS       = 18        # Tick label font size
FONT_FAMILY   = "Roboto"  # Global font

# Elhub palette for price areas
ELHUB_PALETTE = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]
# ──────────────────────────────────────────────────────────────────────────────

# 1) Prepare the DataFrame (assumes big_df is loaded): parse startTime as UTC
#    and convert to Oslo time so that days follow the local calendar
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) Extract date span for title (first and last local date in the data)
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# 3) Extract date (YYYY-MM-DD) as a string; strftime does this vectorised
#    instead of building Python date objects first
df['date'] = df['startTime'].dt.strftime('%Y-%m-%d')

# 4) Aggregate total loss per day and priceArea
daily_loss = (
    df
    .groupby(['date', 'priceArea'], observed=True)['calculatedLossQuantityKwh']
    .sum()
    .reset_index()
)

# 5) Compute each area’s share of that day’s total (a fraction in [0, 1] when
#    all losses are non-negative; formatted as a percentage in the hover box)
totals = daily_loss.groupby('date')['calculatedLossQuantityKwh'].transform('sum')
daily_loss['pctOfTotal'] = daily_loss['calculatedLossQuantityKwh'] / totals

# 6) Build title.
#    Plotly renders titles as pseudo-HTML: a literal "\n" is collapsed to a
#    space, so the line break has to be written as <br>.
title_text = (
    "Daily Grid Loss by Price Area<br>"
    f"({span_start} to {span_end})"
)

# 7–8) Build the stacked bar chart (one bar per day, one segment per price
#       area) and apply the dark Elhub styling in one chained expression.
#       priceArea & pctOfTotal travel in custom_data for the hover box.
fig = (
    px.bar(
        daily_loss,
        x='date',
        y='calculatedLossQuantityKwh',
        color='priceArea',
        custom_data=['priceArea', 'pctOfTotal'],
        title=title_text,
        labels={
            'date': 'Date',
            'calculatedLossQuantityKwh': 'Total Loss (kWh)',
            'priceArea': 'Price Area'
        },
        color_discrete_sequence=ELHUB_PALETTE,
        opacity=1.0,
    )
    .update_traces(
        # Thin white outline separates the stacked segments
        marker={'line': {'width': 0.5, 'color': 'white'}},
        # Hover box: loss, date, price area, share of the day's total
        hovertemplate="<br>".join([
            "%{y:,.0f} kWh",
            "Date: %{x}",
            "Area: %{customdata[0]}",
            "Share: %{customdata[1]:.1%}",
        ]) + "<extra></extra>",
    )
    .update_layout(
        font={'family': FONT_FAMILY, 'color': 'white'},
        title={
            'text': title_text,
            'font': {'size': TITLE_FS, 'family': FONT_FAMILY, 'color': 'white'},
            'x': 0.5,
        },
        xaxis={
            'title_font': {'size': AXIS_TITLE_FS, 'color': 'white'},
            'tickfont': {'size': TICK_FS, 'color': 'white'},
            # Dates are strings: treat them as categories, sorted ascending
            'type': 'category',
            'categoryorder': 'category ascending',
            'gridcolor': '#222222',
        },
        yaxis={
            'title_font': {'size': AXIS_TITLE_FS, 'color': 'white'},
            'tickfont': {'size': TICK_FS, 'color': 'white'},
            'gridcolor': '#222222',
        },
        legend_title_text='Price Area',
        # Horizontal legend with transparent background, centered above the plot
        legend={
            'title_font_size': AXIS_TITLE_FS,
            'font_size': TICK_FS,
            'font_color': 'white',
            'orientation': 'h',
            'yanchor': 'bottom',
            'y': 1.02,
            'xanchor': 'center',
            'x': 0.5,
            'bgcolor': 'rgba(0,0,0,0)',
        },
        plot_bgcolor='black',
        paper_bgcolor='black',
        margin={'t': 100, 'b': 80},
        barmode='stack',
    )
)

# 9) Show and export
fig.show()

output_path = "daily_all_grid_loss_by_price_area.pdf"
fig.write_image(output_path, format="pdf", width=1000, height=700, scale=1)
print(f"✅ Saved PDF to: {output_path}")


### Monthly Grid Loss by Price Area every month

In [None]:
import pandas as pd
import plotly.express as px

# ─── FONT SIZE & FAMILY SETTINGS ──────────────────────────────────────────────
TITLE_FS        = 24   # plot title
AXIS_TITLE_FS   = 20   # x/y axis titles
TICK_FS         = 18   # x/y tick labels
LEGEND_TITLE_FS = 18   # legend title
LEGEND_FS       = 16   # legend items
FONT_FAMILY     = "Roboto"
# ──────────────────────────────────────────────────────────────────────────────

# Elhub brand colors
elhub_colors = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]

# 1) Parse startTime as UTC, then convert to Oslo time so that month
#    boundaries follow the local calendar
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) Determine span for title (first to last month, "YYYY-MM").
#    Formatting the earliest/latest timestamp directly gives the same labels as
#    going through to_period('M'), without pandas' "will drop timezone
#    information" UserWarning on tz-aware data.
span_start = df['startTime'].min().strftime('%Y-%m')
span_end   = df['startTime'].max().strftime('%Y-%m')

# 3) Extract year-month label ("YYYY-MM"); sorts chronologically as a string
df['month'] = df['startTime'].dt.strftime('%Y-%m')

# 4) Aggregate total loss per month and priceArea
monthly_loss = (
    df
    .groupby(['month', 'priceArea'])['calculatedLossQuantityKwh']
    .sum()
    .reset_index()
)

# 5) Compute each priceArea's percentage share (0–100) of that month's total loss
monthly_loss['month_total'] = monthly_loss.groupby('month')['calculatedLossQuantityKwh'].transform('sum')
monthly_loss['pctShare']    = monthly_loss['calculatedLossQuantityKwh'] / monthly_loss['month_total'] * 100

# 6) Build dynamic title.
#    Plotly renders titles as pseudo-HTML: a literal "\n" is collapsed to a
#    space, so the line break has to be written as <br>.
title_text = (
    "Monthly Grid Loss by Price Area (stacked)<br>"
    f"({span_start} to {span_end})"
)

# 7–9) Build the stacked bar chart (one bar per month, one segment per price
#       area), style the layout and set the hover text in one chained
#       expression. pctShare is carried in custom_data for the hover only.
fig = (
    px.bar(
        monthly_loss,
        x='month',
        y='calculatedLossQuantityKwh',
        color='priceArea',
        custom_data=['pctShare'],
        title=title_text,
        labels={
            'month': 'Month',
            'calculatedLossQuantityKwh': 'Total Loss (kWh)',
            'priceArea': 'Price Area'
        },
        # "YYYY-MM" strings sort chronologically
        category_orders={'month': sorted(monthly_loss['month'].unique())},
        color_discrete_sequence=elhub_colors,
    )
    .update_layout(
        barmode='stack',
        title_font={'size': TITLE_FS, 'family': FONT_FAMILY},
        title_x=0.5,
        font={'family': FONT_FAMILY},
        xaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
            'categoryorder': 'category ascending',
        },
        yaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
        },
        legend_title_font={'size': LEGEND_TITLE_FS},
        legend_font={'size': LEGEND_FS},
        margin={'t': 100, 'b': 80},
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
    .update_traces(
        # No text inside the bars; kWh and % share appear in the tooltip only
        text=None,
        hovertemplate="<br>".join([
            "<b>Price Area:</b> %{fullData.name}",
            "<b>Month:</b> %{x}",
            "<b>Total Loss:</b> %{y:,.0f} kWh",
            "<b>Share:</b> %{customdata[0]:.1f}%<extra></extra>",
        ]),
    )
)

# 10) Render
fig.show()

# 11) Export to PDF
output_path = "monthly_grid_loss_per_price_area.pdf"
fig.write_image(output_path, format="pdf", width=1000, height=700, scale=1)
print(f"✅ Saved PDF to: {output_path}")


### Monthly Grid Loss by Price Area aggregated by month

In [None]:
import pandas as pd
import plotly.express as px

# ─── FONT SIZE & FAMILY SETTINGS ──────────────────────────────────────────────
TITLE_FS        = 24   # plot title
AXIS_TITLE_FS   = 20   # x/y axis titles
TICK_FS         = 18   # x/y tick labels
LEGEND_TITLE_FS = 18   # legend title
LEGEND_FS       = 16   # legend items
FONT_FAMILY     = "Roboto"
# ──────────────────────────────────────────────────────────────────────────────

# Elhub brand colors
elhub_colors = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]

# ─── PREPARE THE DATAFRAME ────────────────────────────────────────────────────
# Parse startTime as UTC and convert to Oslo time so that year/month follow the
# local calendar
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)
df['year']        = df['startTime'].dt.year
df['month_num']   = df['startTime'].dt.month
# Three-letter month abbreviation ("Jan" … "Dec"); must match month_order below
df['month_name']  = df['startTime'].dt.month_name().str.slice(stop=3)
# ──────────────────────────────────────────────────────────────────────────────

# 1) Compute monthly total loss for each year-month & priceArea
monthly_totals = (
    df
    .groupby(['year','month_num','month_name','priceArea'])['calculatedLossQuantityKwh']
    .sum()
    .reset_index(name='monthlyLossKWh')
)

# 2) Average across years to get one value per calendar month & priceArea
avg_calendar = (
    monthly_totals
    .groupby(['month_num','month_name','priceArea'])['monthlyLossKWh']
    .mean()
    .reset_index(name='avgMonthlyLossKWh')
)

# 3) Ensure calendar-month ordering Jan → Dec.
#    priceArea is a secondary sort key so that the row order (and with it the
#    colour assignment and stacking order in the chart) is deterministic; a
#    single-key sort_values is not stable by default.
month_order = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]
avg_calendar['month_name'] = pd.Categorical(
    avg_calendar['month_name'],
    categories=month_order,
    ordered=True
)
avg_calendar = avg_calendar.sort_values(['month_name', 'priceArea'])

# 4) Compute each priceArea's percentage share (0–100) of that calendar-month
#    avg loss. observed=True is passed explicitly because month_name is
#    categorical: it restricts grouping to months present in the data and avoids
#    pandas' FutureWarning about the changing default.
avg_calendar['month_total'] = (
    avg_calendar
    .groupby('month_name', observed=True)['avgMonthlyLossKWh']
    .transform('sum')
)
avg_calendar['pctShare']    = avg_calendar['avgMonthlyLossKWh'] / avg_calendar['month_total'] * 100

# 5) Build dynamic title.
#    Plotly renders titles as pseudo-HTML: a literal "\n" is collapsed to a
#    space, so the line break has to be written as <br>.
year_start = df['year'].min()
year_end   = df['year'].max()
title_text = (
    "Average Monthly Grid Loss by Price Area<br>"
    f"(Calendar Months {year_start}–{year_end})"
)

# 6–8) Build the stacked bar chart (one bar per calendar month, one segment
#       per price area), style the layout and set the hover text in one
#       chained expression. pctShare is carried in custom_data for the hover only.
fig = (
    px.bar(
        avg_calendar,
        x='month_name',
        y='avgMonthlyLossKWh',
        color='priceArea',
        custom_data=['pctShare'],
        title=title_text,
        labels={
            'month_name': 'Month',
            'avgMonthlyLossKWh': 'Avg Loss (kWh)',
            'priceArea': 'Price Area'
        },
        category_orders={'month_name': month_order},
        color_discrete_sequence=elhub_colors,
    )
    .update_layout(
        barmode='stack',
        title_font={'size': TITLE_FS, 'family': FONT_FAMILY},
        title_x=0.5,
        font={'family': FONT_FAMILY},
        xaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
            # Force Jan → Dec on the axis
            'categoryorder': 'array',
            'categoryarray': month_order,
        },
        yaxis={
            'title_font': {'size': AXIS_TITLE_FS},
            'tickfont': {'size': TICK_FS},
        },
        legend_title_font={'size': LEGEND_TITLE_FS},
        legend_font={'size': LEGEND_FS},
        margin={'t': 100, 'b': 80},
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
    .update_traces(
        # kWh and % share appear in the tooltip only
        hovertemplate="<br>".join([
            "<b>Price Area:</b> %{fullData.name}",
            "<b>Month:</b> %{x}",
            "<b>Avg Loss:</b> %{y:,.0f} kWh",
            "<b>Share:</b> %{customdata[0]:.1f}%<extra></extra>",
        ]),
    )
)

# 9) Render
fig.show()

# 10) Export to PDF
output_path = "average_monthly_grid_loss_by_price_area_calendar.pdf"
fig.write_image(output_path, format="pdf", width=1000, height=700, scale=1)
print(f"✅ Saved PDF to: {output_path}")


### Histogram of Hourly Losses by Price Area and season

In [None]:
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# ─── FONT SIZE & FAMILY SETTINGS ──────────────────────────────────────────────
TITLE_FS           = 24   # plot title
AXIS_TITLE_FS      = 22   # x/y axis titles
TICK_FS            = 20   # x/y tick labels
SUBPLOT_TITLE_FS   = 20   # subplot titles (season & price area)
FONT_FAMILY        = "Roboto"
# ──────────────────────────────────────────────────────────────────────────────

# Elhub brand colors
elhub_colors = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]
# Summer takes the first brand color, Winter the second
season_colors = dict(zip(('Summer', 'Winter'), elhub_colors[:2]))

# ── Assumes big_df exists with columns: 'startTime','priceArea','calculatedLossQuantityKwh' ──

# 1) Convert timestamps to Oslo time and label each row with its season.
#    Only Jun–Aug (Summer) and Dec–Feb (Winter) get a label; all other months
#    are left unlabelled and dropped in the next step.
df = big_df.copy()
df['startTime'] = pd.to_datetime(df['startTime'], utc=True).dt.tz_convert('Europe/Oslo')
df['month'] = df['startTime'].dt.month
df['season'] = df['month'].map({
    **dict.fromkeys((6, 7, 8), 'Summer'),
    **dict.fromkeys((12, 1, 2), 'Winter'),
})

# 2) Keep Summer/Winter rows with strictly positive loss values only
df = df[df['season'].notna()]
df = df[df['calculatedLossQuantityKwh'] > 0]

# 3) Overall span for the title (month/year only), taken from the kept rows
span_start = df['startTime'].min().strftime('%b %Y')
span_end   = df['startTime'].max().strftime('%b %Y')

# 4) 0th and 90th percentiles of the kept losses: the common x-range used for
#    every histogram
lo, hi = df['calculatedLossQuantityKwh'].quantile([0.0, 0.9])

# 5) Subplot grid: one row per price area, Summer in column 1 and Winter in
#    column 2; titles are listed row by row
price_areas = sorted(df['priceArea'].unique())
n = len(price_areas)
subplot_titles = [
    f"{pa} — {season}"
    for pa in price_areas
    for season in ('Summer', 'Winter')
]

fig = make_subplots(
    rows=n,
    cols=2,
    shared_xaxes='rows',
    horizontal_spacing=0.1,   # wider gap between the two columns
    vertical_spacing=0.08,
    subplot_titles=subplot_titles,
)

# 6) Bins: N_BINS equal-width bins spanning [lo, hi], shared by all subplots
N_BINS = 40
bin_spec = dict(start=lo, end=hi, size=(hi - lo) / N_BINS)

# 7) Add one histogram per (price area, season) cell with Elhub colors
for i, pa in enumerate(price_areas, start=1):
    for j, season in enumerate(['Summer', 'Winter'], start=1):
        losses = df.loc[
            (df['priceArea'] == pa) & (df['season'] == season),
            'calculatedLossQuantityKwh'
        ]
        # Trim, do not clip: clipping would move every value above the 90th
        # percentile onto `hi` and pile ~10% of the observations up at the
        # upper edge, whereas the chart is labelled as "trimmed" to 0–90%.
        data = losses[losses.between(lo, hi)]
        fig.add_trace(
            go.Histogram(
                x=data,
                xbins=bin_spec,
                marker_color=season_colors[season],
                showlegend=False
            ),
            row=i, col=j
        )

# 8) Same x-range and tick settings on every subplot (no row/col selector
#    means the update is applied to all x-axes of the grid)
fig.update_xaxes(
    range=[lo, hi],
    showticklabels=True,
    nticks=8,
    tickangle=0,
    automargin=True,
)

# 9) Only the bottom row carries an x-axis title
x_title = "Loss (kWh) trimmed to 0–90 percentile"
for bottom_col in (1, 2):
    fig.update_xaxes(title_text=x_title, row=n, col=bottom_col)

# 10) Y-axis title on the Summer column only; the Winter column stays blank
for subplot_row in range(1, n + 1):
    fig.update_yaxes(title_text="Frequency", automargin=True, row=subplot_row, col=1)
    fig.update_yaxes(title_text="", row=subplot_row, col=2)

# 11) Elhub styling; figure height grows with the number of price areas
fig.update_layout(
    title_text=(
        f"Seasonal Distributions of Hourly Loss by Price Area (0–90% trimmed)<br>"
        f"{span_start} – {span_end}"
    ),
    title_x=0.5,
    title_font={'size': TITLE_FS, 'family': FONT_FAMILY},
    font={'family': FONT_FAMILY},
    plot_bgcolor='white',
    paper_bgcolor='white',
    margin={
        't': 140,   # room for the two-line title
        'b': 120,   # room for x tick labels and axis title
        'l': 100,   # room for y tick labels and axis title
    },
    height=300 * n,
    width=900,
    autosize=True,
)

# 12) Axis fonts (applied to every x- and y-axis)
axis_fonts = {
    'title_font': {'size': AXIS_TITLE_FS, 'family': FONT_FAMILY},
    'tickfont': {'size': TICK_FS},
}
fig.update_xaxes(**axis_fonts)
fig.update_yaxes(**axis_fonts)
del axis_fonts

# 13) Subplot titles are stored as layout annotations: set their font
for title_ann in fig.layout.annotations:
    title_ann.font = {'size': SUBPLOT_TITLE_FS, 'family': FONT_FAMILY}

# 14) Show and save (PDF uses the same size as the on-screen figure)
fig.show()

output_path = "histogram_seasonal_hourly_loss_by_price_area.pdf"
fig.write_image(output_path, format="pdf", width=900, height=300 * n, scale=1)
print(f"✅ Saved PDF to: {output_path}")


### Histogram of Summer/Winter hourly loss distributions for the lowest, median and highest‐infeed grid‐areas

In [None]:
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# ─── Typography used for every text element of the figure ────────────────────
FONT_FAMILY = "Roboto"
TITLE_FS = 24          # plot title
AXIS_TITLE_FS = 22     # x/y axis titles
TICK_FS = 20           # x/y tick labels
SUBPLOT_TITLE_FS = 20  # subplot titles (grid-area & price area)
# ──────────────────────────────────────────────────────────────────────────────

# Elhub brand colours (English names of the original Norwegian labels)
elhub_colors = [
    "#212148",  # dark purple
    "#7fb48a",  # green
    "#5369b2",  # blue
    "#05677d",  # blue-green
    "#886599",  # purple
    "#d58000",  # ochre
]

# Summer and Winter take the first and second brand colour respectively
# (zip stops after the two season names).
season_colors = dict(zip(("Summer", "Winter"), elhub_colors))

# ── Input: `big_df` with the columns
#    'startTime', 'gridArea', 'priceArea',
#    'calculatedLossQuantityKwh', 'netInfeedQuantityKwh'
# ──


def _season_of(month):
    """Return 'Summer' for months 6-8, 'Winter' for 12, 1, 2, else None."""
    if month in (6, 7, 8):
        return 'Summer'
    if month in (12, 1, 2):
        return 'Winter'
    return None


# 1) Convert timestamps to Europe/Oslo time and tag every row with its
#    month and season. `df_full` keeps all months (used for the net-infeed
#    totals further down).
df_full = big_df.copy()
utc_times = pd.to_datetime(df_full['startTime'], utc=True)
df_full['startTime'] = utc_times.dt.tz_convert('Europe/Oslo')
df_full['month'] = df_full['startTime'].dt.month
df_full['season'] = df_full['month'].map(_season_of)

# The histograms only use rows that fall in Summer or Winter
df = df_full[df_full['season'].notna()]

# 2) Non-positive losses are kept so that all values are visible.
#    Uncomment to drop them instead:
# df = df[df['calculatedLossQuantityKwh'] > 0]

# 3) First and last month (month/year only) of the seasonal data, for the title
seasonal_times = df['startTime']
span_start = seasonal_times.min().strftime('%b %Y')
span_end = seasonal_times.max().strftime('%b %Y')

# ── Rank grid-areas by their total netInfeedQuantityKwh (ascending) ──────────
infeed_by_area = (
    df_full.groupby('gridArea', as_index=False)['netInfeedQuantityKwh'].sum()
)
totals = infeed_by_area.sort_values('netInfeedQuantityKwh')

ranked_areas = totals.iloc[:, 0]    # first column holds the gridArea key
lowest = ranked_areas.iloc[0]
highest = ranked_areas.iloc[-1]
# middle of the ranking (upper-middle entry when the count is even)
median = ranked_areas.iloc[len(ranked_areas) // 2]
selected_grid_areas = [lowest, median, highest]

# gridArea → priceArea, taken from the first row seen for each grid-area
first_row_per_area = df_full.drop_duplicates(subset='gridArea')
price_map = first_row_per_area.set_index('gridArea')['priceArea'].to_dict()

# 4) Trimming window: 0th and 90th percentile of the loss over ALL Summer/Winter
#    rows in `df` (every grid-area, not only the selected ones), so the three
#    rows of panels share one x-range.
# NOTE(review): because the window is global, a grid-area whose losses lie
# mostly above `hi` ends up with an (almost) empty panel — confirm that a
# shared window is what is wanted here.
lo, hi = df['calculatedLossQuantityKwh'].quantile([0.0, 0.9])

# 5) Subplot titles: one Summer and one Winter title per selected grid-area,
#    prefixed with its rank label and followed by its price area
row_labels = ['Lowest', 'Median', 'Highest']
subplot_titles = []
for label, ga in zip(row_labels, selected_grid_areas):
    pa = price_map.get(ga, "Unknown")
    subplot_titles += [
        f"[{label}] {ga} ({pa}) — Summer",
        f"[{label}] {ga} ({pa}) — Winter"
    ]

# 6) Create 3×2 subplot grid (rows: grid-areas, columns: Summer | Winter)
fig = make_subplots(
    rows=3, cols=2,
    shared_xaxes='rows',
    horizontal_spacing=0.1,
    vertical_spacing=0.08,
    subplot_titles=subplot_titles
)

# 7) One explicit bin spec shared by all panels: N_BINS equal-width bins over
#    [lo, hi]. (The former unused `bins = np.linspace(...)` array was removed;
#    plotly only ever received this start/end/size spec.)
N_BINS = 40
hist_bins = dict(start=lo, end=hi, size=(hi - lo) / N_BINS)

# 8) Add histograms
for i, ga in enumerate(selected_grid_areas, start=1):
    area_rows = df[df['gridArea'] == ga]
    for j, season in enumerate(['Summer', 'Winter'], start=1):
        losses = area_rows.loc[
            area_rows['season'] == season, 'calculatedLossQuantityKwh'
        ]
        # Trim rather than clip: clipping would move every value above the
        # 90th percentile onto `hi` itself, i.e. onto the edge of the binned
        # range, instead of leaving it out as the "trimmed" title and axis
        # label state.
        vals = losses[losses.between(lo, hi)]
        fig.add_trace(
            go.Histogram(
                x=vals,
                xbins=hist_bins,
                marker_color=season_colors[season],
                showlegend=False
            ),
            row=i, col=j
        )

# 9) Identical x-axis range and tick settings on all six panels
shared_x_settings = dict(range=[lo, hi], nticks=8, tickangle=0, automargin=True)
for row in (1, 2, 3):
    for col in (1, 2):
        fig.update_xaxes(row=row, col=col, **shared_x_settings)

# x-axis title only under the bottom row, in both columns
x_title = "Loss (kWh) trimmed to 0–90 percentile"
for col in (1, 2):
    fig.update_xaxes(title_text=x_title, row=3, col=col)

# y-axis title on the left column only; the right column's title is blanked
for row in (1, 2, 3):
    fig.update_yaxes(title_text="Frequency", automargin=True, row=row, col=1)
    fig.update_yaxes(title_text="", row=row, col=2)

# 10) Layout & styling; the same pixel size is reused for the PDF export
fig_width = 900
fig_height = 300 * 3
main_title = (
    "Seasonal Distributions of Hourly Loss by Selected Grid‐Areas (0–90% trimmed)<br>"
    f"{span_start} – {span_end}"
)
fig.update_layout(
    title_text=main_title,
    title_x=0.5,
    title_font=dict(size=TITLE_FS, family=FONT_FAMILY),
    font=dict(family=FONT_FAMILY),
    plot_bgcolor='white',
    paper_bgcolor='white',
    margin=dict(t=140, b=120, l=100),
    height=fig_height,
    width=fig_width,
    autosize=True,
)

# Same title/tick fonts on every axis
axis_font_style = dict(
    title_font=dict(size=AXIS_TITLE_FS, family=FONT_FAMILY),
    tickfont=dict(size=TICK_FS),
)
fig.update_xaxes(**axis_font_style)
fig.update_yaxes(**axis_font_style)

# Restyle the subplot titles (held in the layout annotations)
for annotation in fig.layout.annotations:
    annotation.font = dict(size=SUBPLOT_TITLE_FS, family=FONT_FAMILY)

# 11) Show & save
fig.show()
output_path = "histogram_seasonal_hourly_loss_by_gridArea.pdf"
fig.write_image(
    output_path,
    format="pdf",
    width=fig_width,
    height=fig_height,
    scale=1,
)
print(f"✅ Saved PDF to: {output_path}")


In [None]:
# ── RANGE SUMMARY FOR SELECTED GRID-AREAS ───────────────────────────────────────
# Requires `df_full` and `selected_grid_areas` to be defined already.

# Output column name -> (source column, aggregation)
range_aggregations = {
    'netinfeed_min': ('netInfeedQuantityKwh', 'min'),
    'netinfeed_max': ('netInfeedQuantityKwh', 'max'),
    'loss_min': ('calculatedLossQuantityKwh', 'min'),
    'loss_max': ('calculatedLossQuantityKwh', 'max'),
}

# One row per selected grid-area with the min/max of net infeed and loss
in_selection = df_full['gridArea'].isin(selected_grid_areas)
summary = (
    df_full.loc[in_selection]
    .groupby('gridArea')
    .agg(**range_aggregations)
    .reset_index()
)

print("\nRange of netInfeedQuantityKwh & calculatedLossQuantityKwh for selected grid-areas:\n")
print(summary.to_string(index=False))


## Appendix 

### Elhub color palette

In [None]:
# Elhub colour palette: colour family -> list of hex codes.
# The comment above each family gives the shade label of every entry,
# in list order.
elhubs_palette = {
    # Grey-900, 800, 700, 600, 500, 400, 300, 200, 100, 0
    "grey": [
        "#2A2B2D", "#4D4F54", "#76777E", "#B4B4B9", "#CFD0D3",
        "#D8D8DC", "#F4F4F4", "#F6F6F6", "#FCFCFD", "#FFFFFF",
    ],
    # Green-700, 600, 550, 500, 450, 400, 300, 200, 150
    "green": [
        "#0B3C28", "#0A4420", "#274E3B", "#0F5537", "#1F8F5D",
        "#93E08A", "#5FEF55", "#F5FFF5", "#E6F3EF",
    ],
    # Blue-600, 500, 400
    "blue": ["#104AA2", "#4A60F2", "#E6F5FF"],
    # Purple-700, 600, 500, 400, 300, 200
    "purple": [
        "#27148B", "#3A2A7D", "#5F3DFF", "#ACB2FF", "#E3D7FF", "#F8F5FF",
    ],
    # Red-600, 500, 400
    "red": ["#7F1D35", "#D34A3D", "#FCD2D2"],
    # Orange-700, 500, 300
    "orange": ["#F7430C", "#FFD070", "#FFE3C0"],
    # Yellow-500, 400
    "yellow": ["#EEC116", "#FFF8DF"],
    # Brown-600
    "brown": ["#77470D"],
    # Beige-600, 400
    "beige": ["#F0E6E5", "#FCFAF6"],
}
