# Elhub API data - Gridloss - Summerproject 2025

## Visualization

Bjørn Eirik Rognskog Nordbak

### Importing data from Elhub API
https://api.elhub.no/energy-data-api#/grid-areas

In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

oslo = ZoneInfo("Europe/Oslo")

def fetch_window(start_dt, end_dt, timeout=60):
    """Fetch hourly grid-loss records from the Elhub API for one time window.

    Requests the ``LOSS_PER_MGA_HOUR`` dataset from the grid-areas endpoint
    and flattens the response to one row per ``lossPerMgaHour`` record, with
    the owning grid area's ``eic``, ``name`` and ``status`` attributes
    repeated on every row.

    Args:
        start_dt: Window start; sent as ``startDate`` via ``isoformat()``.
        end_dt: Window end; sent as ``endDate`` via ``isoformat()``.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block the whole download loop indefinitely.

    Returns:
        A DataFrame of flattened records, or an empty DataFrame when the
        request fails with an HTTP error or the response carries no data.
    """
    params = {
        "dataset":   "LOSS_PER_MGA_HOUR",
        "startDate": start_dt.isoformat(),
        "endDate":   end_dt.isoformat(),
    }
    url = "https://api.elhub.no/energy-data/v0/grid-areas"
    resp = requests.get(url, params=params, timeout=timeout)

    # --- safeguard: an HTTP error may come with a non-JSON body, so report
    #     the status and skip the window instead of failing in resp.json() ----
    if not resp.ok:
        print(f"  → HTTP {resp.status_code} for {start_dt.date()} → {end_dt.date()}, skipping")
        return pd.DataFrame()

    obj = resp.json()

    # --- safeguard: if "data" is missing or empty, bail with empty DF ----
    raw = obj.get("data")
    if not raw:
        print(f"  → no 'data' for {start_dt.date()} → {end_dt.date()}, skipping")
        return pd.DataFrame()

    # otherwise flatten: one row per hourly record, grid-area attributes as meta
    df = pd.json_normalize(
        raw,
        record_path=["attributes", "lossPerMgaHour"],
        meta=[
            ["attributes", "eic"],
            ["attributes", "name"],
            ["attributes", "status"],
        ],
        errors="ignore"
    ).rename(columns={
        "attributes.eic":    "eic",
        "attributes.name":   "name",
        "attributes.status": "status",
    })
    return df

# Download the full span in consecutive 7-day windows and stitch the pieces
# together into one DataFrame.
span_start = datetime(2023,1,1,0,0, tzinfo=oslo)
span_end   = datetime(2025,6,1,0,0, tzinfo=oslo)
window = timedelta(days=7)

all_chunks = []
cur = span_start
while cur < span_end:
    # The last window is shortened so that it never runs past span_end
    nxt = min(cur + window, span_end)
    print(f"Fetching {cur.date()} → {nxt.date()}")
    dfc = fetch_window(cur, nxt)
    # Skipped windows come back as empty frames; leaving them out keeps them
    # from influencing the dtypes of the concatenated result
    if not dfc.empty:
        all_chunks.append(dfc)
    cur = nxt

# pd.concat raises on an empty list, so fall back to an empty frame when
# every window was skipped
big_df = pd.concat(all_chunks, ignore_index=True) if all_chunks else pd.DataFrame()


### Save the data to a CSV file (optional)

In [None]:
import pandas as pd

# 1. Write the DataFrame to disk as CSV
# The row index is left out of the file (index=False).
# Swap big_df for whichever DataFrame should be saved.
big_df.to_csv(path_or_buf='big_df.csv', index=False)

### Read the CSV file (if needed)

In [None]:
import pandas as pd

# 2. Read the DataFrame back from the CSV file
# Column types are re-inferred on load, so they may differ from the frame
# that was originally saved.
big_df = pd.read_csv(filepath_or_buffer='big_df.csv')

big_df

## Exploring the Elhub API gridloss data

### Unique eic codes

In [None]:
# How many distinct EIC codes does the dataset contain?
eic_column = big_df['eic']
unique_eic_count = eic_column.nunique()
print(f"Number of unique EIC codes: {unique_eic_count}")

# Print the distinct codes themselves
unique_eics = eic_column.unique()
print(unique_eics)


### Unique name entries

In [None]:
# How many distinct grid-area names does the dataset contain?
name_column = big_df['name']
unique_name_count = name_column.nunique()
print(f"Number of unique names: {unique_name_count}")

# Print the distinct names themselves
unique_names = name_column.unique()
print(unique_names)


### Unique gridArea entries

In [None]:
# How many distinct gridArea codes does the dataset contain?
gridarea_column = big_df['gridArea']
unique_gridarea_count = gridarea_column.nunique()
print(f"Number of unique grid areas: {unique_gridarea_count}")

# Print the distinct gridArea codes themselves
unique_gridareas = gridarea_column.unique()
print(unique_gridareas)


### Unique priceArea entries

In [None]:
# How many distinct priceArea codes does the dataset contain?
pricearea_column = big_df['priceArea']
unique_pricearea_count = pricearea_column.nunique()
print(f"Number of unique price areas: {unique_pricearea_count}")

# Print the distinct priceArea codes themselves
unique_priceareas = pricearea_column.unique()
print(unique_priceareas)


### Average daily grid loss per price area

In [None]:
import pandas as pd
import plotly.express as px

# ─── FONT SIZE SETTINGS ───────────────────────────────────────────────────────
TITLE_FS        = 20   # plot title
AXIS_TITLE_FS   = 16   # x/y axis titles
TICK_FS         = 14   # x/y tick labels
BAR_TEXT_FS     = 12   # number labels inside bars
# ──────────────────────────────────────────────────────────────────────────────

# Bar chart: average of the daily loss totals, one bar per price area.
# Assumes big_df is already in your notebook, with 'startTime', 'priceArea'
# and 'calculatedLossQuantityKwh' columns.

# 1) Parse startTime as UTC then convert to Oslo, so that days are split at
#    local midnight
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) Determine span for title
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# 3) Extract date (day)
df['date'] = df['startTime'].dt.date.astype(str)

# 4) Sum hourly losses into daily totals per priceArea
daily_totals = (
    df
    .groupby(['date', 'priceArea'])['calculatedLossQuantityKwh']
    .sum()
    .reset_index()
)

# 5) Compute the average of those daily totals for each priceArea
avg_daily = (
    daily_totals
    .groupby('priceArea')['calculatedLossQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'calculatedLossQuantityKwh':'avgDailyLossKWh'})
)

# 6) Build dynamic title. Plotly does not start a new line at "\n" in title
#    text; "<br>" is the line-break markup it understands.
title_text = (
    "Average Daily Loss per Price Area<br>"
    f"({span_start} to {span_end})"
)

# 7) Create the bar chart
fig = px.bar(
    avg_daily,
    x='priceArea',
    y='avgDailyLossKWh',
    color='priceArea',
    text='avgDailyLossKWh',
    title=title_text,
    labels={'avgDailyLossKWh':'Avg Daily Loss (kWh)'},
    color_discrete_sequence=px.colors.qualitative.Plotly
)

# 8) Place values inside bars in white for contrast
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='inside',
    textfont=dict(color='white', size=BAR_TEXT_FS),
    cliponaxis=False
)

# 9) Style the layout, center title
fig.update_layout(
    title_font=dict(size=TITLE_FS),
    title_x=0.5,
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    showlegend=False,  # the x axis already names each price area
    margin=dict(t=100, b=80)
)

# 10) Render
fig.show()


### Average monthly grid loss per price area

In [None]:
import pandas as pd
import plotly.express as px

# ─── FONT SIZE SETTINGS ───────────────────────────────────────────────────────
TITLE_FS        = 20   # plot title
AXIS_TITLE_FS   = 16   # x/y axis titles
TICK_FS         = 14   # x/y tick labels
BAR_TEXT_FS     = 14   # number labels inside bars
# ──────────────────────────────────────────────────────────────────────────────

# Bar chart: average of the monthly loss totals, one bar per price area.
# Assumes big_df is already in your notebook, with 'startTime', 'endTime',
# 'priceArea' and 'calculatedLossQuantityKwh' columns.

# 1) Parse start/end timestamps as UTC, convert to Oslo time
df = big_df.copy()
df['startTime'] = pd.to_datetime(df['startTime'], utc=True).dt.tz_convert('Europe/Oslo')
df['endTime']   = pd.to_datetime(df['endTime'],   utc=True).dt.tz_convert('Europe/Oslo')

# 2) Compute overall span for title
span_start = df['startTime'].min().strftime('%Y-%m-%d')
span_end   = df['endTime'].max().strftime('%Y-%m-%d')

# 3) Extract month (YYYY-MM) for aggregation
df['month'] = df['startTime'].dt.to_period('M').astype(str)

# 4) Sum hourly losses → monthly totals, then average per priceArea
monthly_totals = (
    df
    .groupby(['month', 'priceArea'])['calculatedLossQuantityKwh']
    .sum()
    .reset_index()
)
avg_monthly = (
    monthly_totals
    .groupby('priceArea')['calculatedLossQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'calculatedLossQuantityKwh':'avgMonthlyLossKWh'})
)

# 5) Build title string including dynamic dates. Plotly does not start a new
#    line at "\n" in title text; "<br>" is the line-break markup it understands.
title_text = (
    "Average Monthly Loss per Price Area<br>"
    f"({span_start} to {span_end})"
)

# 6) Create the bar chart
fig = px.bar(
    avg_monthly,
    x='priceArea',
    y='avgMonthlyLossKWh',
    color='priceArea',
    text='avgMonthlyLossKWh',
    title=title_text,
    labels={'avgMonthlyLossKWh':'Avg Monthly Loss (kWh)'},
    color_discrete_sequence=px.colors.qualitative.Plotly
)

# 7) Move labels inside bars, white for contrast
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='inside',
    textfont=dict(color='white', size=BAR_TEXT_FS),
    cliponaxis=False
)

# 8) Add padding & center title
fig.update_layout(
    title_font=dict(size=TITLE_FS),
    title_x=0.5,  # center the main title
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    showlegend=False,  # the x axis already names each price area
    margin=dict(t=100, b=80)
)

# 9) Show
fig.show()


### Diurnal Profile of Grid Loss: Summer vs Winter

In [None]:
import pandas as pd
import plotly.express as px

# ───── ASSUMES big_df EXISTS WITH 'startTime', 'endTime' & 'calculatedLossQuantityKwh' ─────
# Line chart: mean loss per record for each local hour of day, drawn as one
# line for the summer months (Jun–Aug) and one for the winter months (Dec–Feb).

# 1) Parse full start/end timestamps as UTC → Oslo
df = big_df.copy()
df['startTime'] = pd.to_datetime(df['startTime'], utc=True).dt.tz_convert('Europe/Oslo')
df['endTime']   = pd.to_datetime(df['endTime'],   utc=True).dt.tz_convert('Europe/Oslo')

# 2) Compute the overall span for the title (taken before the season filter,
#    so it describes the whole dataset rather than only the plotted months)
span_start = df['startTime'].min().strftime('%Y-%m-%d %H:%M')
span_end   = df['endTime'].max().strftime('%Y-%m-%d %H:%M')

# 3) Extract hour/month and label seasons; months outside both seasons map
#    to NaN and are dropped
season_by_month = {6: 'Summer', 7: 'Summer', 8: 'Summer',
                   12: 'Winter', 1: 'Winter', 2: 'Winter'}
df['hour']   = df['startTime'].dt.hour
df['month']  = df['startTime'].dt.month
df['season'] = df['month'].map(season_by_month)
df = df[df['season'].notna()]

# 4) Compute diurnal averages
diurnal = (
    df.groupby(['season','hour'])['calculatedLossQuantityKwh']
      .mean()
      .reset_index(name='avgLossKWh')
)

# 5) Build dynamic title. Plotly does not start a new line at "\n" in title
#    text; "<br>" is the line-break markup it understands.
title_text = (
    "Diurnal Profile of Grid Loss: Summer vs Winter<br>"
    f"({span_start} to {span_end} Local Time)"
)

# 6) Plot with Plotly
fig = px.line(
    diurnal,
    x='hour',
    y='avgLossKWh',
    color='season',
    markers=True,
    title=title_text,
    labels={'hour':'Hour of Day','avgLossKWh':'Avg Loss (kWh)','season':'Season'}
)

# 7) Style: thicker lines and one x tick for every hour 0–23
fig.update_traces(line=dict(width=3))
fig.update_layout(
    title_x=0.5,
    title_font_size=20,
    legend_title_font_size=14,
    legend_font_size=12,
    xaxis=dict(tickmode='array', tickvals=list(range(24)),
               ticktext=[str(h) for h in range(24)],
               title_font_size=16, tickfont_size=12),
    yaxis=dict(title_font_size=16, tickfont_size=12),
    margin=dict(t=100, b=50, l=50, r=50)
)

fig.show()


### Average daily net infeed in kWh per price area

In [None]:
import pandas as pd
import plotly.express as px

# ─── FONT SIZE SETTINGS ───────────────────────────────────────────────────────
TITLE_FS        = 20   # plot title
AXIS_TITLE_FS   = 16   # x/y axis titles
TICK_FS         = 14   # x/y tick labels
BAR_TEXT_FS     = 12   # number labels inside bars
# ──────────────────────────────────────────────────────────────────────────────

# Bar chart: average of the daily net-infeed totals, one bar per price area.
# Assumes big_df is already in your notebook, with 'startTime', 'priceArea'
# and 'netInfeedQuantityKwh' columns.

# 1) Parse startTime as UTC then convert to Oslo, so that days are split at
#    local midnight
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) Determine span for title
span_start = df['startTime'].min().date().isoformat()
span_end   = df['startTime'].max().date().isoformat()

# 3) Extract date (day)
df['date'] = df['startTime'].dt.date.astype(str)

# 4) Sum hourly infeed into daily totals per priceArea
daily_totals = (
    df
    .groupby(['date', 'priceArea'])['netInfeedQuantityKwh']
    .sum()
    .reset_index()
)

# 5) Compute the average of those daily totals for each priceArea
avg_daily_infeed = (
    daily_totals
    .groupby('priceArea')['netInfeedQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'netInfeedQuantityKwh':'avgDailyInfeedKWh'})
)

# 6) Build dynamic title. Plotly does not start a new line at "\n" in title
#    text; "<br>" is the line-break markup it understands.
title_text = (
    "Average Daily Net Infeed per Price Area<br>"
    f"({span_start} to {span_end})"
)

# 7) Create the bar chart
fig = px.bar(
    avg_daily_infeed,
    x='priceArea',
    y='avgDailyInfeedKWh',
    color='priceArea',
    text='avgDailyInfeedKWh',
    title=title_text,
    labels={'avgDailyInfeedKWh':'Avg Daily Net Infeed (kWh)'},
    color_discrete_sequence=px.colors.qualitative.Plotly
)

# 8) Place values inside bars in white for contrast
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='inside',
    textfont=dict(color='white', size=BAR_TEXT_FS),
    cliponaxis=False
)

# 9) Style the layout, center title
fig.update_layout(
    title_font=dict(size=TITLE_FS),
    title_x=0.5,
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    showlegend=False,  # the x axis already names each price area
    margin=dict(t=100, b=80)
)

# 10) Render
fig.show()


### Average monthly net infeed in kWh per price area

In [None]:
import pandas as pd
import plotly.express as px

# ─── FONT SIZE SETTINGS ───────────────────────────────────────────────────────
TITLE_FS        = 20   # plot title
AXIS_TITLE_FS   = 16   # x/y axis titles
TICK_FS         = 14   # x/y tick labels
BAR_TEXT_FS     = 12   # number labels inside bars
# ──────────────────────────────────────────────────────────────────────────────

# Bar chart: average of the monthly net-infeed totals, one bar per price area.
# Assumes big_df is already in your notebook, with at least:
#   'startTime', 'endTime', 'priceArea', and 'netInfeedQuantityKwh'

# 1) Normalize your timestamps to Oslo local time
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)
df['endTime'] = (
    pd.to_datetime(df['endTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)

# 2) Compute span for the title (month resolution)
span_start = df['startTime'].min().strftime('%Y-%m')
span_end   = df['endTime'].max().strftime('%Y-%m')

# 3) Extract a YYYY-MM period column for grouping
df['month'] = df['startTime'].dt.to_period('M').astype(str)

# 4) Sum hourly infeed into monthly totals per priceArea
monthly_totals = (
    df
    .groupby(['month','priceArea'])['netInfeedQuantityKwh']
    .sum()
    .reset_index()
)

# 5) Compute the average of those monthly totals for each priceArea
avg_monthly = (
    monthly_totals
    .groupby('priceArea')['netInfeedQuantityKwh']
    .mean()
    .reset_index()
    .rename(columns={'netInfeedQuantityKwh':'avgMonthlyInfeedKWh'})
)

# 6) Build a dynamic title. Plotly does not start a new line at "\n" in title
#    text; "<br>" is the line-break markup it understands.
title_text = (
    "Average Monthly Net Infeed per Price Area<br>"
    f"({span_start} to {span_end})"
)

# 7) Create the bar chart
fig = px.bar(
    avg_monthly,
    x='priceArea',
    y='avgMonthlyInfeedKWh',
    color='priceArea',
    text='avgMonthlyInfeedKWh',
    title=title_text,
    labels={'avgMonthlyInfeedKWh':'Avg Monthly Net Infeed (kWh)'},
    color_discrete_sequence=px.colors.qualitative.Plotly
)

# 8) Place values inside bars, white for contrast
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='inside',
    textfont=dict(color='white', size=BAR_TEXT_FS),
    cliponaxis=False
)

# 9) Style & center title
fig.update_layout(
    title_font=dict(size=TITLE_FS),
    title_x=0.5,
    xaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    yaxis=dict(
        title_font=dict(size=AXIS_TITLE_FS),
        tickfont=dict(size=TICK_FS)
    ),
    showlegend=False,  # the x axis already names each price area
    margin=dict(t=100, b=80)
)

# 10) Render
fig.show()


In [None]:
import pandas as pd
import plotly.express as px

# ───── ASSUMES big_df EXISTS WITH 'startTime', 'endTime' & 'netInfeedQuantityKwh' ─────
# Line chart: mean net infeed per record for each local hour of day, drawn as
# one line for the summer months (Jun–Aug) and one for the winter months
# (Dec–Feb).

# 1) Parse full start/end timestamps as UTC → Oslo
df = big_df.copy()
df['startTime'] = pd.to_datetime(df['startTime'], utc=True).dt.tz_convert('Europe/Oslo')
df['endTime']   = pd.to_datetime(df['endTime'],   utc=True).dt.tz_convert('Europe/Oslo')

# 2) Compute the overall span for the title (taken before the season filter,
#    so it describes the whole dataset rather than only the plotted months)
span_start = df['startTime'].min().strftime('%Y-%m-%d %H:%M')
span_end   = df['endTime'].max().strftime('%Y-%m-%d %H:%M')

# 3) Extract hour/month and label seasons; months outside both seasons map
#    to NaN and are dropped
season_by_month = {6: 'Summer', 7: 'Summer', 8: 'Summer',
                   12: 'Winter', 1: 'Winter', 2: 'Winter'}
df['hour']   = df['startTime'].dt.hour
df['month']  = df['startTime'].dt.month
df['season'] = df['month'].map(season_by_month)
df = df[df['season'].notna()]

# 4) Compute diurnal averages of net infeed
diurnal_infeed = (
    df
    .groupby(['season','hour'])['netInfeedQuantityKwh']
    .mean()
    .reset_index(name='avgInfeedKWh')
)

# 5) Build dynamic title. Plotly does not start a new line at "\n" in title
#    text; "<br>" is the line-break markup it understands.
title_text = (
    "Diurnal Profile of Net Infeed: Summer vs Winter<br>"
    f"({span_start} to {span_end} Local Time)"
)

# 6) Plot with Plotly
fig = px.line(
    diurnal_infeed,
    x='hour',
    y='avgInfeedKWh',
    color='season',
    markers=True,
    title=title_text,
    labels={
        'hour': 'Hour of Day',
        'avgInfeedKWh': 'Avg Net Infeed (kWh)',
        'season': 'Season'
    }
)

# 7) Style: thicker lines and one x tick for every hour 0–23
fig.update_traces(line=dict(width=3))
fig.update_layout(
    title_x=0.5,
    title_font_size=20,
    legend_title_font_size=14,
    legend_font_size=12,
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(24)),
        ticktext=[str(h) for h in range(24)],
        title_font_size=16,
        tickfont_size=12
    ),
    yaxis=dict(
        title_font_size=16,
        tickfont_size=12
    ),
    margin=dict(t=100, b=50, l=50, r=50)
)

fig.show()

### Daily Grid Loss by Price Area (displaying every day, not aggregated)

In [None]:
import pandas as pd
import plotly.express as px

# Stacked bar chart: total grid loss for every single day, split by price area.
# Needs big_df with 'startTime', 'priceArea' and 'calculatedLossQuantityKwh'.

# 1) Work on a copy whose startTime is converted UTC → Oslo local time and
#    which carries the local calendar day as a 'date' string (YYYY-MM-DD)
local_start = pd.to_datetime(big_df['startTime'], utc=True).dt.tz_convert('Europe/Oslo')
df = big_df.assign(
    startTime=local_start,
    date=local_start.dt.date.astype(str),
)

# 2) One row per (day, price area) holding the summed loss
daily_loss = df.groupby(['date', 'priceArea'], as_index=False)['calculatedLossQuantityKwh'].sum()

# 3) Draw the bars, colored by price area
axis_labels = {
    'date': 'Date',
    'calculatedLossQuantityKwh': 'Total Loss (kWh)',
    'priceArea': 'Price Area'
}
fig = px.bar(
    daily_loss,
    x='date',
    y='calculatedLossQuantityKwh',
    color='priceArea',
    title='Daily Grid Loss by Price Area (stacked)',
    labels=axis_labels
)

# 4) Stack the bars, keep the date strings as ordered categories, set fonts
fig.update_layout(
    barmode='stack',
    xaxis=dict(
        type='category',
        categoryorder='category ascending',
        title=dict(font=dict(size=16)),
    ),
    yaxis=dict(title=dict(font=dict(size=16))),
    legend=dict(title=dict(font=dict(size=14)), font=dict(size=12)),
    title=dict(x=0.5, font=dict(size=20)),
    margin=dict(t=80, b=120)  # extra room at the bottom for the long date labels
)

fig.show()

### Monthly Grid Loss by Price Area every month

In [None]:
import pandas as pd
import plotly.express as px

# Stacked bar chart: total grid loss for every year-month, split by price area.
# Needs big_df with 'startTime', 'priceArea' and 'calculatedLossQuantityKwh'.

# 1) Work on a copy whose startTime is converted UTC → Oslo local time and
#    which carries the local year-month as a 'month' string (YYYY-MM)
local_start = pd.to_datetime(big_df['startTime'], utc=True).dt.tz_convert('Europe/Oslo')
df = big_df.assign(
    startTime=local_start,
    month=local_start.dt.to_period('M').astype(str),
)

# 2) One row per (month, price area) holding the summed loss
monthly_loss = df.groupby(['month', 'priceArea'], as_index=False)['calculatedLossQuantityKwh'].sum()

# 3) Draw the bars, colored by price area
axis_labels = {
    'month': 'Month',
    'calculatedLossQuantityKwh': 'Total Loss (kWh)',
    'priceArea': 'Price Area'
}
fig = px.bar(
    monthly_loss,
    x='month',
    y='calculatedLossQuantityKwh',
    color='priceArea',
    title='Monthly Grid Loss by Price Area (stacked)',
    labels=axis_labels
)

# 4) Stack the bars, order the months ascending, set fonts
fig.update_layout(
    barmode='stack',
    xaxis=dict(
        categoryorder='category ascending',
        title=dict(font=dict(size=16)),
    ),
    yaxis=dict(title=dict(font=dict(size=16))),
    legend=dict(title=dict(font=dict(size=14)), font=dict(size=12)),
    title=dict(x=0.5, font=dict(size=20)),
    margin=dict(t=80, b=80)
)

fig.show()


### Monthly Grid Loss by Price Area aggregated by month

In [None]:
import pandas as pd
import plotly.express as px

# Stacked bar chart: for each calendar month (Jan…Dec), the monthly loss total
# averaged over the years present in the data, split by price area.

# ─── PREPARE THE DATAFRAME ────────────────────────────────────────────────────
# Copy big_df with startTime converted UTC → Oslo local time, plus the year,
# the month number and a three-letter month label ("Jan", "Feb", …)
local_start = pd.to_datetime(big_df['startTime'], utc=True).dt.tz_convert('Europe/Oslo')
df = big_df.assign(
    startTime=local_start,
    year=local_start.dt.year,
    month_num=local_start.dt.month,
    month_name=local_start.dt.month_name().str[:3],
)
# ─────────────────────────────────────────────────────────────────────────────

# 1) Total loss for every (year, month, price area)
year_month_keys = ['year','month_num','month_name','priceArea']
monthly_totals = (
    df.groupby(year_month_keys)['calculatedLossQuantityKwh']
      .sum()
      .rename('monthlyLossKWh')
      .reset_index()
)

# 2) Collapse the years: mean of the monthly totals per calendar month & price area
calendar_keys = ['month_num','month_name','priceArea']
avg_calendar = (
    monthly_totals.groupby(calendar_keys)['monthlyLossKWh']
                  .mean()
                  .reset_index(name='avgMonthlyLossKWh')
)

# 3) Make month_name an ordered categorical so that sorting runs Jan → Dec
month_order = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]
avg_calendar['month_name'] = pd.Categorical(avg_calendar['month_name'], categories=month_order, ordered=True)
avg_calendar = avg_calendar.sort_values('month_name')

# 4) Draw the bars, colored by price area
axis_labels = {
    'month_name': 'Month',
    'avgMonthlyLossKWh': 'Avg Loss (kWh)',
    'priceArea': 'Price Area'
}
fig = px.bar(
    avg_calendar,
    x='month_name',
    y='avgMonthlyLossKWh',
    color='priceArea',
    title='Average Monthly Grid Loss by Price Area (Calendar Months)',
    labels=axis_labels
)

# 5) Stack the bars, pin the x axis to calendar order, set fonts
fig.update_layout(
    barmode='stack',
    xaxis=dict(
        categoryorder='array',
        categoryarray=month_order,
        title=dict(font=dict(size=16)),
    ),
    yaxis=dict(title=dict(font=dict(size=16))),
    legend=dict(title=dict(font=dict(size=14)), font=dict(size=12)),
    title=dict(x=0.5, font=dict(size=20)),
    margin=dict(t=80, b=80)
)

fig.show()


In [None]:
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Grid of histograms of the hourly loss values: one row per price area,
# summer (Jun–Aug) in the left column and winter (Dec–Feb) in the right.
# ASSUMES big_df exists with columns:
#   'startTime', 'priceArea', 'calculatedLossQuantityKwh'

# 1) Localize timestamps & extract month/season; months outside both seasons
#    map to NaN and are dropped
season_by_month = {6: 'Summer', 7: 'Summer', 8: 'Summer',
                   12: 'Winter', 1: 'Winter', 2: 'Winter'}
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)
df['month'] = df['startTime'].dt.month
df['season'] = df['month'].map(season_by_month)
df = df[df['season'].notna()]

# 2) Determine 5th and 95th percentiles of loss for trimming (across all PAs/seasons)
lo, hi = df['calculatedLossQuantityKwh'].quantile([0.05, 0.95])

# 3) List price areas and prepare subplot grid
price_areas = sorted(df['priceArea'].unique())
n = len(price_areas)

# Build titles row-by-row: [PA1—Summer, PA1—Winter, PA2—Summer, PA2—Winter, …]
subplot_titles = []
for pa in price_areas:
    subplot_titles += [f"{pa} — Summer", f"{pa} — Winter"]

fig = make_subplots(
    rows=n, cols=2,
    shared_xaxes='rows',
    horizontal_spacing=0.04,
    vertical_spacing=0.04,
    subplot_titles=subplot_titles
)

# 4) Colors for seasons
colors = {'Summer': '#E24A33', 'Winter': '#348ABD'}

# 5) Add one histogram per (priceArea, season), 40 equal-width bins over [lo, hi]
n_bins = 40
for i, pa in enumerate(price_areas, start=1):
    for j, season in enumerate(['Summer', 'Winter'], start=1):
        data = df.loc[
            (df['priceArea']==pa) & (df['season']==season),
            'calculatedLossQuantityKwh'
        ]
        # Trim, i.e. drop values outside [lo, hi], as the title and axis label
        # state. Clipping would instead move every out-of-range value onto
        # lo or hi and distort the edges of the distribution.
        data = data[data.between(lo, hi)]
        fig.add_trace(
            go.Histogram(
                x=data,
                xbins=dict(start=lo, end=hi, size=(hi-lo)/n_bins),
                marker_color=colors[season],
                showlegend=False
            ),
            row=i, col=j
        )

# 6) Update axes: show tick labels on every subplot, and set the same range per row
for i in range(1, n+1):
    # shared_xaxes='rows' was requested above; the range is set on the first column
    fig.update_xaxes(range=[lo, hi], showticklabels=True, row=i, col=1)
    fig.update_xaxes(showticklabels=True, row=i, col=2)

# 7) Label only the bottom row with an x-axis title
for col in (1, 2):
    fig.update_xaxes(
        title_text="Loss (kWh) trimmed to 5–95 percentile",
        row=n, col=col
    )

# 8) Global layout tweaks; height grows with the number of price areas
fig.update_layout(
    title_text="Seasonal Distributions of Loss by Price Area (5–95% trimmed)",
    title_x=0.5,
    height=250 * n,
    width=800,
    margin=dict(t=100, b=80)
)

fig.show()


In [None]:
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Grid of histograms of the hourly loss values: one row per price area,
# summer (Jun–Aug) in the left column and winter (Dec–Feb) in the right.
# The main title also states the time span of the plotted records.
# ─ Assumes big_df exists with columns:
#   'startTime', 'priceArea', 'calculatedLossQuantityKwh'

# 1) Localize timestamps & extract month/season; months outside both seasons
#    map to NaN and are dropped
season_by_month = {6: 'Summer', 7: 'Summer', 8: 'Summer',
                   12: 'Winter', 1: 'Winter', 2: 'Winter'}
df = big_df.copy()
df['startTime'] = (
    pd.to_datetime(df['startTime'], utc=True)
      .dt.tz_convert('Europe/Oslo')
)
df['month'] = df['startTime'].dt.month
df['season'] = df['month'].map(season_by_month)
df = df[df['season'].notna()]

# 2) Compute overall span for title (after the season filter, so it covers
#    only the summer/winter records that are plotted)
span_start = df['startTime'].min().strftime('%Y-%m-%d %H:%M')
span_end   = df['startTime'].max().strftime('%Y-%m-%d %H:%M')

# 3) Determine 5th and 95th percentiles of loss for trimming
lo, hi = df['calculatedLossQuantityKwh'].quantile([0.05, 0.95])

# 4) List price areas and prepare subplot grid
price_areas = sorted(df['priceArea'].unique())
n = len(price_areas)
# Titles row-by-row: [PA1—Summer, PA1—Winter, PA2—Summer, PA2—Winter, …]
subplot_titles = []
for pa in price_areas:
    subplot_titles += [f"{pa} — Summer", f"{pa} — Winter"]

fig = make_subplots(
    rows=n, cols=2,
    shared_xaxes='rows',
    horizontal_spacing=0.04,
    vertical_spacing=0.04,
    subplot_titles=subplot_titles
)

# 5) Colors
colors = {'Summer': '#E24A33', 'Winter': '#348ABD'}
# NOTE(review): `bins` is not used by the histograms below (they take their
# bin size from xbins); kept in case a later cell reads it — confirm and remove.
bins = np.linspace(lo, hi, 40)

# 6) Add histograms, 40 equal-width bins over [lo, hi]
n_bins = 40
for i, pa in enumerate(price_areas, start=1):
    for j, season in enumerate(['Summer', 'Winter'], start=1):
        data = df.loc[
            (df['priceArea']==pa) & (df['season']==season),
            'calculatedLossQuantityKwh'
        ]
        # Trim, i.e. drop values outside [lo, hi], as the title and axis label
        # state. Clipping would instead move every out-of-range value onto
        # lo or hi and distort the edges of the distribution.
        data = data[data.between(lo, hi)]
        fig.add_trace(
            go.Histogram(
                x=data,
                xbins=dict(start=lo, end=hi, size=(hi-lo)/n_bins),
                marker_color=colors[season],
                showlegend=False
            ),
            row=i, col=j
        )

# 7) Uniform x-axis range & tick labels per row
for i in range(1, n+1):
    fig.update_xaxes(range=[lo, hi], showticklabels=True, row=i, col=1)
    fig.update_xaxes(showticklabels=True, row=i, col=2)

# 8) Label bottom row
for col in (1, 2):
    fig.update_xaxes(
        title_text="Loss (kWh) trimmed to 5–95 percentile",
        row=n, col=col
    )

# 9) Main title with dynamic span; height grows with the number of price areas
fig.update_layout(
    title_text=(
        "Seasonal Distributions of Loss by Price Area (5–95% trimmed)<br>"
        f"{span_start} to {span_end} Oslo Time"
    ),
    title_x=0.5,
    height=250 * n,
    width=800,
    margin=dict(t=120, b=80)
)

fig.show()
