# Load Profile Treatment

**Objective**  
Turn the measured hourly load profile into a smoothed synthetic curve that respects the main seasonal, daily, and hourly patterns.

**Data & method**  
- Input: a single CSV stored in `input/` whose first three columns form the (season, day, hour) MultiIndex and whose remaining columns each hold one year of load values.  
- Method: load the selected reference year, compute simple coefficients, build a synthetic profile, and compare it to the observed data.


## 1. User inputs
Provide the file name (relative to `input/`) and choose the reference year column that will drive the diagnostics and coefficients.

In [None]:
# Name of the CSV to read; it is resolved relative to the input/ folder.
load_profile_filename = 'load_profile_test.csv'
# Year used for the diagnostics and coefficients; it is matched against the
# CSV column labels after conversion to a string.
reference_year = 2022

## 2. Setup: libraries and helper utilities
Import the core scientific libraries and define small helper functions used throughout the workflow.

In [None]:

from pathlib import Path
from typing import Iterable

import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display

def plot_time_series(series: pd.Series, title: str, output_path: Path) -> None:
    """Draw ``series`` as a single line chart and save it to ``output_path``.

    The chart is written at 150 dpi and the figure is closed afterwards.
    """
    figure, axes = plt.subplots(figsize=(12, 4))
    series.plot(ax=axes)
    axes.set(
        title=title,
        ylabel='Relative load',
        xlabel='Season / Day / Hour',
    )
    figure.tight_layout()
    figure.savefig(output_path, dpi=150)
    plt.close(figure)

def plot_overlay(observed: pd.Series, synthetic: pd.Series, title: str, output_path: Path) -> None:
    """Plot the observed and synthetic series on shared axes and save the chart.

    The observed series is drawn first, semi-transparent; the synthetic series
    is drawn on top as a thicker black line. The chart is written to
    ``output_path`` at 150 dpi and the figure is closed afterwards.
    """
    figure, axes = plt.subplots(figsize=(12, 4))

    observed.plot(ax=axes, alpha=0.7, label='Observed')
    synthetic.plot(ax=axes, color='black', linewidth=1.5, label='Synthetic')

    axes.set(
        title=title,
        ylabel='Relative load',
        xlabel='Season / Day / Hour',
    )
    axes.legend()

    figure.tight_layout()
    figure.savefig(output_path, dpi=150)
    plt.close(figure)

def build_synthetic_profile(observed: pd.Series) -> pd.Series:
    """Build a synthetic profile as a product of three normalized coefficients.

    ``observed`` must be indexed by levels named ``season``, ``day`` and
    ``hour``. Each row of the result is the product of:

    * its season's mean divided by the largest seasonal mean,
    * its (season, day) mean divided by the largest daily mean of that season,
    * its hour's mean (over all seasons and days) divided by the largest
      hourly mean.

    The product is finally divided by its own maximum.

    Returns:
        A Series named ``'synthetic_load'`` that shares ``observed``'s index.
    """
    season_means = observed.groupby('season').mean()
    day_means = observed.groupby(['season', 'day']).mean()
    hour_means = observed.groupby('hour').mean()

    season_weights = season_means / season_means.max()
    # Daily weights are scaled within each season, not across the whole year.
    day_weights = day_means / day_means.groupby('season').transform('max')
    hour_weights = hour_means / hour_means.max()

    row_index = observed.index
    season_labels = row_index.get_level_values('season')
    hour_labels = row_index.get_level_values('hour')
    season_day_labels = pd.MultiIndex.from_arrays(
        [season_labels, row_index.get_level_values('day')],
        names=['season', 'day'],
    )

    # Broadcast every coefficient back onto the rows of the observed profile.
    per_row_season = season_weights.loc[season_labels].to_numpy()
    # Missing daily weights fall back to a neutral factor of 1.
    per_row_day = day_weights.reindex(season_day_labels).fillna(1.0).to_numpy()
    per_row_hour = hour_weights.loc[hour_labels].to_numpy()

    combined = pd.Series(per_row_season * per_row_day * per_row_hour, index=row_index)
    return (combined / combined.max()).rename('synthetic_load')


## 3. Locate folders and validate inputs
Everything happens relative to this notebook: `input/` stores user-provided CSVs and `output/` receives all charts and tables.

In [None]:

from pathlib import Path

def locate_notebook_dir() -> Path:
    """Return the folder that holds both the ``input/`` and ``output/`` folders.

    The current working directory is returned when it contains both folders.
    Otherwise the working directory and each of its ancestors are searched,
    nearest first, for a ``pre-analysis/prepare-data`` sub-folder that
    contains both folders.

    Returns:
        The absolute path of the first matching folder.

    Raises:
        FileNotFoundError: If no matching folder is found.
    """
    def has_io_dirs(folder: Path) -> bool:
        # Require real directories; a plain file named "input" must not match.
        return (folder / 'input').is_dir() and (folder / 'output').is_dir()

    here = Path().resolve()
    if has_io_dirs(here):
        return here

    # Start from the working directory itself so the lookup also succeeds when
    # the kernel runs in the folder that directly contains pre-analysis/.
    for base in (here, *here.parents):
        candidate = base / 'pre-analysis' / 'prepare-data'
        if has_io_dirs(candidate):
            return candidate

    raise FileNotFoundError('Could not locate the prepare-data folder with input/ and output/.')

# Resolve the working folders once; every later cell reads and writes through them.
NOTEBOOK_DIR = locate_notebook_dir()
INPUT_DIR, OUTPUT_DIR = NOTEBOOK_DIR / 'input', NOTEBOOK_DIR / 'output'
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Stop here if the requested CSV is missing, before any later cell tries to read it.
load_profile_path = INPUT_DIR / load_profile_filename
if load_profile_path.exists():
    print(f'Reading load profile from {load_profile_path}')
else:
    raise FileNotFoundError(f'Missing load profile CSV: {load_profile_path}')


## 4. Load the hourly profile
Read the CSV, name the three index levels (season, day, hour), convert the column labels to strings, and preview the first few rows to confirm the structure.

In [None]:
# The first three CSV columns form the row index; every other column is kept as data.
data = pd.read_csv(load_profile_path, index_col=[0, 1, 2])
data.index = data.index.set_names(['season', 'day', 'hour'])
# Column labels become strings so a year can be looked up via str(year).
data = data.rename(columns=str)

print(f'Loaded dataset with shape {data.shape}')
display(data.head())


## 5. Focus on the reference year
Select the requested year, describe its distribution, and export quick-look plots for the full profile and a few seasons.

In [None]:
# Column labels are strings, so the requested year is compared as a string.
reference_column = str(reference_year)
if reference_column not in data.columns:
    raise KeyError(f'Reference year {reference_year} not found in columns: {list(data.columns)}')

# Sort by (season, day, hour) so the profile is in index order for plotting and slicing.
reference_profile = data[reference_column].sort_index()
print(reference_profile.describe())

# The title separator is a real em dash; it was previously stored mis-encoded.
plot_time_series(
    reference_profile,
    f'Observed load profile — {reference_year}',
    OUTPUT_DIR / f'observed_{reference_year}.png',
)

# Seasons that get their own quick-look chart; seasons absent from the data are skipped.
seasons_to_plot: Iterable[int] = (2, 3, 8)
season_labels = reference_profile.index.get_level_values('season')
for season in seasons_to_plot:
    if season not in season_labels:
        continue
    plot_time_series(
        reference_profile.xs(season, level='season'),
        f'Season {season} observed profile',
        OUTPUT_DIR / f'season_{season}_observed.png',
    )


## 6. Build simple coefficients
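Compute and plot the seasonal, daily, and hourly coefficients as diagnostics. `build_synthetic_profile` recomputes its own coefficients from the reference profile when the synthetic curve is built in the next section.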

In [None]:
# Seasonal coefficients: mean load per season, scaled by the average of those means.
# NOTE: build_synthetic_profile scales its seasonal coefficients by the maximum
# instead; only this diagnostic chart uses the mean.
seasonal_coeff = reference_profile.groupby('season').mean()
seasonal_coeff = seasonal_coeff / seasonal_coeff.mean()
fig, ax = plt.subplots()
seasonal_coeff.plot(ax=ax, title='Seasonal coefficients')
ax.set_ylabel('Relative load')
fig.tight_layout()
fig.savefig(OUTPUT_DIR / 'seasonal_coefficients.png', dpi=150)
plt.close(fig)

# Daily coefficients: mean load per (season, day), scaled by each season's peak day.
# transform('max') keeps the (season, day) index unchanged. groupby(...).apply can
# prepend the group key as an extra 'season' level, which makes the
# groupby('season') below fail on a duplicated level name.
daily_coeff = reference_profile.groupby(['season', 'day']).mean()
daily_coeff = daily_coeff / daily_coeff.groupby('season').transform('max')
display(daily_coeff.groupby('season').head(3))

# Hourly coefficients: mean load per hour over all seasons and days, scaled by the peak hour.
hourly_coeff = reference_profile.groupby('hour').mean()
hourly_coeff = hourly_coeff / hourly_coeff.max()
fig, ax = plt.subplots()
hourly_coeff.plot(ax=ax, title='Hourly coefficients')
ax.set_ylabel('Relative load')
fig.tight_layout()
fig.savefig(OUTPUT_DIR / 'hourly_coefficients.png', dpi=150)
plt.close(fig)


## 7. Build the synthetic profile and export results
Combine the coefficients, normalize the curve, save the CSV, and compare it against the observed year.

In [None]:
# Build the synthetic curve from the reference year and write it to the output folder.
synthetic_profile = build_synthetic_profile(reference_profile)
synthetic_path = OUTPUT_DIR / 'load_data_synthetic.csv'
synthetic_profile.to_csv(synthetic_path)
print(f'Synthetic profile saved to {synthetic_path}')

# Full-profile comparison chart.
plot_overlay(
    reference_profile,
    synthetic_profile,
    'Observed vs synthetic load profile',
    OUTPUT_DIR / 'observed_vs_synthetic.png',
)

# Per-season comparison charts; seasons absent from the data are skipped.
for season in seasons_to_plot:
    if season not in reference_profile.index.get_level_values('season'):
        continue
    plot_overlay(
        reference_profile.xs(season, level='season'),
        synthetic_profile.xs(season, level='season'),
        f'Season {season} comparison',
        OUTPUT_DIR / f'season_{season}_overlay.png',
    )
