In [None]:
import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from data import load_master_data, load_measurement, load_best_forecasts, load_forecast, LOCAL_TZ, COL_TSO, \
    COL_CAPACITY, COL_PLANT, COL_TIMESTAMP
%matplotlib widget

In [None]:
# Load the plant master data and preview the first rows.
# The cells below read the COL_TSO, COL_PLANT and COL_CAPACITY columns from it.
master_data = load_master_data()
master_data.head()

In [None]:
# Number of non-null COL_PLANT entries within each COL_TSO group.
master_data.groupby(COL_TSO)[[COL_PLANT]].count()

In [None]:
# Sum of COL_CAPACITY within each COL_TSO group.
master_data.groupby(COL_TSO)[[COL_CAPACITY]].sum()

In [None]:
# Dict-style aggregation: several statistics per column. The result has
# two-level columns (source column, statistic).
tso_aggregations = {
    COL_CAPACITY: ['sum', 'mean'],
    COL_PLANT: 'count',
}
master_data_grouped = master_data.groupby(COL_TSO).agg(tso_aggregations)
master_data_grouped

In [None]:
# Select every statistic that was computed for COL_CAPACITY (the top column level).
master_data_grouped.loc[:, COL_CAPACITY]

In [None]:
# Named aggregation: each keyword becomes a flat output column, defined by a
# (source column, aggregation function) pair.
master_data_flat_grouped = master_data.groupby(COL_TSO).agg(
    capacity_sum=(COL_CAPACITY, 'sum'),
    capacity_mean=(COL_CAPACITY, 'mean'),
    plant_count=(COL_PLANT, 'count'),
)
master_data_flat_grouped

In [None]:
# Derive the mean by hand (sum / count) so it can be compared with the
# aggregated 'capacity_mean' column.
master_data_flat_grouped['own_mean'] = master_data_flat_grouped['capacity_sum'].div(
    master_data_flat_grouped['plant_count']
)
master_data_flat_grouped

In [None]:
# Put the 'mean' column of the dict-style aggregation next to the flat result;
# both frames are indexed by COL_TSO, so the join aligns on the index.
grouped_capacity_mean = master_data_grouped[COL_CAPACITY]['mean']
master_data_flat_grouped.join(grouped_capacity_mean)

In [None]:
# Load the measurements. Later cells slice this frame by timestamp on the index
# and select plant columns such as 'Plant_01'.
measurements = load_measurement()
measurements

In [None]:
# Draw every column of `measurements` into a single axes.
fig, axes = plt.subplots()
measurements.plot(ax=axes)
plt.show()

In [None]:
# Full time series of a single plant column.
fig, axes = plt.subplots()
measurements.loc[:, 'Plant_01'].plot(ax=axes)
plt.show()

In [None]:
# Time window used by this and later cells: two days from 2024-10-27 00:00,
# localized to LOCAL_TZ.
# NOTE(review): if LOCAL_TZ observes DST, `start + timedelta` keeps the UTC
# offset of `start`, so `end` may not fall on local midnight - confirm intent.
start = LOCAL_TZ.localize(dt.datetime(year=2024, month=10, day=27))
end = start + dt.timedelta(days=2)
fig, axes = plt.subplots()
# Label-based slice on the index; both end points are inclusive.
measurements['Plant_01'].loc[start:end].plot(ax=axes)
plt.show()

In [None]:
# Compare the raw Plant_01 series with two rolling means over the same window.
plant_window = measurements.loc[start:end, 'Plant_01']
fig, axes = plt.subplots()
plant_window.plot(ax=axes)
# Count-based window: mean over the last 10 rows.
plant_window.rolling(window=10).mean().plot(ax=axes)
# Time-based window: mean over the last 2 hours of the index.
plant_window.rolling(window=dt.timedelta(hours=2)).mean().plot(ax=axes)
plt.show()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per measurement column.
measurements.describe()

In [None]:
# Keep only the 'max' row of the summary statistics and transpose it, so each
# measurement column becomes a row with a single 'max' column.
measurement_stats = measurements.describe()
max_per_plant = measurement_stats.loc[['max']].T
max_per_plant.head()

In [None]:
# Attach COL_CAPACITY from the master data to each plant's maximum measurement;
# the join aligns the plant names in both indexes.
capacity_by_plant = master_data.set_index(COL_PLANT)[[COL_CAPACITY]]
max_vs_installed_cap = max_per_plant.join(capacity_by_plant)
max_vs_installed_cap.head()

In [None]:
# Variance across plants of the ratio (maximum measurement / COL_CAPACITY).
max_to_capacity_ratio = max_vs_installed_cap['max'].div(max_vs_installed_cap[COL_CAPACITY])
max_to_capacity_ratio.var()

In [None]:
# Aggregate the per-plant measurements into one series per TSO and plot them.
# Map every plant name to its TSO according to the master data.
plant_to_tso = dict(zip(master_data[COL_PLANT], master_data[COL_TSO]))
# rename() returns a new frame, so `measurements` itself is left untouched and
# no explicit copy is needed. Plant columns without an entry in the mapping
# keep their original name.
tso_measurement = measurements.rename(columns=plant_to_tso)
# After renaming, several columns share the same TSO label. They are summed by
# grouping the transposed frame on these labels and transposing back.
tso_measurement = tso_measurement.T.groupby(tso_measurement.columns).sum().T
fig, axes = plt.subplots()
# Plot the aggregated TSO series (the cell previously re-plotted Plant_01 and
# never showed the aggregation), using the same start/end window as above.
tso_measurement.loc[start:end].plot(ax=axes)
axes.set_title('Measurement per TSO')
plt.show()

In [None]:
# Portfolio measurement: row-wise sum over all plant columns, stored as a
# one-column frame named 'measurement'.
portfolio_measurement = measurements.sum(axis='columns').to_frame(name='measurement')
portfolio_measurement.plot()

In [None]:
# Distribution of the portfolio measurement for every hour of the day.
# NOTE: this adds the helper column 'hour' to portfolio_measurement in place;
# it is kept because later cells that use portfolio_measurement see it too.
portfolio_measurement['hour'] = portfolio_measurement.index.hour
# Sort the hours: unique() returns them in order of first appearance, which
# depends on the timestamp the series happens to start with.
hours = np.sort(portfolio_measurement['hour'].unique())
data_sets = [
    portfolio_measurement.loc[portfolio_measurement['hour'] == hour, 'measurement'].values
    for hour in hours
]
fig, axes = plt.subplots()
# Place each violin at its hour value. With the default positions 1..N the
# x axis would not correspond to the hour the violin represents.
axes.violinplot(dataset=data_sets, positions=hours)
axes.set_xlabel('hour')
axes.set_ylabel('measurement')
plt.show()

In [None]:
# Load the best forecasts and preview the first rows.
# NOTE(review): presumably laid out like `measurements` (timestamp index, one
# column per plant) - confirm in data.load_best_forecasts.
best_forecasts = load_best_forecasts()
best_forecasts.head()

In [None]:
# Portfolio forecast: row-wise sum over all forecast columns.
portfolio_forecast = best_forecasts.sum(axis='columns').to_frame(name='forecast')
# Left join on the index. Every column of portfolio_measurement is carried
# along, i.e. 'measurement' plus any helper column added to it earlier.
fc_vs_measure = portfolio_forecast.join(portfolio_measurement)
fc_vs_measure

In [None]:
# Plot only the two portfolio series. fc_vs_measure also carries the 'hour'
# helper column joined in from portfolio_measurement, which would otherwise be
# drawn as a third, meaningless line.
fc_vs_measure[['forecast', 'measurement']].plot()

In [None]:
# Portfolio-level error metrics of forecast versus measurement.
# mean() skips rows where the error is NaN.
forecast_error = fc_vs_measure['forecast'] - fc_vs_measure['measurement']
bias = forecast_error.mean()
rmse = np.sqrt(forecast_error.pow(2).mean())
print(f'{bias=}, {rmse=}')