In [5]:
import numpy as np
import pandas as pd
import holidays

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from load_data import load_data, split_data

data = load_data("1h")
all_columns = data.keys()
all_columns

Index(['MWh', 'temperature_fore_ch', 'temperature_fore_fr',
       'temperature_fore_de', 'temperature_fore_it', 'solar_fore_de [MW]',
       'solar_fore_it [MW]', 'wind_fore_de [MW]', 'wind_fore_it [MW]', 'CH_AT',
       'CH_DE', 'CH_FR', 'CH_IT', 'AT_CH', 'DE_CH', 'FR_CH', 'IT_CH'],
      dtype='object')

In [6]:
columns_to_drop = [
    "CH_AT",
    "CH_DE",
    "CH_FR",
    "CH_IT",
    "AT_CH",
    "DE_CH",
    "FR_CH",
    "IT_CH",
]

In [7]:
data_filtered = data.drop(columns=columns_to_drop)
data_filtered

Unnamed: 0,MWh,temperature_fore_ch,temperature_fore_fr,temperature_fore_de,temperature_fore_it,solar_fore_de [MW],solar_fore_it [MW],wind_fore_de [MW],wind_fore_it [MW]
2019-01-01 00:00:00+00:00,129.716036,4.106700,5.972900,7.426800,4.028100,0.0,0.0,23052.3310,4596.5916
2019-01-01 01:00:00+00:00,133.398074,2.637812,5.080813,6.724312,2.372176,0.0,0.0,24969.9701,4478.5564
2019-01-01 02:00:00+00:00,135.133852,1.701501,4.600184,6.284190,1.437560,0.0,0.0,27082.9626,4323.3712
2019-01-01 03:00:00+00:00,131.699424,1.221641,4.468391,6.066192,1.115047,0.0,0.0,26890.9717,4231.8283
2019-01-01 04:00:00+00:00,147.391128,1.122105,4.622812,6.030072,1.295434,0.0,0.0,27740.1555,4266.3082
...,...,...,...,...,...,...,...,...,...
2021-12-31 18:00:00+00:00,171.707318,8.530000,9.960000,10.790000,9.590000,0.0,0.0,36997.7200,1108.4000
2021-12-31 19:00:00+00:00,159.462903,8.000000,9.400000,10.630000,9.110000,0.0,0.0,35666.9300,1077.9700
2021-12-31 20:00:00+00:00,155.109520,7.500000,8.880000,10.510000,8.670000,0.0,0.0,34383.8800,1048.2800
2021-12-31 21:00:00+00:00,171.370277,6.970000,8.510000,10.320000,8.140000,0.0,0.0,33075.2500,1078.7800


In [27]:
# Create a new DataFrame with a date index
date_index = np.unique(data_filtered.index.date)
new_df = pd.DataFrame(index=date_index)

# Initialize an empty array for each date with 24 values
new_df['MWh'] = [np.array([np.nan] * 24) for _ in range(len(date_index))]

new_df

Unnamed: 0,MWh
2019-01-01,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2019-01-02,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2019-01-03,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2019-01-04,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2019-01-05,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
...,...
2021-12-27,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2021-12-28,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2021-12-29,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2021-12-30,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."


In [31]:
new_df.shape, new_df.iloc[0].shape

((1096, 1), (1,))

In [34]:
# Populate the array with the hourly values
for date in date_index:
    #hourly_values = active_losses_df.loc[date.strftime('%Y-%m-%d')]['MWh'].values
    #new_df.at[date, 'MWh'][:len(hourly_values)] = hourly_values
    
    date_values = [data_filtered.loc[date.strftime('%Y-%m-%d')]['MWh']]
    new_df.loc[date] = date_values

[2019-01-01 00:00:00+00:00    129.716036
2019-01-01 01:00:00+00:00    133.398074
2019-01-01 02:00:00+00:00    135.133852
2019-01-01 03:00:00+00:00    131.699424
2019-01-01 04:00:00+00:00    147.391128
2019-01-01 05:00:00+00:00    167.629536
2019-01-01 06:00:00+00:00    189.136693
2019-01-01 07:00:00+00:00    197.988811
2019-01-01 08:00:00+00:00    193.205145
2019-01-01 09:00:00+00:00    188.108696
2019-01-01 10:00:00+00:00    177.246809
2019-01-01 11:00:00+00:00    147.491664
2019-01-01 12:00:00+00:00    146.295818
2019-01-01 13:00:00+00:00    173.668949
2019-01-01 14:00:00+00:00    204.100601
2019-01-01 15:00:00+00:00    197.341091
2019-01-01 16:00:00+00:00    174.247253
2019-01-01 17:00:00+00:00    167.027968
2019-01-01 18:00:00+00:00    160.993766
2019-01-01 19:00:00+00:00    189.936692
2019-01-01 20:00:00+00:00    188.365301
2019-01-01 21:00:00+00:00    197.986030
2019-01-01 22:00:00+00:00    180.193706
2019-01-01 23:00:00+00:00    163.367636
Freq: H, Name: MWh, dtype: float64]
[20

[2020-07-01 00:00:00+00:00     59.928976
2020-07-01 01:00:00+00:00     69.128785
2020-07-01 02:00:00+00:00     70.489349
2020-07-01 03:00:00+00:00     65.961246
2020-07-01 04:00:00+00:00     78.715227
2020-07-01 05:00:00+00:00     98.393308
2020-07-01 06:00:00+00:00    100.440481
2020-07-01 07:00:00+00:00    101.632388
2020-07-01 08:00:00+00:00    111.950893
2020-07-01 09:00:00+00:00    103.978307
2020-07-01 10:00:00+00:00     88.015778
2020-07-01 11:00:00+00:00     83.968534
2020-07-01 12:00:00+00:00     88.664788
2020-07-01 13:00:00+00:00     88.171213
2020-07-01 14:00:00+00:00     91.182565
2020-07-01 15:00:00+00:00     93.562434
2020-07-01 16:00:00+00:00    104.706124
2020-07-01 17:00:00+00:00    105.452174
2020-07-01 18:00:00+00:00    111.695419
2020-07-01 19:00:00+00:00    107.537049
2020-07-01 20:00:00+00:00     94.283474
2020-07-01 21:00:00+00:00     76.941733
2020-07-01 22:00:00+00:00     69.271735
2020-07-01 23:00:00+00:00     58.638832
Freq: H, Name: MWh, dtype: float64]
[20

[2021-04-01 00:00:00+00:00     89.819432
2021-04-01 01:00:00+00:00     90.486750
2021-04-01 02:00:00+00:00     97.688169
2021-04-01 03:00:00+00:00    105.445651
2021-04-01 04:00:00+00:00     86.916837
2021-04-01 05:00:00+00:00     83.532516
2021-04-01 06:00:00+00:00     89.144800
2021-04-01 07:00:00+00:00     84.558663
2021-04-01 08:00:00+00:00     97.219889
2021-04-01 09:00:00+00:00    115.375957
2021-04-01 10:00:00+00:00     98.361528
2021-04-01 11:00:00+00:00    128.476641
2021-04-01 12:00:00+00:00    147.270129
2021-04-01 13:00:00+00:00    143.552622
2021-04-01 14:00:00+00:00    127.312179
2021-04-01 15:00:00+00:00    110.300143
2021-04-01 16:00:00+00:00     99.447401
2021-04-01 17:00:00+00:00     94.812107
2021-04-01 18:00:00+00:00     83.002915
2021-04-01 19:00:00+00:00     98.683168
2021-04-01 20:00:00+00:00    117.655390
2021-04-01 21:00:00+00:00    101.673091
2021-04-01 22:00:00+00:00    100.270044
2021-04-01 23:00:00+00:00    100.217967
Freq: H, Name: MWh, dtype: float64]
[20

Unnamed: 0,MWh
2019-01-01,2019-01-01 00:00:00+00:00 129.716036 2019-0...
2019-01-02,2019-01-02 00:00:00+00:00 160.254575 2019-0...
2019-01-03,2019-01-03 00:00:00+00:00 161.491635 2019-0...
2019-01-04,2019-01-04 00:00:00+00:00 197.708791 2019-0...
2019-01-05,2019-01-05 00:00:00+00:00 119.988774 2019-0...
...,...
2021-12-27,2021-12-27 00:00:00+00:00 88.593463 2021-1...
2021-12-28,2021-12-28 00:00:00+00:00 121.391864 2021-1...
2021-12-29,2021-12-29 00:00:00+00:00 115.549291 2021-1...
2021-12-30,2021-12-30 00:00:00+00:00 132.491591 2021-1...


In [35]:
new_df

Unnamed: 0,MWh
2019-01-01,2019-01-01 00:00:00+00:00 129.716036 2019-0...
2019-01-02,2019-01-02 00:00:00+00:00 160.254575 2019-0...
2019-01-03,2019-01-03 00:00:00+00:00 161.491635 2019-0...
2019-01-04,2019-01-04 00:00:00+00:00 197.708791 2019-0...
2019-01-05,2019-01-05 00:00:00+00:00 119.988774 2019-0...
...,...
2021-12-27,2021-12-27 00:00:00+00:00 88.593463 2021-1...
2021-12-28,2021-12-28 00:00:00+00:00 121.391864 2021-1...
2021-12-29,2021-12-29 00:00:00+00:00 115.549291 2021-1...
2021-12-30,2021-12-30 00:00:00+00:00 132.491591 2021-1...


In [8]:
# Create a new DataFrame with a date index
date_index = data_filtered.index.date
new_df = pd.DataFrame(index=date_index)

# Initialize an empty array for each date with 24 values
new_df['MWh'] = [np.array([np.nan] * 24) for _ in range(len(date_index))]

# Populate the array with the hourly values
for date in date_index:
    #hourly_values = active_losses_df.loc[date.strftime('%Y-%m-%d')]['MWh'].values
    #new_df.at[date, 'MWh'][:len(hourly_values)] = hourly_values
    
    date_values = data_filtered.loc[date.strftime('%Y-%m-%d')]
    #print(date_values)
    #print(date_values['MWh'].values)
    new_df.loc[date] = date_values.values

# Optional: Fill NaN values with zeros
new_df['MWh'] = new_df['MWh'].apply(lambda x: np.nan_to_num(x, nan=0))

# Print the resulting DataFrame
print(new_df) 

ValueError: could not broadcast input array from shape (24,9) into shape (24,1)