In [1]:
import os
import numpy as np
import pandas as pd
import json

In [2]:
# Base data directory: raw inputs are read from its "orig/" subfolder and the
# processed CSV / .npy / .json outputs are written directly into it.
inp_dir = "../data/"

In [8]:
# Installed capacity: read the semicolon-separated table ("." thousands, "," decimal),
# keep the wind and photovoltaic columns, add a combined wind column and export it.
capacity_src = os.path.join(inp_dir, "orig/Installed_Capacity_Germany.csv")
capacity_raw = pd.read_csv(
    capacity_src,
    sep=";",
    thousands='.',
    decimal=',',
    parse_dates=['Date from', 'Date to'],
)
capacity_keep = ["Date from", "Wind Offshore [MW] ", "Wind Onshore [MW]", "Photovoltaic [MW]"]
# The offshore header carries a trailing space in the source file; normalise it here.
capacity_df = capacity_raw[capacity_keep].rename(columns={"Wind Offshore [MW] ": "Wind Offshore [MW]"})
capacity_df["Wind Total [MW]"] = capacity_df["Wind Offshore [MW]"] + capacity_df["Wind Onshore [MW]"]
# NOTE(review): "%s" is not a documented Python strftime directive; it relies on the
# platform C library and presumably interprets the naive timestamps in the machine's
# local timezone -- confirm before running this on another OS / timezone.
capacity_epoch = capacity_df["Date from"].dt.strftime("%s")
capacity_df["Date from"] = capacity_epoch.astype(int)
capacity_df.to_csv(os.path.join(inp_dir, "Installed_Capacity_Germany.csv"), index=False)

In [12]:
# Realised supply: average every 4 consecutive rows into one record and stamp each
# record with the "Date from" value of the first row of its group.
supply_df = pd.read_csv(
    os.path.join(inp_dir, "orig/Realised_Supply_Germany.csv"),
    sep=";",
    thousands='.',
    decimal=',',
)
supply_bucket = np.arange(len(supply_df)) // 4  # 0,0,0,0,1,1,1,1,...
supply_df_ = supply_df.groupby(supply_bucket).mean(numeric_only=True)
supply_start = pd.to_datetime(supply_df["Date from"].iloc[::4], format="%d.%m.%y %H:%M")
# .values drops the original row index so the assignment is purely positional.
# NOTE(review): "%s" depends on the platform C library and local timezone -- confirm.
supply_df_["time"] = supply_start.dt.strftime("%s").values.astype(int)
supply_df_["Wind Total [MW]"] = supply_df_["Wind Offshore [MW] "] + supply_df_["Wind Onshore [MW]"]
supply_out_cols = ["time", "Wind Offshore [MW] ", "Wind Onshore [MW]", "Wind Total [MW]", "Photovoltaic [MW]"]
# Strip the trailing space from the offshore column name on the way out.
supply_df_ = supply_df_[supply_out_cols].rename(columns={"Wind Offshore [MW] ": "Wind Offshore [MW]"})
supply_df_.to_csv(os.path.join(inp_dir, "Realised_Supply_Germany.csv"), index=False)

In [14]:
# Realised demand: same treatment as the supply table -- average blocks of 4 rows and
# stamp each block with the "Date from" value of its first row.
# NOTE(review): the input file name is spelled "Reaslised"; presumably this matches
# the file on disk -- confirm.
demand_df = pd.read_csv(
    os.path.join(inp_dir, "orig/Reaslised_Demand_Germany.csv"),
    sep=";",
    thousands='.',
    decimal=',',
)
demand_bucket = np.arange(len(demand_df)) // 4
demand_df_ = demand_df.groupby(demand_bucket).mean(numeric_only=True)
demand_start = pd.to_datetime(demand_df["Date from"].iloc[::4], format="%d.%m.%y %H:%M")
# NOTE(review): "%s" depends on the platform C library and local timezone -- confirm.
demand_df_["time"] = demand_start.dt.strftime("%s").values.astype(int)
demand_out_cols = ["time", "Total (Grid Load) [MWh]", "Residual Load [MWh]", "Pumped Storage [MWh]"]
demand_df_ = demand_df_[demand_out_cols]
demand_df_.to_csv(os.path.join(inp_dir, "Realised_Demand_Germany.csv"), index=False)

In [18]:
# Weather table: load the comma-separated file and replace the textual "time" column
# with integer epoch seconds.
weather_df = pd.read_csv(os.path.join(inp_dir, "orig/Weather_Data_Germany.csv"), sep=',')
weather_time = pd.to_datetime(weather_df["time"], format="%Y-%m-%d %H:%M:%S")
# NOTE(review): "%s" depends on the platform C library and local timezone -- confirm.
weather_df["time"] = weather_time.dt.strftime("%s").values.astype(int)

In [19]:
# Sanity check: the table should hold exactly one row per (hour, longitude, latitude)
# combination; 24*(365*2+366) is presumably the hour count of three years, one a leap year.
expected_hours = 24 * (365 * 2 + 366)
n_longitudes = len(weather_df["longitude"].unique())
n_latitudes = len(weather_df["latitude"].unique())
expected_hours * n_longitudes * n_latitudes == len(weather_df)

True

In [23]:
# Key columns shared by every extracted table.
all_cols = ["time", "longitude", "latitude"]
# Feature columns extracted for the solar and the wind matrices.
solar_cols = ["cdir", "tcc", "t2m", "ssr", "tsr", "sund", "tp"]
wind_cols = ["z", "msl", "u10", "v10", "u100", "v100", "t2m", "blh"]
# Columns that are later turned into per-step increments (differenced along time).
cdf_cols = ["cdir", "ssr", "tsr", "sund", "tp"]

# Positions of those columns inside each feature list, in cdf_cols order.
# None of them occurs in wind_cols, so wind_cdf_idx ends up empty.
solar_cdf_idx = [solar_cols.index(name) for name in cdf_cols if name in solar_cols]
wind_cdf_idx = [wind_cols.index(name) for name in cdf_cols if name in wind_cols]

In [24]:
# Split the weather table into a solar and a wind view; both keep the key columns
# (time, longitude, latitude) in front of their feature columns.
solar_selection = all_cols + solar_cols
wind_selection = all_cols + wind_cols
solar_df = weather_df.loc[:, solar_selection]
wind_df = weather_df.loc[:, wind_selection]

In [25]:
# Map every distinct coordinate / timestamp value to its rank in ascending order;
# these ranks are the array positions used when the matrices are filled.
latitude_idx = {
    value: pos for pos, value in enumerate(sorted(weather_df["latitude"].unique().tolist()))
}
longitude_idx = {
    value: pos for pos, value in enumerate(sorted(weather_df["longitude"].unique().tolist()))
}
time_idx = {
    value: pos for pos, value in enumerate(sorted(weather_df["time"].unique().tolist()))
}

In [26]:
def row_to_mat_wrap(mat, time_idx, latitude_idx, longitude_idx):
    """Build a per-row callback that scatters flat records into ``mat``.

    Parameters
    ----------
    mat : array indexed as ``mat[time, latitude, longitude, feature]``; written in place.
    time_idx, latitude_idx, longitude_idx : mappings from a raw time / latitude /
        longitude value to its position along the corresponding axis of ``mat``.

    Returns
    -------
    A function ``row_to_mat(row)`` expecting a record laid out positionally as
    ``(time, longitude, latitude, feature_0, feature_1, ...)``. It stores the
    feature values in ``mat`` and returns ``None``. ``row`` may be a pandas
    Series (as passed by ``DataFrame.apply(..., axis=1)``) or any plain sequence.

    Raises
    ------
    KeyError
        If a time / latitude / longitude value of the row is not in its mapping.
    """
    def row_to_mat(row):
        # Integer keys on a label-indexed Series (row[0], row[3:]) only work through
        # pandas' positional fallback, which is deprecated and later removed. Reading
        # the underlying array keeps the access strictly positional.
        values = row.to_numpy() if hasattr(row, "to_numpy") else row
        t_pos = time_idx[values[0]]
        lat_pos = latitude_idx[values[2]]  # latitude is the third field ...
        lon_pos = longitude_idx[values[1]]  # ... longitude the second
        mat[t_pos, lat_pos, lon_pos, :] = values[3:]
    return row_to_mat

In [27]:
# Dense (time, latitude, longitude, feature) containers for the weather features.
# The time axis is sized from time_idx so the arrays always match the index map that
# is used to fill them (and that is exported as metadata). The arrays are NaN-filled
# rather than left uninitialised, so any grid cell absent from the table shows up as
# NaN instead of arbitrary memory contents.
weather_grid_shape = (len(time_idx), len(latitude_idx), len(longitude_idx))
solar_mat = np.full(weather_grid_shape + (len(solar_cols),), np.nan)
wind_mat = np.full(weather_grid_shape + (len(wind_cols),), np.nan)

In [28]:
%%capture
solar_df.apply(row_to_mat_wrap(solar_mat, time_idx, latitude_idx, longitude_idx), axis=1)
wind_df.apply(row_to_mat_wrap(wind_mat, time_idx, latitude_idx, longitude_idx), axis=1)

In [29]:
# Convert the running-total columns into per-step increments along the time axis.
# The first time step has no predecessor and keeps its original value. Negative
# increments (presumably where the running total resets) are clamped to zero.
#
# The clamp is applied only to the differenced columns. Clamping the whole matrix
# would also zero legitimately negative values in other columns -- e.g. the
# u10/v10/u100/v100 columns of wind_mat, which are presumably signed wind components.
for i in solar_cdf_idx:
    solar_col = solar_mat[:, :, :, i]  # basic slicing -> view, writes go to solar_mat
    solar_col[1:] = solar_col[1:] - solar_col[:-1]  # right-hand side is evaluated first
    solar_col[solar_col < 0] = 0
# With the column lists defined earlier, wind_cdf_idx is empty, so wind_mat is left
# untouched; the loop is kept so added cumulative wind columns are handled too.
for i in wind_cdf_idx:
    wind_col = wind_mat[:, :, :, i]
    wind_col[1:] = wind_col[1:] - wind_col[:-1]
    wind_col[wind_col < 0] = 0

In [30]:
# Persist both feature matrices plus the value -> position maps needed to interpret
# their time / latitude / longitude axes.
for npy_name, npy_array in (("solar.npy", solar_mat), ("wind.npy", wind_mat)):
    np.save(os.path.join(inp_dir, npy_name), npy_array)

weather_meta = {"time": time_idx, "latitude": latitude_idx, "longitude": longitude_idx}
weather_meta_path = os.path.join(inp_dir, "weather_meta.json")
with open(weather_meta_path, "w") as meta_file:
    json.dump(weather_meta, meta_file)

In [31]:
# Also export the flat solar / wind tables as CSV, without the row index.
for csv_name, csv_frame in (("Solar.csv", solar_df), ("Wind.csv", wind_df)):
    csv_frame.to_csv(os.path.join(inp_dir, csv_name), index=False)

In [32]:
# 2022 weather table: load the comma-separated file and replace "time" with integer
# epoch seconds.
# NOTE(review): "time" is already listed in parse_dates, so if read_csv manages to parse
# it the explicit format below is presumably not what decides the interpretation; for
# day-first strings that automatic parse could swap day and month -- confirm against the
# file's actual date layout.
weather_2022_df = pd.read_csv(os.path.join(inp_dir, "orig/Weather_Data_Germany_2022.csv"), sep=',', parse_dates=['forecast_origin', 'time'])
# NOTE(review): "%s" depends on the platform C library and local timezone -- confirm.
weather_2022_df["time"] = pd.to_datetime(weather_2022_df["time"], format="%d.%m.%y %H:%M").dt.strftime("%s").values.astype(int)

In [33]:
# 2022 counterparts of the solar / wind tables: key columns followed by the features.
solar_2022_df = weather_2022_df.loc[:, all_cols + solar_cols]
wind_2022_df = weather_2022_df.loc[:, all_cols + wind_cols]

# Rank of every distinct coordinate / timestamp value in ascending order; used as the
# array positions of the 2022 matrices.
latitude_2022_idx = {
    value: pos for pos, value in enumerate(sorted(weather_2022_df["latitude"].unique().tolist()))
}
longitude_2022_idx = {
    value: pos for pos, value in enumerate(sorted(weather_2022_df["longitude"].unique().tolist()))
}
time_2022_idx = {
    value: pos for pos, value in enumerate(sorted(weather_2022_df["time"].unique().tolist()))
}

In [34]:
# Sanity check: one row per (hour, longitude, latitude) combination, with 365*24 hours.
expected_2022_rows = 365 * 24 * len(longitude_2022_idx) * len(latitude_2022_idx)
len(weather_2022_df) == expected_2022_rows

True

In [35]:
# Dense (time, latitude, longitude, feature) containers for the 2022 weather features.
# The time axis is sized from time_2022_idx so the arrays always match the index map
# used to fill them. The arrays are NaN-filled rather than left uninitialised, so any
# grid cell absent from the table shows up as NaN instead of arbitrary memory contents.
weather_2022_grid_shape = (len(time_2022_idx), len(latitude_2022_idx), len(longitude_2022_idx))
solar_2022_mat = np.full(weather_2022_grid_shape + (len(solar_cols),), np.nan)
wind_2022_mat = np.full(weather_2022_grid_shape + (len(wind_cols),), np.nan)

In [36]:
%%capture
solar_2022_df.apply(row_to_mat_wrap(solar_2022_mat, time_2022_idx, latitude_2022_idx, longitude_2022_idx), axis=1)
wind_2022_df.apply(row_to_mat_wrap(wind_2022_mat, time_2022_idx, latitude_2022_idx, longitude_2022_idx), axis=1)

In [37]:
# Convert the running-total columns of the 2022 matrices into per-step increments
# along the time axis. The first time step has no predecessor and keeps its original
# value. Negative increments (presumably where the running total resets) are clamped
# to zero.
#
# The clamp is applied only to the differenced columns. Clamping the whole matrix
# would also zero legitimately negative values in other columns -- e.g. the
# u10/v10/u100/v100 columns of wind_2022_mat, presumably signed wind components.
for i in solar_cdf_idx:
    solar_2022_col = solar_2022_mat[:, :, :, i]  # view: writes go to solar_2022_mat
    solar_2022_col[1:] = solar_2022_col[1:] - solar_2022_col[:-1]  # RHS evaluated first
    solar_2022_col[solar_2022_col < 0] = 0
# With the column lists defined earlier, wind_cdf_idx is empty, so wind_2022_mat is
# left untouched; the loop is kept so added cumulative wind columns are handled too.
for i in wind_cdf_idx:
    wind_2022_col = wind_2022_mat[:, :, :, i]
    wind_2022_col[1:] = wind_2022_col[1:] - wind_2022_col[:-1]
    wind_2022_col[wind_2022_col < 0] = 0

In [39]:
# Persist both 2022 feature matrices plus the value -> position maps needed to
# interpret their time / latitude / longitude axes.
for npy_name, npy_array in (("solar_2022.npy", solar_2022_mat), ("wind_2022.npy", wind_2022_mat)):
    np.save(os.path.join(inp_dir, npy_name), npy_array)

weather_meta_2022 = {"time": time_2022_idx, "latitude": latitude_2022_idx, "longitude": longitude_2022_idx}
weather_meta_2022_path = os.path.join(inp_dir, "weather_meta_2022.json")
with open(weather_meta_2022_path, "w") as meta_file:
    json.dump(weather_meta_2022, meta_file)

In [40]:
# Also export the flat 2022 solar / wind tables as CSV, without the row index.
for csv_name, csv_frame in (("Solar_2022.csv", solar_2022_df), ("Wind_2022.csv", wind_2022_df)):
    csv_frame.to_csv(os.path.join(inp_dir, csv_name), index=False)