In [1]:
# Re-import edited project modules automatically before each cell execution,
# so changes to local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime

In [3]:
# Load the raw weather export. Downstream cells read the columns
# 'time', 'G(i)', 'H_sun', 'T2m' and 'WS10m' from this frame.
# NOTE(review): the column names look like a PVGIS hourly export — confirm the source.
raw_data = pd.read_csv("./raw/temperature.csv")

In [4]:
# Build the hourly weather frame in one chain: parse the 'time' stamps
# (format YYYYMMDD:HHMM) into a 'Datetime' index, drop the raw string column
# and give the remaining measurements readable names.
data = (
    raw_data[['time', 'G(i)', 'H_sun', 'T2m', "WS10m"]]
    .assign(Datetime=lambda df: pd.to_datetime(df['time'], format="%Y%m%d:%H%M"))
    .set_index('Datetime')
    .drop(columns='time')
    .rename(columns={
        "G(i)": "solar irradiation",
        "H_sun": "solar elevation",
        "T2m": "temperature",
        "WS10m": "wind speed",
    })
)

In [5]:
# The parsed frame is used as-is for the hourly resolution (same object, no copy).
data_hourly = data
# Upsample to one row per minute; the gaps between the original samples are
# filled by linear interpolation.
data_minutely = data_hourly.resample('min').interpolate(method='linear')

In [6]:
# For each resolution: print the value range of every weather column, cut the
# series into calendar weeks, keep only (nearly) complete weeks inside
# 2007-2008, and store every week whose end date matches the next household
# demand episode under the same key ('eps_<i>') in weather_and_time.h5.
#
# The loop variable is deliberately NOT called `data`, so the notebook-level
# `data` frame is not shadowed while this cell runs.
for frame, resolution in zip([data_minutely, data_hourly], ["minutely", "hourly"]):
    for column in ("solar irradiation", "solar elevation", "temperature", "wind speed"):
        print(f"{resolution} Max {column}: ", frame[column].max())
        print(f"{resolution} Min {column}: ", frame[column].min())

    # Minimum number of fully populated rows a week needs in order to be kept
    # (a full week has 10080 minutes / 168 hours).
    required_length = 10000 if resolution == "minutely" else 165

    # The length check comes first on purpose: the weekly grouper can yield
    # empty bins, and indexing `week.index[0]` on those would raise.
    complete_weeks = [
        week.fillna(0)
        for _, week in frame.groupby(pd.Grouper(freq='W'))
        if len(week.dropna()) >= required_length
        and week.index[0] >= datetime(2007, 1, 1)
        and week.index[-1] <= datetime(2008, 12, 31)
    ]
    episodes = pd.concat(complete_weeks).groupby(pd.Grouper(freq='W'))

    print(f"{resolution} episodes: ", len(episodes))

    # Context managers guarantee both stores are closed even if the loop
    # raises. The demand store is only read, so it is opened read-only: a wrong
    # path now fails immediately instead of silently creating an empty file.
    # NOTE(review): the weather store keeps the default append mode, so keys
    # left over from an earlier run are not removed — confirm whether it
    # should be truncated (mode='w') instead.
    with pd.HDFStore(f'./{resolution}/household_energy_demand.h5', mode='r') as demand:
        with pd.HDFStore(f'./{resolution}/weather_and_time.h5') as store:
            i = 0
            for name, group in episodes:
                key = f"eps_{i}"
                if key not in demand:
                    # Every demand episode already has a weather counterpart;
                    # the remaining weeks cannot match anything.
                    break
                # `name` is the week label produced by the grouper; a week is
                # kept when it ends on the same date as demand episode i.
                if name.date() == demand[key].index[-1].date():
                    store[key] = group
                    i += 1
            print(len(store))
        demand_count = len(demand)
    print(f"{resolution} episodes: ", i)

    # Re-open the written file read-only to verify what was actually persisted.
    with pd.HDFStore(f'./{resolution}/weather_and_time.h5', mode='r') as file:
        print(f"Equal length: {len(file) == demand_count}")

minutely Max solar irradiation:  1082.1
minutely Min solar irradiation:  0.0
minutely Max solar elevation:  64.41
minutely Min solar elevation:  0.0
minutely Max temperature:  35.13
minutely Min temperature:  -10.43
minutely Max wind speed:  12.76
minutely Min wind speed:  0.0
minutely episodes:  104
101
minutely episodes:  101
Equal length: True
hourly Max solar irradiation:  1082.1
hourly Min solar irradiation:  0.0
hourly Max solar elevation:  64.41
hourly Min solar elevation:  0.0
hourly Max temperature:  35.13
hourly Min temperature:  -10.43
hourly Max wind speed:  12.76
hourly Min wind speed:  0.0
hourly episodes:  104
102
hourly episodes:  102
Equal length: True
