In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import pvlib
import pandas as pd
import numpy as np
from datetime import datetime

In [3]:
# Load the pickled generation record and flatten it into a two-column frame.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open("./raw/generation.pickle", "rb") as pickle_file:
    # Context manager guarantees the file handle is closed, even on error.
    generation = pickle.load(pickle_file)

# The unpickled object exposes `.times` and `.ac`; they become the
# "Datetime" and "AC" columns respectively.
raw_data = pd.DataFrame({"Datetime": generation.times, "AC": generation.ac})

In [4]:
# Build the working frame in one chain: parse the timestamps, index by them,
# and expose the AC readings under the name "energy".
# Dividing by 10 combines the W -> kW conversion (/1000) with a 100-fold
# scale-up of the generation (x100).
data = (
    raw_data[['Datetime', 'AC']]
    .assign(Datetime=lambda frame: pd.to_datetime(frame['Datetime']))
    .set_index('Datetime')
    .rename(columns={"AC": "energy"})
    .assign(energy=lambda frame: frame["energy"] / 10)
)

In [5]:
# The frame at its original sampling rate is used directly as the "hourly"
# series. Note that this is an alias of `data`, not a copy.
data_hourly = data

# Upsample to one row per minute; the gaps between the original samples are
# filled by linear interpolation.
data_minutely = data_hourly.resample('min').interpolate(method='linear')

In [6]:
# Split each series into weekly episodes and write the ones that line up with
# the household-demand episodes into ./<resolution>/rooftop_solar_array.h5.

# A week is only kept when it lies entirely inside this window.
window_start = datetime(2007, 1, 1)
window_end = datetime(2008, 12, 31)

# The loop variable is `frame` (not `data`) so that the notebook-level `data`
# defined in an earlier cell is not rebound by this loop.
for frame, resolution in zip([data_minutely, data_hourly], ["minutely", "hourly"]):
    print(f"{resolution} Max: ", np.max(frame['energy']))
    print(f"{resolution} Min: ", np.min(frame['energy']))

    # Minimum number of non-NaN rows a week needs to be kept
    # (a full week has 10080 minutely rows / 168 hourly rows).
    required_length = 10000 if resolution == "minutely" else 165

    # Keep sufficiently complete weeks inside the window, with remaining NaNs
    # replaced by 0. The length check comes first so that `group.index[0]` is
    # never evaluated on an empty group.
    complete_weeks = [
        group.fillna(0)
        for _, group in frame.groupby(pd.Grouper(freq='W'))
        if len(group.dropna()) >= required_length
        and group.index[0] >= window_start
        and group.index[-1] <= window_end
    ]
    # Re-grouping the concatenated weeks yields one group per calendar week in
    # the covered range, including empty groups for weeks that were dropped.
    episodes = pd.concat(complete_weeks).groupby(pd.Grouper(freq='W'))

    print(f"{resolution} episodes: ", len(episodes))

    # Both stores are managed by `with`, so they are closed even if the loop
    # raises. The demand store is only read, hence mode='r'.
    # NOTE(review): the output store is opened in the default append mode, so
    # `eps_*` keys left over from an earlier run stay in the file -- confirm
    # whether the file should be recreated on each run.
    with pd.HDFStore(f'./{resolution}/household_energy_demand.h5', mode='r') as demand:
        demand_count = len(demand)
        with pd.HDFStore(f'./{resolution}/rooftop_solar_array.h5') as store:
            i = 0
            # End date of demand episode `i`; loaded lazily and reused until
            # `i` advances, so the demand frame is not re-read for every week.
            demand_end_date = None
            for name, group in episodes:
                key = f"eps_{i}"
                if demand_end_date is None:
                    if key not in demand:
                        # Every demand episode has been matched; nothing left
                        # to align against (previously this was a KeyError).
                        break
                    demand_end_date = demand[key].index[-1].date()
                # A week is stored only when its label date equals the last
                # date of the current demand episode and it has rows.
                if name.date() == demand_end_date and not group.empty:
                    store[key] = group
                    i += 1
                    demand_end_date = None
            written_count = len(store)
            print(written_count)
    print(f"{resolution} episodes: ", i)
    # Sanity check: the output store should hold as many entries as the demand store.
    print(f"Equal length: {written_count == demand_count}")

minutely Max:  44.00727161894718
minutely Min:  -0.42697428328038
minutely episodes:  104
                     energy
Datetime                   
2008-03-03 00:00:00  -0.303
2008-03-03 00:01:00  -0.303
2008-03-03 00:02:00  -0.303
2008-03-03 00:03:00  -0.303
2008-03-03 00:04:00  -0.303
...                     ...
2008-03-09 23:55:00  -0.303
2008-03-09 23:56:00  -0.303
2008-03-09 23:57:00  -0.303
2008-03-09 23:58:00  -0.303
2008-03-09 23:59:00  -0.303

[10080 rows x 1 columns]
101
minutely episodes:  101
Equal length: True
hourly Max:  44.00727161894718
hourly Min:  -0.42697428328038
hourly episodes:  104
                     energy
Datetime                   
2008-02-25 00:10:00  -0.303
2008-02-25 01:10:00  -0.303
2008-02-25 02:10:00  -0.303
2008-02-25 03:10:00  -0.303
2008-02-25 04:10:00  -0.303
...                     ...
2008-03-02 19:10:00  -0.303
2008-03-02 20:10:00  -0.303
2008-03-02 21:10:00  -0.303
2008-03-02 22:10:00  -0.303
2008-03-02 23:10:00  -0.303

[168 rows x 1 columns]
1