In [1]:
import pandas as pd
import numpy as np

In [2]:
from pandas.core.window.common import _flex_binary_moment
from pandas.core.window.expanding import _Rolling_and_Expanding

def weighted_average(self, weights, **kwargs):
    """Windowed weighted average of the selected data, weighted by ``weights``.

    Written to be attached as a method to a pandas window class, so ``self``
    is the window object the method is called on.

    Parameters
    ----------
    weights : pandas object
        Weights; wrapped with ``self._shallow_copy`` and paired with the
        window's selected data.
    **kwargs
        Forwarded unchanged to every windowed ``sum`` call.

    Returns
    -------
    The result assembled by ``_flex_binary_moment`` from the per-pair ratios
    ``sum(values * weights) / sum(weights)``.
    """
    weights_window = self._shallow_copy(weights)
    span = self._get_window(weights_window)

    def _windowed_sum(values):
        # Reuse the parent window's min_periods and centring for every sum.
        roller = values.rolling(span, self.min_periods, center=self.center)
        return roller.sum(**kwargs)

    def _ratio_of_sums(values, wts):
        values = values.astype('float64')
        wts = wts.astype('float64')
        numerator = _windowed_sum(values * wts)
        denominator = _windowed_sum(wts)
        return numerator / denominator

    return _flex_binary_moment(
        self._selected_obj,
        weights_window._selected_obj,
        _ratio_of_sums,
        pairwise=True,
    )

# Monkey-patch: expose the function above as a `.weighted_average(...)` method
# on window objects derived from this pandas-internal base class.
setattr(_Rolling_and_Expanding, 'weighted_average', weighted_average)

In [3]:
# Load the turbine metadata. `header=0` together with `names` replaces the
# file's own header row with the names below; `usecols` then drops the
# 'id' and 'NUTS1' columns.
sensors = pd.read_csv(
    '../data/01_raw/metadata/wind_turbine_data.csv',
    sep=';',
    header=0,
    names=[
        'id',
        'power',
        'commissioning_date',
        'hub_height',
        'diameter',
        'nuts_id',
        'lon',
        'lat',
        'NUTS1',
    ],
    usecols=[
        'power',
        'commissioning_date',
        'hub_height',
        'diameter',
        'nuts_id',
        'lon',
        'lat',
    ],
    parse_dates=['commissioning_date'],
    infer_datetime_format=True,
)

In [4]:
# Chronological order (ascending commissioning date); the original row labels
# are kept so rows can still be looked up by index later.
sensors_datesorted = sensors.sort_values(by='commissioning_date')

In [5]:
def _power_weighted_average(coord):
    """Return the average of ``coord`` weighted by the matching ``power`` values.

    The weights are read from the notebook-level ``sensors_datesorted`` frame,
    selecting the rows whose labels equal ``coord.index``.
    """
    matching_power = sensors_datesorted.loc[coord.index, 'power']
    return np.average(coord, weights=matching_power)

# One row per (district, commissioning date): total power installed that day
# and the power-weighted mean position of the turbines commissioned that day.
# `sort=False` keeps the groups in order of first appearance.
sensors_daily_aggregated = (
    sensors_datesorted
    .groupby(by=['nuts_id', 'commissioning_date'], sort=False)
    .agg(
        power=('power', 'sum'),
        lat=('lat', _power_weighted_average),
        lon=('lon', _power_weighted_average),
    )
    .reset_index()
    .set_index('commissioning_date')
)

In [6]:
# Empty daily frame spanning the first to the last commissioning date, with one
# (lat, lon) column pair per district; it is filled in by a later cell.
power_centroids_mts = pd.DataFrame(
    columns=pd.MultiIndex.from_product(
        [sensors_daily_aggregated['nuts_id'].unique(), ['lat', 'lon']],
        names=['nuts_id', 'coords'],
    ),
    index=pd.date_range(
        name='commissioning_date',
        freq='D',
        start=sensors_daily_aggregated.index.min(),
        end=sensors_daily_aggregated.index.max(),
    ),
)

display(power_centroids_mts.head(3))
# `DataFrame.info()` prints its summary itself and returns None, so it is
# called directly; wrapping it in display() only adds a stray "None".
power_centroids_mts.info()

nuts_id,DEF0C,DEF0C,DEF08,DEF08,DEA43,DEA43,DEF07,DEF07,DEA5B,DEA5B,...,DEA2C,DEA2C,DE221,DE221,DE239,DE239,DE21L,DE21L,DE715,DE715
coords,lat,lon,lat,lon,lat,lon,lat,lon,lat,lon,...,lat,lon,lat,lon,lat,lon,lat,lon,lat,lon
commissioning_date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1990-01-11,,,,,,,,,,,...,,,,,,,,,,
1990-01-12,,,,,,,,,,,...,,,,,,,,,,
1990-01-13,,,,,,,,,,,...,,,,,,,,,,


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10187 entries, 1990-01-11 to 2017-12-01
Freq: D
Columns: 606 entries, ('DEF0C', 'lat') to ('DE715', 'lon')
dtypes: object(606)
memory usage: 47.2+ MB


None

In [7]:
# Fill each district's (lat, lon) columns with the expanding power-weighted
# centroid. A single groupby pass yields the per-district rows directly
# (sort=False keeps first-appearance order) instead of re-scanning the whole
# frame with a boolean mask once per district.
for district, single_district_data in sensors_daily_aggregated.groupby('nuts_id', sort=False):
    # Assignment aligns on the date index: only commissioning dates receive
    # a value; every other day stays NaN.
    power_centroids_mts[district] = (
        single_district_data[['lat', 'lon']]
        .expanding()
        .weighted_average(weights=single_district_data['power'])
    )

In [8]:
display(power_centroids_mts.head(3))
# `DataFrame.info()` prints its summary itself and returns None, so it is
# called directly; wrapping it in display() only adds a stray "None".
power_centroids_mts.info()

nuts_id,DEF0C,DEF0C,DEF08,DEF08,DEA43,DEA43,DEF07,DEF07,DEA5B,DEA5B,...,DEA2C,DEA2C,DE221,DE221,DE239,DE239,DE21L,DE21L,DE715,DE715
coords,lat,lon,lat,lon,lat,lon,lat,lon,lat,lon,...,lat,lon,lat,lon,lat,lon,lat,lon,lat,lon
commissioning_date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1990-01-11,54.592251,9.398496,,,,,,,,,...,,,,,,,,,,
1990-01-12,,,,,,,,,,,...,,,,,,,,,,
1990-01-13,,,,,,,,,,,...,,,,,,,,,,


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10187 entries, 1990-01-11 to 2017-12-01
Freq: D
Columns: 606 entries, ('DEF0C', 'lat') to ('DE715', 'lon')
dtypes: float64(606)
memory usage: 47.2 MB


None

In [11]:
# Spot-check district DEF0C in December 2015: values exist only on
# commissioning dates, with NaN gaps in between.
power_centroids_mts['DEF0C'].loc['2015-12'].tail(20)

coords,lat,lon
commissioning_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-12-12,,
2015-12-13,,
2015-12-14,,
2015-12-15,54.675157,9.305429
2015-12-16,54.674072,9.306019
2015-12-17,54.674097,9.305532
2015-12-18,,
2015-12-19,,
2015-12-20,,
2015-12-21,54.673033,9.30614


In [18]:
power_centroids_mts = (
    power_centroids_mts
    .ffill()  # carry the last computed centroid forward across the gaps
    .bfill()  # rows before a column's first value take that first value
)

In [14]:
# First rows of district DEF0C in the gap-filled frame.
power_centroids_mts['DEF0C'].head(20)

coords,lat,lon
commissioning_date,Unnamed: 1_level_1,Unnamed: 2_level_1
1990-01-11,54.592251,9.398496
1990-01-12,54.592251,9.398496
1990-01-13,54.592251,9.398496
1990-01-14,54.592251,9.398496
1990-01-15,54.592251,9.398496
1990-01-16,54.592251,9.398496
1990-01-17,54.592251,9.398496
1990-01-18,54.592251,9.398496
1990-01-19,54.592251,9.398496
1990-01-20,54.592251,9.398496


In [16]:
# Last rows of district DEF0C in the gap-filled frame.
power_centroids_mts['DEF0C'].tail(20)

coords,lat,lon
commissioning_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-11-12,54.668452,9.308232
2017-11-13,54.668452,9.308232
2017-11-14,54.668452,9.308232
2017-11-15,54.668452,9.308232
2017-11-16,54.668452,9.308232
2017-11-17,54.668452,9.308232
2017-11-18,54.668452,9.308232
2017-11-19,54.668452,9.308232
2017-11-20,54.668452,9.308232
2017-11-21,54.668452,9.308232


In [19]:
# Remaining missing values per column after gap filling (expected: all zero).
power_centroids_mts.isna().sum()

nuts_id  coords
DEF0C    lat       0
         lon       0
DEF08    lat       0
         lon       0
DEA43    lat       0
                  ..
DE239    lon       0
DE21L    lat       0
         lon       0
DE715    lat       0
         lon       0
Length: 606, dtype: int64

In [21]:
# Sanity check: re-applying the forward/backward fill must leave the frame
# unchanged, i.e. no gaps remain to be filled.
power_centroids_mts.equals(power_centroids_mts.ffill().bfill())

True