In [1]:
import pandas as pd
import numpy as np

In [2]:
from pandas.core.window.common import _flex_binary_moment
from pandas.core.window.expanding import _Rolling_and_Expanding

def weighted_average(self, weights, **kwargs):
    """Windowed weighted mean, written to be attached to a pandas window class.

    For every window the result is ``sum(values * weights) / sum(weights)``,
    where both sums are rolling sums that reuse this object's
    ``min_periods`` and ``center`` settings.

    Parameters
    ----------
    weights
        Weights paired with the selected data. It is wrapped with
        ``self._shallow_copy`` so that its ``_selected_obj`` can be read.
    **kwargs
        Forwarded unchanged to each rolling ``sum`` call.

    Returns
    -------
    The value produced by ``_flex_binary_moment`` for the selected data and
    the weights (called with ``pairwise=True``).
    """
    weights = self._shallow_copy(weights)
    # NOTE(review): the window length comes from a private pandas helper;
    # its exact semantics depend on the installed pandas version.
    window = self._get_window(weights)

    def _windowed_sum(values):
        # Rolling sum configured like the calling window object.
        roller = values.rolling(window, self.min_periods, center=self.center)
        return roller.sum(**kwargs)

    def _get_weighted_average(X, Y):
        values = X.astype('float64')
        factors = Y.astype('float64')
        numerator = _windowed_sum(values * factors)
        denominator = _windowed_sum(factors)
        return numerator / denominator

    return _flex_binary_moment(
        self._selected_obj,
        weights._selected_obj,
        _get_weighted_average,
        pairwise=True,
    )

# Monkeypatch: attach the function as a method on pandas' private
# _Rolling_and_Expanding class so it can be called as `.weighted_average(...)`
# on window objects (used below via `.expanding()`).
_Rolling_and_Expanding.weighted_average = weighted_average

In [3]:
# Load the wind turbine metadata; the file's own header row is replaced by
# `names`, and only the columns listed in `usecols` are kept.
sensors = pd.read_csv(
    '../data/01_raw/metadata/wind_turbine_data.csv',
    sep=';',
    header=0,
    names=[
        'id',
        'power',
        'commissioning_date',
        'hub_height',
        'diameter',
        'nuts_id',
        'lon',
        'lat',
        'NUTS1',
    ],
    usecols=[
        'power',
        'commissioning_date',
        'hub_height',
        'diameter',
        'nuts_id',
        'lon',
        'lat',
    ],
    parse_dates=['commissioning_date'],
    infer_datetime_format=True,
)

In [4]:
# Exploratory check: list every attribute name available on the loaded frame.
dir(sensors)

['T',
 '_AXIS_LEN',
 '_AXIS_NAMES',
 '_AXIS_NUMBERS',
 '_AXIS_ORDERS',
 '_AXIS_REVERSED',
 '_AXIS_TO_AXIS_NUMBER',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__re

In [5]:
# Order the turbines chronologically by commissioning date.
sensors_datesorted = sensors.sort_values(by='commissioning_date')

In [6]:
def _power_weighted_average(coord):
    return np.average(coord, weights=sensors_datesorted.loc[coord.index, 'power'])

# One row per (district, commissioning date): total power plus the
# power-weighted mean latitude/longitude, indexed by commissioning date.
sensors_daily_aggregated = (
    sensors_datesorted
    .groupby(by=['nuts_id', 'commissioning_date'], sort=False)
    .agg(
        power=('power', 'sum'),
        lat=('lat', _power_weighted_average),
        lon=('lon', _power_weighted_average),
    )
    .reset_index()
    .set_index('commissioning_date')
)

In [7]:
# Move 'commissioning_date' out of the index and back into an ordinary column.
sensors_daily_aggregated_simpleindex = sensors_daily_aggregated.reset_index()

# Peek at the last three rows.
sensors_daily_aggregated_simpleindex.tail(3)

Unnamed: 0,commissioning_date,nuts_id,power,lat,lon
13588,2017-11-01,DE11D,3000.0,49.034109,10.177751
13589,2017-11-03,DEB15,2400.0,49.639484,7.20628
13590,2017-12-01,DEB3H,3300.0,49.170525,8.239301


In [8]:
# Keep only the rows of district 'DEF0C' and index them by commissioning date.
single_district_data = (
    sensors_daily_aggregated_simpleindex
    .loc[sensors_daily_aggregated_simpleindex['nuts_id'] == 'DEF0C']
    .set_index('commissioning_date')
)

single_district_data[['lat', 'lon']].tail(3)

Unnamed: 0_level_0,lat,lon
commissioning_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-12-15,54.72028,9.301851
2017-03-13,54.389937,9.450079
2017-03-27,54.388749,9.45541


In [9]:
# Expanding, power-weighted average of the district's coordinates.
power_centroids_ts = (
    single_district_data[['lat', 'lon']]
    .expanding()
    .weighted_average(weights=single_district_data['power'])
)

In [10]:
# Inspect the last rows of the expanding weighted-average series.
power_centroids_ts.tail()

Unnamed: 0_level_0,lat,lon
commissioning_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-28,54.671404,9.306583
2016-12-13,54.670255,9.307216
2016-12-15,54.670455,9.307194
2017-03-13,54.669452,9.307705
2017-03-27,54.668452,9.308232


In [11]:
# For comparison: the per-date input coordinates the averages were built from.
single_district_data[['lat', 'lon']].tail()

Unnamed: 0_level_0,lat,lon
commissioning_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-28,54.522999,9.402982
2016-12-13,54.385309,9.464164
2016-12-15,54.72028,9.301851
2017-03-13,54.389937,9.450079
2017-03-27,54.388749,9.45541


In [12]:
# Rebuild the date-indexed frame from `sensors_daily_aggregated` instead of
# re-assigning the variable from itself. The self-referential form raises a
# KeyError when this cell is re-run, because 'commissioning_date' has by then
# already been moved out of the columns and into the index.
sensors_daily_aggregated_simpleindex = (
    sensors_daily_aggregated
    .reset_index()
    .set_index('commissioning_date')
)

In [13]:
# Shallow copy (deep=False): a new DataFrame object that does not duplicate
# the underlying data of the source frame.
sensors_df = sensors_daily_aggregated_simpleindex.copy(deep=False)

In [14]:
# Map each district id to its expanding, power-weighted coordinate average.
power_centroids_mts = dict()

for district in np.unique(sensors_df['nuts_id']):
    single_district_data = sensors_df.loc[sensors_df['nuts_id'] == district]
    power_centroids_mts[district] = (
        single_district_data[['lat', 'lon']]
        .expanding()
        .weighted_average(weights=single_district_data['power'])
    )

In [69]:
# Show the last three rows stored for district 'DEF0C'.
power_centroids_mts['DEF0C'].tail(3)

coords,lat,lon
commissioning_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-11-29,,
2017-11-30,,
2017-12-01,,


In [66]:
# Empty frame to be filled later: one row per calendar day between the
# smallest and largest index value of the aggregated data, and one
# (nuts_id, coords) column for every district / coordinate combination.
power_centroids_mts = pd.DataFrame(
    columns=pd.MultiIndex.from_product(
        [sensors_df['nuts_id'].unique(), ['lat', 'lon']],
        names=['nuts_id', 'coords'],
    ),
    index=pd.date_range(
        end=sensors_daily_aggregated_simpleindex.index.max(),
        start=sensors_daily_aggregated_simpleindex.index.min(),
        name='commissioning_date',
        freq='D',
    ),
)

power_centroids_mts

nuts_id,DEF0C,DEF0C,DEF08,DEF08,DEA43,DEA43,DEF07,DEF07,DEA5B,DEA5B,...,DEA2C,DEA2C,DE221,DE221,DE239,DE239,DE21L,DE21L,DE715,DE715
coords,lat,lon,lat,lon,lat,lon,lat,lon,lat,lon,...,lat,lon,lat,lon,lat,lon,lat,lon,lat,lon
commissioning_date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1990-01-11,,,,,,,,,,,...,,,,,,,,,,
1990-01-12,,,,,,,,,,,...,,,,,,,,,,
1990-01-13,,,,,,,,,,,...,,,,,,,,,,
1990-01-14,,,,,,,,,,,...,,,,,,,,,,
1990-01-15,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-11-27,,,,,,,,,,,...,,,,,,,,,,
2017-11-28,,,,,,,,,,,...,,,,,,,,,,
2017-11-29,,,,,,,,,,,...,,,,,,,,,,
2017-11-30,,,,,,,,,,,...,,,,,,,,,,


In [67]:
# Store each district's expanding, power-weighted coordinate average under
# that district's top-level column key.
for district in sensors_df['nuts_id'].unique():
    single_district_data = sensors_df[sensors_df['nuts_id'] == district]
    power_centroids_mts[district] = (
        single_district_data[['lat', 'lon']]
        .expanding()
        .weighted_average(weights=single_district_data['power'])
    )

In [16]:
# Pair up the 'DEF0C' latitude and longitude values into (lat, lon) tuples,
# keeping the same index as the source data.
power_centroids_mts_tuple = pd.Series(
    list(
        zip(
            power_centroids_mts['DEF0C']['lat'],
            power_centroids_mts['DEF0C']['lon'],
        )
    ),
    name='geocenter_lat_lon',
    index=power_centroids_mts['DEF0C'].index,
)

In [68]:
# Display the frame again to inspect its contents after the fill loop.
power_centroids_mts

nuts_id,DEF0C,DEF0C,DEF08,DEF08,DEA43,DEA43,DEF07,DEF07,DEA5B,DEA5B,...,DEA2C,DEA2C,DE221,DE221,DE239,DE239,DE21L,DE21L,DE715,DE715
coords,lat,lon,lat,lon,lat,lon,lat,lon,lat,lon,...,lat,lon,lat,lon,lat,lon,lat,lon,lat,lon
commissioning_date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1990-01-11,54.592251,9.398496,,,,,,,,,...,,,,,,,,,,
1990-01-12,,,,,,,,,,,...,,,,,,,,,,
1990-01-13,,,,,,,,,,,...,,,,,,,,,,
1990-01-14,,,,,,,,,,,...,,,,,,,,,,
1990-01-15,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-11-27,,,,,,,,,,,...,,,,,,,,,,
2017-11-28,,,,,,,,,,,...,,,,,,,,,,
2017-11-29,,,,,,,,,,,...,,,,,,,,,,
2017-11-30,,,,,,,,,,,...,,,,,,,,,,


In [77]:
# Look at the March 2017 rows for district 'DEF0C'.
power_centroids_mts.loc['2017-03', 'DEF0C'].tail(20)

coords,lat,lon
commissioning_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-03-12,,
2017-03-13,54.669452,9.307705
2017-03-14,,
2017-03-15,,
2017-03-16,,
2017-03-17,,
2017-03-18,,
2017-03-19,,
2017-03-20,,
2017-03-21,,
