In [65]:
import pandas as pd
import pytz

In [71]:
# Read dataframes with datetime index
## Emissions
emissions = pd.read_csv(
    '../data/interim/emissions_germany_202301010000_202501010000.csv',
    sep=',', index_col=0
)

## Generation
regions = {
    'f_hertz': pd.read_csv(
        '../data/interim/generation_f_hertz_202301010000_202412312345.csv',
        sep=',', index_col=0
    ),
    'amprion': pd.read_csv(
        '../data/interim/generation_amprion_202301010000_202412312345.csv',
        sep=',', index_col=0
    ),
    'tennet': pd.read_csv(
        '../data/interim/generation_tennet_202301010000_202412312345.csv',
        sep=',', index_col=0
    ),
    'transnet_bw': pd.read_csv(
        '../data/interim/generation_transnet_bw_202301010000_202412312345.csv',
        sep=',', index_col=0)
}

# Convert index to datetime
for df in (emissions, *regions.values()):
    df.index = pd.to_datetime(df.index, format='%Y-%m-%d %H:%M:%S')
    df.sort_index(inplace=True)
    # Check ich any timezone is set - if not, all the same
    if df.index.tz is not None:
        print(f'Timezone set to {df.index.tz}')

for reg in regions:
    print(regions[reg].duplicated().head())

2022-12-31 23:00:00
datetime
2023-01-01 00:00:00    False
2023-01-01 00:15:00    False
2023-01-01 00:30:00    False
2023-01-01 00:45:00    False
2023-01-01 01:00:00    False
dtype: bool
datetime
2023-01-01 00:00:00    False
2023-01-01 00:15:00    False
2023-01-01 00:30:00    False
2023-01-01 00:45:00    False
2023-01-01 01:00:00    False
dtype: bool
datetime
2023-01-01 00:00:00    False
2023-01-01 00:15:00     True
2023-01-01 00:30:00     True
2023-01-01 00:45:00     True
2023-01-01 01:00:00    False
dtype: bool
datetime
2023-01-01 00:00:00    False
2023-01-01 00:15:00    False
2023-01-01 00:30:00    False
2023-01-01 00:45:00     True
2023-01-01 01:00:00    False
dtype: bool


In [67]:
"""
The generation timeseries consists of intervals such that the interval of time t contains [t, t+15)
The emissions timeseries consists of single points in time where at time t contains [t-60, t)
To align both, move emissions time series -1h such that t contains [t, t+60)
By forward filling, both time series can now be aligned, such that for emissions also t contains [t, t+15)
=> Step was performed above, now:
    --> We can compute the hourly emissions per control area, weighed by their share of conventional generation
    --> Next, we can perform a generation weighed downsampling of emissions to quarter hourly resolution
"""
# Regional allocation of emissions based on share of regional generation from total generation
## Aggregate total generation per production type and the hour
total_gen_15min = pd.concat(regions.values()).groupby(level=0).sum()
total_gen_hourly = total_gen_15min.resample('1h').sum()

## Allocate emissions to regions based on share of regional generation
regional_emissions_final = {}

for name, df_reg in regions.items():
    fuels = ['lignite', 'hard_coal', 'fossile_gas', 'other_conventionals']
    regional_emissions_15min = pd.DataFrame(index=df_reg.index)

    for fuel in fuels:
        if fuel in df_reg.columns:
            ## (1) Regional hourly generation per production type
            regional_gen_hourly = df_reg[fuel].resample('h').sum()

            ## Share of regional generation per production type on total generation per production type
            regional_share_h = (regional_gen_hourly / total_gen_hourly[fuel]).fillna(0) # In case of no generation in a region, set share to 0

            ## Regional emissions per hour
            regional_emissions_hourly = emissions[fuel] * regional_share_h

            ## (2) Temporal downscaling to 15 min
            denom = df_reg[fuel].resample('h').transform('sum')
            weights = (df_reg[fuel] / denom).fillna(0.25) # share of each quarter-hour on the generation per hour; uniform distribution in case of no generation

            ## Regional emissions per quarter-hour
            regional_emissions_15min[fuel] = regional_emissions_hourly.resample('15T').ffill() * weights # weighted upsampling of regional emissions to 15 min resolution

    # Total emissions per control area
    regional_emissions_15min['total_emission']  = regional_emissions_15min.sum(axis=1)
    regional_emissions_final[name] = regional_emissions_15min


  regional_emissions_15min[fuel] = regional_emissions_hourly.resample('15T').ffill() * weights # weighted upsampling of regional emissions to 15 min resolution


ValueError: cannot reindex on an axis with duplicate labels

In [None]:
regions.keys()

In [None]:
regions['amprion'].columns

In [None]:
total_gen_hourly.head()