In [1]:
import pandas as pd
import pytz

# Column rename tables, keyed by the fuel column names used in the input frames
# (note the input spelling 'fossile_gas' versus 'fossil_gas' in the output names).
# NOTE(review): 'other_conventional_emission' is singular while the other three
# emission names end in '_emissions'. It is written to the exported files, so
# confirm with downstream consumers before harmonising it.
emissions_col_names = dict([
    ('lignite', 'lignite_emissions'),
    ('hard_coal', 'hard_coal_emissions'),
    ('fossile_gas', 'fossil_gas_emissions'),
    ('other_conventionals', 'other_conventional_emission'),
])

generation_col_names = dict([
    ('lignite', 'lignite_generation'),
    ('hard_coal', 'hard_coal_generation'),
    ('fossile_gas', 'fossil_gas_generation'),
    ('other_conventionals', 'other_conventional_generation'),
])

In [2]:
# Load the interim CSV files; the first column of every file becomes the index
## Emissions
emissions = pd.read_csv(
    '../data/interim/emissions_germany_utc_202212312200_202412312200.csv',
    sep=',',
    index_col=0
)

## Generation: one file per region, all read with the same options
generation_files = {
    'f_hertz': '../data/interim/generation_f_hertz_utc_202212312300_202412312245.csv',
    'amprion': '../data/interim/generation_amprion_utc_202212312300_202412312245.csv',
    'tennet': '../data/interim/generation_tennet_utc_202212312300_202412312245.csv',
    'transnet_bw': '../data/interim/generation_transnet_bw_utc_202212312300_202412312245.csv',
}
regional_generation = {
    region: pd.read_csv(csv_path, sep=',', index_col=0)
    for region, csv_path in generation_files.items()
}

# Parse every index as ISO 8601 timestamps and put the rows in chronological order
for df in [emissions, *regional_generation.values()]:
    df.index = pd.to_datetime(df.index, format='ISO8601')
    df.sort_index(inplace=True)
    # Report the timezone of each tz-aware index; naive indexes print nothing
    if df.index.tz is None:
        continue
    print(f'Timezone set to {df.index.tz}')

# Sanity check: count repeated timestamps in each frame
print(f"Emissions duplicates: {emissions.index.duplicated().sum()}")
for reg in regional_generation.keys():
    print(f"Region Duplicates: {regional_generation[reg].index.duplicated().sum()}")

Timezone set to UTC
Timezone set to UTC
Timezone set to UTC
Timezone set to UTC
Timezone set to UTC
Emissions duplicates: 0
Region Duplicates: 0
Region Duplicates: 0
Region Duplicates: 0
Region Duplicates: 0


In [3]:
# Regional allocation of emissions based on share of regional generation from total generation
## Aggregate total generation per production type: summed over all regions, then per hour
total_gen_15min = pd.concat(regional_generation.values()).groupby(level=0).sum()
total_gen_hourly = total_gen_15min.resample('1h').sum()

## Production types that receive an emissions allocation (input column names)
fuels = ['lignite', 'hard_coal', 'fossile_gas', 'other_conventionals']


def allocate_regional_emissions(df_reg, emissions, total_gen_hourly, fuels):
    """Allocate hourly emissions to one region at quarter-hour resolution.

    For every fuel present in ``df_reg`` the hourly emissions are first scaled
    by the region's share of the total hourly generation and then distributed
    over the quarter-hours of that hour in proportion to the region's own
    quarter-hourly generation.

    Parameters
    ----------
    df_reg : pd.DataFrame
        Generation of one region with a sorted DatetimeIndex (15 min steps)
        and one column per fuel.
    emissions : pd.DataFrame
        Hourly emissions with one column per fuel.
    total_gen_hourly : pd.DataFrame
        Hourly generation summed over all regions, one column per fuel.
    fuels : list of str
        Fuel column names to process; fuels missing from ``df_reg`` are skipped.

    Returns
    -------
    pd.DataFrame
        Indexed like ``df_reg``, one column per processed fuel plus
        ``'total_emission'`` (row-wise sum of the fuel columns).
    """
    quarter_hours = df_reg.index
    # Hour each quarter-hour belongs to; used to look up the hourly values.
    hour_of_quarter = quarter_hours.floor('h')
    allocated = pd.DataFrame(index=quarter_hours)

    for fuel in fuels:
        if fuel not in df_reg.columns:
            continue

        ## (1) Regional hourly generation and its share of the total generation
        regional_gen_hourly = df_reg[fuel].resample('h').sum()
        # 0/0 (no generation anywhere in that hour) yields NaN -> share 0
        regional_share_h = (regional_gen_hourly / total_gen_hourly[fuel]).fillna(0)

        ## Regional emissions per hour and production type
        regional_emissions_hourly = emissions[fuel] * regional_share_h

        ## (2) Temporal downscaling to 15 min
        denom = df_reg[fuel].resample('h').transform('sum')  # regional generation of the enclosing hour
        # Share of each quarter-hour in its hour; uniform (0.25) when the ratio is undefined
        weights = (df_reg[fuel] / denom).fillna(0.25)

        # Broadcast the hourly value onto each of its quarter-hours by label lookup.
        # The previous `resample('15min').ffill()` only produced slots up to the
        # last hourly label, so the :15/:30/:45 slots of the final hour were NaN
        # (and showed up as 0 in 'total_emission').
        hourly_on_quarters = pd.Series(
            regional_emissions_hourly.reindex(hour_of_quarter).to_numpy(),
            index=quarter_hours,
        )

        ## Regional emissions per quarter-hour, weighted by quarter-hourly generation
        allocated[fuel] = (hourly_on_quarters * weights).round(2)

    # Total emissions per quarter-hour across all processed production types
    allocated['total_emission'] = allocated.sum(axis=1).round(2)
    return allocated


## Allocate emissions to every region and switch to the output column names
regional_emissions_final = {}

for name, df_reg in regional_generation.items():
    regional_emissions_15min = allocate_regional_emissions(
        df_reg, emissions, total_gen_hourly, fuels
    )
    regional_emissions_final[name] = regional_emissions_15min.rename(columns=emissions_col_names)

In [4]:
# Give the generation frames their descriptive output column names.
# The allocation step selects fuels by their original column names, so it must
# run before this cell; after the rename it would find no fuel columns.
regional_generation.update({
    region: frame.rename(columns=generation_col_names)
    for region, frame in regional_generation.items()
})

In [5]:
# Combine generation and allocated emissions per region and export one file each
for reg, generation_part in regional_generation.items():
    emissions_part = regional_emissions_final[reg]

    # Inner join on the timestamp index: only rows present in both frames are kept
    final_df = generation_part.merge(
        emissions_part,
        how='inner',
        left_index=True,
        right_index=True,
    )

    # The file name carries the first and last timestamp of the merged frame
    min_date, max_date = (
        stamp.strftime('%Y%m%d%H%M')
        for stamp in (final_df.index.min(), final_df.index.max())
    )
    filename = f"final_{reg}_15min_utc_{min_date}_{max_date}"

    # NOTE(review): the file is written without a '.csv' extension, unlike the
    # interim inputs. Confirm whether downstream readers rely on this name.
    final_df.to_csv(f'../data/processed/{filename}', index=True)
    print(f"File saved to {filename}")

File saved to final_f_hertz_15min_utc_202212312300_202412312245
File saved to final_amprion_15min_utc_202212312300_202412312245
File saved to final_tennet_15min_utc_202212312300_202412312245
File saved to final_transnet_bw_15min_utc_202212312300_202412312245
