# Generating synthetic time series for PanTaGruEl

PanTaGruEl is a model of the European high-voltage network. It contains nearly 4000 nodes for which synthetic time series must be created. This notebook describes the procedure.

In [1]:
import sys
sys.path.append('../TimeSeries/')

import time_series as ts
import numpy as np
import pandas as pd
import os.path

## Loads

### Get a list of loads with a weight by country

The Julia package `TemperateOptimalPowerFlow.jl` can be used to generate a list of all loads in PanTaGruEl, together with some relevant information such as:
- the country in which the load is placed
- a weight indicating how much of the country's total load should be assigned to that particular load.

This list is stored in a CSV file that can be fetched with Pandas:

In [24]:
# Per-load metadata for PanTaGruEl. The cells below rely on the `id`, `country`
# and `load_prop` columns of this table.
loads_info = pd.read_csv('data/pantagruel_load_info.csv')
loads_info

Unnamed: 0,id,country,load_prop,name
0,3935,DE,0.002204,Unnamed 1340
1,2243,FR,0.000443,Vallorcine
2,1881,FR,0.001275,Mallemort
3,1907,FR,0.000635,Breuil
4,2923,FR,0.000650,La Justice
...,...,...,...,...
3993,3028,ES,0.000088,Conso
3994,563,ES,0.005850,Sanchinarro
3995,732,DK,0.000705,Unnamed 199
3996,3276,PT,0.013745,Unnamed 115


In [3]:
# Total number of loads in the model (one row of `loads_info` per load).
n_loads = loads_info.shape[0]

The number of loads in each country is determined as follows:

In [4]:
# Number of loads per country, i.e. how many synthetic series each country needs.
# `value_counts` orders the countries by decreasing count; `countries` below
# inherits this order.
load_count_by_country = loads_info["country"].value_counts()
load_count_by_country

ES    908
FR    843
DE    560
IT    323
PL    188
DK    180
CH    163
PT    145
RO    124
AT     78
CZ     71
BG     64
BE     50
HU     47
RS     47
SK     43
GR     38
NL     38
BA     35
HR     28
SI     15
ME     10
Name: country, dtype: int64

In [5]:
# Country codes present in the model, ordered as in `load_count_by_country`.
countries = load_count_by_country.index

### Assign a unique label to each load

For each load of the model, assign a label that is unique within its country (a running index that starts at 0 for every country):

In [6]:
# Number the loads of each country 0, 1, 2, ... in order of appearance, so that
# the pair (country, label) identifies a load uniquely. `cumcount` is the
# vectorised pandas equivalent of keeping a running counter per country.
loads_info["label"] = loads_info.groupby("country").cumcount()
loads_info

Unnamed: 0,id,country,load_prop,name,label
0,3935,DE,0.002204,Unnamed 1340,0
1,2243,FR,0.000443,Vallorcine,0
2,1881,FR,0.001275,Mallemort,1
3,1907,FR,0.000635,Breuil,2
4,2923,FR,0.000650,La Justice,3
...,...,...,...,...,...
3993,3028,ES,0.000088,Conso,905
3994,563,ES,0.005850,Sanchinarro,906
3995,732,DK,0.000705,Unnamed 199,179
3996,3276,PT,0.013745,Unnamed 115,144


Verify that each combination of country and label is unique:

In [7]:
# Every (country, label) pair must form its own group, i.e. identify exactly one load.
n_country_label_pairs = len(loads_info.groupby(["country", "label"]))
assert n_country_label_pairs == n_loads

### Use ENTSO-E models for synthetic time series

We make use of the ENTSO-E models stored in the directory `../TimeSeries/models/`:

In [8]:
def find_entsoe_model_file(country, models_dir='../TimeSeries/models/',
                           start_years=range(2015, 2020), end_year=2023):
    """Locate the ENTSO-E load model file of a country.

    Model files are named ``entsoe_load_<country>_<start_year>_<end_year>.npz``.
    The start year differs between countries, so each candidate start year is
    tried in turn and the first existing file wins.

    Parameters
    ----------
    country : str
        Country code as it appears in the model file names (e.g. ``'FR'``).
    models_dir : str, optional
        Directory containing the model files.
    start_years : iterable of int, optional
        Candidate start years, tried in iteration order.
    end_year : int, optional
        End year expected in the file name.

    Returns
    -------
    str or None
        Path of the first matching file, or ``None`` if no candidate exists.
    """
    for start_year in start_years:
        candidate_filename = os.path.join(
            models_dir,
            'entsoe_load_%s_%d_%d.npz' % (country, start_year, end_year))
        if os.path.isfile(candidate_filename):
            return candidate_filename
    # No candidate start year matched: let the caller decide how to react.
    return None

In [9]:
# Resolve the model file of every country that has at least one load.
model_file_by_country = {country: find_entsoe_model_file(country) for country in countries}

# `find_entsoe_model_file` returns None when nothing matches. Fail here, naming
# the offending countries, rather than letting a None path reach
# `ts.import_model` further down.
countries_without_model = [country for country, model_file in model_file_by_country.items()
                           if model_file is None]
assert not countries_without_model, (
    "No ENTSO-E model file found for: %s" % ", ".join(map(str, countries_without_model)))
model_file_by_country

{'ES': '../TimeSeries/models/entsoe_load_ES_2015_2023.npz',
 'FR': '../TimeSeries/models/entsoe_load_FR_2015_2023.npz',
 'DE': '../TimeSeries/models/entsoe_load_DE_2015_2023.npz',
 'IT': '../TimeSeries/models/entsoe_load_IT_2015_2023.npz',
 'PL': '../TimeSeries/models/entsoe_load_PL_2015_2023.npz',
 'DK': '../TimeSeries/models/entsoe_load_DK_2015_2023.npz',
 'CH': '../TimeSeries/models/entsoe_load_CH_2015_2023.npz',
 'PT': '../TimeSeries/models/entsoe_load_PT_2015_2023.npz',
 'RO': '../TimeSeries/models/entsoe_load_RO_2015_2023.npz',
 'AT': '../TimeSeries/models/entsoe_load_AT_2015_2023.npz',
 'CZ': '../TimeSeries/models/entsoe_load_CZ_2015_2023.npz',
 'BG': '../TimeSeries/models/entsoe_load_BG_2015_2023.npz',
 'BE': '../TimeSeries/models/entsoe_load_BE_2015_2023.npz',
 'HU': '../TimeSeries/models/entsoe_load_HU_2015_2023.npz',
 'RS': '../TimeSeries/models/entsoe_load_RS_2016_2023.npz',
 'SK': '../TimeSeries/models/entsoe_load_SK_2015_2023.npz',
 'GR': '../TimeSeries/models/entsoe_load

### Generate time series for each country

For each country, we generate the corresponding number of synthetic time series based on the ENTSO-E models.

Only one time step per hour is kept (the first sample of each hour), and each series covers 364 days.

In [10]:
def generate_time_series(country, count):
    """Generate `count` synthetic hourly load series for one country.

    The ENTSO-E model registered for `country` in `model_file_by_country` is
    imported and sampled through the `time_series` module. The samples are then
    reduced to one value per hour over 364 days.

    Parameters
    ----------
    country : str
        Country code, used as key in `model_file_by_country`.
    count : int
        Number of series to generate.

    Returns
    -------
    array
        Array of shape ``(count, 364 * 24)``, one row per series.

    Notes
    -----
    The reshape requires the generated data to hold a whole number of samples
    per hour for every series; presumably it raises otherwise (NumPy's
    ``reshape`` raises ``ValueError`` -- confirm the return type of
    ``ts.generate_time_series``).
    """
    hours_per_series = 364 * 24
    model = ts.import_model(model_file_by_country[country])
    time_series = ts.generate_time_series(model, count)
    # Group the samples of each series by hour (last axis = samples within the
    # hour) and keep only the first sample of every hour.
    return time_series.reshape(count, hours_per_series, -1)[:, :, 0]

In [11]:
# One block of synthetic series per country, with as many series as the
# country has loads.
loads_by_country = {
    country: generate_time_series(country, n_series)
    for country, n_series in load_count_by_country.items()
}

Combine all time series into an array:

In [12]:
# Pick, for every load, the series with its label from its country's block and
# scale it by the load's share of the national load.
# The three columns are read by name, so the result does not depend on the
# column order of `loads_info` and no builtin name (`id`) is shadowed.
all_time_series = np.array([
    load_prop * loads_by_country[country][label]
    for country, load_prop, label in zip(
        loads_info["country"], loads_info["load_prop"], loads_info["label"]
    )
])

In [13]:
# One row per load, one column per hour of the 364-day period.
expected_shape = (n_loads, 364 * 24)
assert all_time_series.shape == expected_shape

### Export the time series

Define a dataframe containing all time series, with time steps as columns and loads as rows:

In [14]:
# Rows are loads (indexed by their PanTaGruEl id), columns are hourly time steps.
load_ids = pd.Index(loads_info["id"])
time_series_df = pd.DataFrame(data=all_time_series, index=load_ids)
time_series_df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3935,92.945033,89.480785,88.225194,91.061785,96.666502,107.999493,123.943390,139.592871,144.907957,148.770228,...,99.525113,102.352126,115.071266,119.518179,119.523723,116.480288,111.036684,113.467170,106.035647,100.257807
2243,28.410612,27.691152,26.464885,25.851808,26.482306,27.993760,29.556558,30.316745,30.796190,31.283045,...,30.135432,30.007257,31.250390,32.880663,33.400034,32.343622,31.047046,31.173392,31.245237,30.367551
1881,72.703218,71.555330,68.839141,68.020381,70.839493,76.842860,82.792519,86.751300,88.801406,89.861263,...,73.035680,72.660657,76.448504,81.237094,82.944178,80.323685,77.158848,77.214398,76.728922,75.739750
1907,46.953000,46.047000,44.327984,43.774499,45.327597,48.422931,51.884726,53.581210,54.255203,54.747989,...,46.143635,46.032430,48.322015,51.495865,52.555267,51.266080,49.429462,49.903205,50.457880,48.764180
2923,32.350021,31.410817,29.857433,29.053486,29.899647,32.109750,34.114474,34.707470,34.499595,34.245380,...,32.566646,32.496250,34.541452,37.350491,38.149554,36.649713,35.334095,35.736826,36.890201,36.366333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3028,1.932037,1.829253,1.784003,1.799255,1.891764,2.184144,2.587637,2.973570,3.277607,3.420166,...,2.217195,2.164416,2.220481,2.466483,2.605313,2.690442,2.706683,2.595562,2.356218,2.122550
563,131.117691,124.717343,120.854777,119.814169,123.039452,135.108957,152.451960,166.804350,177.524296,185.651991,...,152.071758,148.443699,151.156238,166.431123,176.109619,181.030601,182.017785,174.484630,159.329715,143.388321
732,1.881326,1.898496,1.888449,1.937411,2.087234,2.408764,2.800369,3.024296,3.108427,3.205517,...,2.463953,2.620689,2.781129,2.722922,2.606264,2.480823,2.358969,2.242723,2.109034,1.936989
3276,71.880270,65.877856,62.661583,61.518653,61.367962,62.792931,69.020118,79.735120,91.430762,99.716519,...,79.507975,78.373675,78.766867,86.212726,95.667902,99.086560,99.014021,94.894701,88.488985,79.468632


Export to CSV format:

In [27]:
# Write the load series to disk: one row per load (indexed by load id), one
# column per hourly time step.
time_series_df.to_csv("data/pantagruel_load_series.csv")

The whole procedure can be iterated multiple times to generate more than one time series for each load.

## Generators

As with the loads, the list of generators of PanTaGruEl is stored in a CSV file:

In [23]:
# Per-generator metadata for PanTaGruEl. The cells below use the `id`, `type`
# and `cost` columns of this table.
gens_info = pd.read_csv('data/pantagruel_gen_info.csv')
gens_info

Unnamed: 0,id,type,pmax,country,name,cost
0,1,Hydro,1.14000,BG,Uzundzhovo,6000
1,519,hydro_pure_ps,11.64000,BE,Coo,10000
2,788,Geothermal,0.19000,IT,Montalto,0
3,774,fossil_brown_lignite,1.77000,HU,Tisza II,2400
4,599,Biomass,0.01902,FR,Beaucouze,1000
...,...,...,...,...,...,...
1078,732,nuclear,52.00000,FR,Cattenom,1600
1079,603,fossil_oil,14.15000,FR,Le Havre,10000
1080,414,other_nl,4.08000,RO,Brâila,1000
1081,710,fossil_coal_gas,1.53000,FR,Chapelle''d Arblay,11000


There are many "types" of generators and some are more descriptive than others. For instance, in some countries many generators are described as "Hydro", while in others the distinction is made between "hydro_pure_storage", "hydro_ror", and so on.

In [16]:
# Distinct generator type names, exactly as spelled in the data (case-sensitive).
gen_types = {gen_type for gen_type in gens_info['type']}
gen_types

{'Biomass',
 'Coal',
 'Gas',
 'Geothermal',
 'Hydro',
 'Nuclear',
 'Oil',
 'Waste',
 'biomass',
 'fossil_brown_lignite',
 'fossil_brown_lignite_cons',
 'fossil_coal_gas',
 'fossil_coal_gas_cons',
 'fossil_coal_hard',
 'fossil_mixed',
 'fossil_oil',
 'hydro_mixed',
 'hydro_mixed_cons',
 'hydro_pure_ps',
 'hydro_pure_storage',
 'hydro_pure_storage_cons',
 'hydro_ror',
 'nuclear',
 'nuclear_cons',
 'other_nl',
 'other_nrenew',
 'other_nrenew_cons',
 'waste_nr'}

Isolate all generators of hydraulic type:

In [17]:
# A type counts as hydraulic when its name contains "hydro", ignoring case.
hydro_types = {
    gen_type
    for gen_type in gen_types
    if 'hydro' in str.lower(gen_type)
}
hydro_types

{'Hydro',
 'hydro_mixed',
 'hydro_mixed_cons',
 'hydro_pure_ps',
 'hydro_pure_storage',
 'hydro_pure_storage_cons',
 'hydro_ror'}

In [18]:
# Keep only the generators whose type is one of the hydraulic types.
is_hydro = gens_info['type'].isin(hydro_types)
hydro_gens_info = gens_info.loc[is_hydro]
hydro_gens_info

Unnamed: 0,id,type,pmax,country,name,cost
0,1,Hydro,1.14000,BG,Uzundzhovo,6000
1,519,hydro_pure_ps,11.64000,BE,Coo,10000
5,491,hydro_ror,0.72300,AT,Pyhrn,1000
6,228,Hydro,0.16400,DE,Wengerohr,6000
9,227,Hydro,0.16400,DE,Wengerohr,6000
...,...,...,...,...,...,...
1070,1008,hydro_ror,5.95000,ES,Narcea,1000
1072,434,hydro_ror,1.38000,RO,Iaz,1000
1073,604,hydro_ror,0.96000,FR,St. Chamas,1000
1075,563,Hydro,0.01105,FR,Le Hourat,6000


Generate noise series with mean one and standard deviation 0.1:

In [19]:
# One noise series per hydro generator, rescaled to fluctuate around 1.0 with
# amplitude factor 0.1.
# NOTE(review): this presumes `ts.generate_noise` returns zero-mean, unit-variance
# series -- the statistics computed in the next cell are the check for that.
n_hydro_gens = len(hydro_gens_info)
raw_noise = ts.generate_noise(count=n_hydro_gens)
noise_time_series = 1.0 + 0.1 * raw_noise

In [20]:
# Sanity check: the overall mean (expected close to 1) and the standard deviation
# computed along axis 1, i.e. per series, then averaged over all series (expected
# close to 0.1).
noise_time_series.mean(), noise_time_series.std(axis=1).mean()

(0.9999999999999988, 0.09934625244156545)

Multiply by the static cost of each generator to obtain cost time series:

In [21]:
# Turn the per-generator costs into a column vector so that each noise series
# (one row) is scaled by the static cost of its own generator.
static_cost_column = hydro_gens_info["cost"].to_numpy()[:, np.newaxis]
cost_time_series = noise_time_series * static_cost_column

Pack everything into a dataframe, with time steps as columns and generators as rows:

In [22]:
# Rows are hydro generators (indexed by their PanTaGruEl id), columns are time steps.
hydro_gen_ids = pd.Index(hydro_gens_info["id"])
cost_time_series_df = pd.DataFrame(data=cost_time_series, index=hydro_gen_ids)
cost_time_series_df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5169.150338,5304.621871,5360.389003,4958.319593,5134.036622,5543.117355,5187.776199,5593.691432,5984.477627,5442.573180,...,6117.563349,5809.844178,5890.036275,5983.943418,5387.059988,5536.998874,5611.429344,5209.273099,5381.079283,5382.040982
519,10283.831518,8609.605980,7688.742092,9899.687423,10235.400972,8567.763639,8676.248952,9736.899820,8961.318505,9162.232098,...,9440.195051,9572.420336,8886.089321,9439.775262,10572.442370,10331.251392,8710.206090,8706.911088,9635.261739,9809.082533
491,925.493747,936.136415,809.841093,885.098426,977.529245,918.723415,936.645076,916.026855,837.108809,923.465460,...,892.678635,911.549241,896.829965,865.084190,923.411165,953.020448,894.981034,923.167195,957.528068,878.019499
228,5674.255183,5316.803085,5055.114745,5778.562077,5443.430198,4687.223835,5646.038747,5811.176213,5366.966530,5020.192151,...,5568.613957,4745.103385,4589.115388,5105.547910,5549.337728,5622.909189,6243.953092,6190.543228,5634.671412,5708.171748
227,5860.769153,5618.070285,5741.656176,5099.199807,4976.147113,5332.270738,5751.730928,5991.526034,5616.004667,5923.484214,...,6229.943909,6457.861590,6418.030401,5900.362472,5917.635307,6036.030343,5435.122470,5582.187643,5688.780576,5862.235736
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008,1154.995700,1136.170162,1088.800723,1057.571281,1151.874102,1136.758916,1049.366083,1085.174513,1028.842366,1005.777865,...,1126.372371,1063.802656,1102.867458,1214.423767,1218.601444,1140.451951,1105.274380,1079.669789,1015.820748,1078.646468
434,980.067878,1057.866892,987.287073,900.822396,963.378531,939.899360,952.373034,922.119482,792.590898,891.673640,...,925.059845,1081.996787,1056.365599,1102.467345,1023.823483,844.848659,958.340263,998.152006,946.849263,960.358437
604,924.131899,897.834463,973.001616,1008.030737,1055.329493,1049.673529,933.937474,939.280359,975.935037,1012.579455,...,1005.191074,990.496819,935.130188,903.810548,992.045075,1003.113631,987.617809,987.745098,926.006886,934.083194
563,5732.659558,5545.877264,5668.344817,5800.027428,6445.335402,6889.179619,7007.161982,7168.366222,5745.350646,5364.167438,...,6673.103503,6316.341961,5901.458489,6422.189094,5544.287304,5116.955667,5907.552851,6399.327282,5957.151031,5560.736098


Export to CSV format:

In [50]:
# Write the cost series to disk: one row per hydro generator (indexed by
# generator id), one column per time step.
cost_time_series_df.to_csv("data/pantagruel_gen_cost_series.csv")