# Generating synthetic time series for PanTaGruEl

PanTaGruEl is a model of the European high-voltage network. It contains nearly 4000 nodes for which synthetic time series must be created. This notebook describes the procedure.

In [1]:
import sys
sys.path.append('..')

import time_series as ts
import numpy as np
import pandas as pd
import os.path

## Loads

### Get a list of loads with a weight by country

The Julia package `TemperateOptimalPowerFlow.jl` can be used to generate a list of all loads in PanTaGruEl, together with some relevant information such as:
- the country in which the load is placed
- a weight indicating how much of the country's total load should be assigned to that particular load.

This list is stored in a CSV file that can be fetched with Pandas:

In [2]:
# Read the list of PanTaGruEl loads (id, country, share of the country's load, name)
# from the CSV file, then display it.
loads_info = pd.read_csv('../../data/pantagruel_loads_info.csv')
loads_info

Unnamed: 0,id,country,load_prop,name
0,3935,DE,0.002204,Unnamed 1340
1,2243,FR,0.000443,Vallorcine
2,1881,FR,0.001275,Mallemort
3,1907,FR,0.000635,Breuil
4,2923,FR,0.000650,La Justice
...,...,...,...,...
3993,3028,ES,0.000088,Conso
3994,563,ES,0.005850,Sanchinarro
3995,732,DK,0.000705,Unnamed 199
3996,3276,PT,0.013745,Unnamed 115


In [3]:
# Total number of loads in the model (one row of `loads_info` per load).
n_loads = loads_info.shape[0]

The number of loads in each country is determined as follows:

In [4]:
# Number of loads per country code; `value_counts` orders the result by decreasing count.
load_count_by_country = loads_info["country"].value_counts()
load_count_by_country

ES    908
FR    843
DE    560
IT    323
PL    188
DK    180
CH    163
PT    145
RO    124
AT     78
CZ     71
BG     64
BE     50
HU     47
RS     47
SK     43
GR     38
NL     38
BA     35
HR     28
SI     15
ME     10
Name: country, dtype: int64

In [5]:
# Country codes present in the model (the index of the per-country load counts).
countries = load_count_by_country.index

### Assign a unique label to each load

For each load of the model, assign a label that is unique for the country:

In [6]:
# Number the loads within each country in order of appearance (0, 1, 2, ...),
# so that the pair (country, label) identifies a load uniquely.
# `cumcount` returns the labels aligned with the original row order, and
# re-running this cell always produces the same column.
loads_info["label"] = loads_info.groupby("country").cumcount()
loads_info

Unnamed: 0,id,country,load_prop,name,label
0,3935,DE,0.002204,Unnamed 1340,0
1,2243,FR,0.000443,Vallorcine,0
2,1881,FR,0.001275,Mallemort,1
3,1907,FR,0.000635,Breuil,2
4,2923,FR,0.000650,La Justice,3
...,...,...,...,...,...
3993,3028,ES,0.000088,Conso,905
3994,563,ES,0.005850,Sanchinarro,906
3995,732,DK,0.000705,Unnamed 199,179
3996,3276,PT,0.013745,Unnamed 115,144


Verify that each combination of country and label is unique:

In [7]:
# There must be as many distinct (country, label) groups as there are loads,
# i.e. no two loads share the same pair.
assert len(loads_info.groupby(["country", "label"])) == n_loads

### Use ENTSO-E models for synthetic time series

We make use of the ENTSO-E models stored in the directory `../models/`:

In [8]:
def find_entsoe_model_file(country):
    """Locate the ENTSO-E load model file for a country.

    Candidate files are named ``../models/entsoe_load_<country>_<year>_2023.npz``
    and are probed for start years 2015 through 2019, in increasing order.

    Returns the first candidate path that exists on disk, or ``None`` when
    none of them does.
    """
    candidate_paths = (
        '../models/entsoe_load_%s_%d_2023.npz' % (country, first_year)
        for first_year in range(2015, 2020)
    )
    # The generator is lazy, so probing stops at the first existing file.
    return next((path for path in candidate_paths if os.path.isfile(path)), None)

In [9]:
# Map each country code to its model file path; a country for which no file
# is found maps to None (see `find_entsoe_model_file`).
model_file_by_country = {country: find_entsoe_model_file(country) for country in countries}
model_file_by_country

{'ES': '../models/entsoe_load_ES_2015_2023.npz',
 'FR': '../models/entsoe_load_FR_2015_2023.npz',
 'DE': '../models/entsoe_load_DE_2015_2023.npz',
 'IT': '../models/entsoe_load_IT_2015_2023.npz',
 'PL': '../models/entsoe_load_PL_2015_2023.npz',
 'DK': '../models/entsoe_load_DK_2015_2023.npz',
 'CH': '../models/entsoe_load_CH_2015_2023.npz',
 'PT': '../models/entsoe_load_PT_2015_2023.npz',
 'RO': '../models/entsoe_load_RO_2015_2023.npz',
 'AT': '../models/entsoe_load_AT_2015_2023.npz',
 'CZ': '../models/entsoe_load_CZ_2015_2023.npz',
 'BG': '../models/entsoe_load_BG_2015_2023.npz',
 'BE': '../models/entsoe_load_BE_2015_2023.npz',
 'HU': '../models/entsoe_load_HU_2015_2023.npz',
 'RS': '../models/entsoe_load_RS_2016_2023.npz',
 'SK': '../models/entsoe_load_SK_2015_2023.npz',
 'GR': '../models/entsoe_load_GR_2015_2023.npz',
 'NL': '../models/entsoe_load_NL_2015_2023.npz',
 'BA': '../models/entsoe_load_BA_2018_2023.npz',
 'HR': '../models/entsoe_load_HR_2015_2023.npz',
 'SI': '../models/en

### Generate time series for each country

For each country, we generate the corresponding number of synthetic time series based on the ENTSO-E models.

Only one time step per hour is kept.

In [10]:
def generate_time_series(country, count):
    """Generate `count` hourly synthetic load series for `country`.

    The model stored in ``model_file_by_country[country]`` is imported and
    used to generate `count` series. The result is then regrouped into
    364 * 24 hourly bins and only the first sample of each bin is kept.

    Returns an array of shape ``(count, 364 * 24)``.

    The reshape fails with a ``ValueError`` if the number of samples per
    generated series is not a multiple of ``364 * 24``.
    """
    n_hours = 364 * 24
    model = ts.import_model(model_file_by_country[country])
    time_series = ts.generate_time_series(model, count)
    # Axis 2 holds the samples falling within one hour; index 0 keeps the first one.
    return time_series.reshape(count, n_hours, -1)[:, :, 0]

In [11]:
# For every country, generate as many synthetic series as it has loads.
loads_by_country = dict(
    (country_code, generate_time_series(country_code, n_country_loads))
    for country_code, n_country_loads in load_count_by_country.items()
)

Combine all time series into an array:

In [14]:
# One row per load, in the order of `loads_info`: the load's series is the
# `label`-th series generated for its country, scaled by the load's share
# (`load_prop`) of the country's total.
# Columns are selected by name, so the result does not depend on the number
# or order of columns in `loads_info`.
all_time_series = np.array([
    load_prop * loads_by_country[country][label]
    for country, label, load_prop in zip(
        loads_info["country"], loads_info["label"], loads_info["load_prop"]
    )
])

In [15]:
# Sanity check: one row per load and one column per hourly step (364 days * 24 hours).
assert all_time_series.shape == (n_loads, 364 * 24)

### Export the time series

Define a dataframe containing all time series, with time steps as columns and loads as rows:

In [21]:
# Rows are indexed by the load `id`; columns are the time steps, numbered from 0.
time_series_df = pd.DataFrame(all_time_series, index=loads_info["id"])
time_series_df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3935,99.959274,97.829985,97.642694,100.532840,108.517661,126.347206,150.184575,164.963361,169.166680,172.055891,...,113.803178,115.713388,127.123349,131.373412,129.800028,125.926796,120.267312,119.956505,115.108628,108.107244
2243,20.664855,20.285653,19.426884,19.053787,19.514611,20.269589,20.796185,21.617325,22.398293,23.158624,...,22.363029,21.973861,22.808207,23.899495,24.244794,23.547780,22.840726,22.794650,22.781265,22.010256
1881,59.263143,58.216934,55.430490,54.716946,57.173512,61.868427,66.293259,70.139584,72.797088,74.861660,...,60.695207,59.950325,63.199320,66.612297,67.662436,65.902594,63.594409,63.710057,63.525747,61.037000
1907,38.734120,37.841833,36.148714,35.374487,36.421954,38.661601,41.037038,41.929901,42.059877,42.308465,...,37.496287,37.408352,39.746428,42.837471,43.755831,42.544857,41.135088,41.824676,42.988851,41.272335
2923,35.782462,35.118666,33.575282,32.992485,34.207050,37.195591,40.173296,41.775677,42.490531,42.894291,...,35.926561,35.717455,37.731072,40.325753,41.207777,39.879913,38.256515,38.349701,38.494893,38.107350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3028,2.079885,1.943856,1.871553,1.843058,1.869788,1.952195,2.055742,2.119635,2.298178,2.478873,...,2.446943,2.373047,2.368531,2.524608,2.585750,2.660009,2.693227,2.647960,2.476211,2.269222
563,129.768568,121.948925,118.868444,118.769848,123.023529,135.533958,152.464818,166.152957,179.790219,188.096781,...,147.627794,144.265023,145.777239,158.290853,163.408990,168.776420,169.641245,164.358024,153.047803,141.012695
732,2.200296,2.200471,2.214010,2.255925,2.441554,2.834400,3.287184,3.470159,3.505433,3.543106,...,2.860246,3.016605,3.230722,3.113316,2.898449,2.756966,2.632307,2.505346,2.392886,2.272364
3276,62.326837,59.606357,56.752350,54.715227,53.303087,53.380340,55.882923,58.916477,62.360776,67.411904,...,65.850772,63.630188,62.357520,68.968348,77.654543,82.276648,83.021243,80.744203,76.524505,70.823423


Export to CSV format:

In [27]:
# Write the load series to disk; an existing file at this path is overwritten.
time_series_df.to_csv("../../data/pantagruel_load_series.csv")

The whole procedure can be iterated multiple times to generate more than one time series for each load.

## Generators

As with the loads, the list of generators of PanTaGruEl is stored in a CSV file:

In [2]:
# Read the list of PanTaGruEl generators (id, type, pmax, country, name, cost)
# from the CSV file, then display it.
gens_info = pd.read_csv('../../data/pantagruel_gens_info.csv')
gens_info

Unnamed: 0,id,type,pmax,country,name,cost
0,1,Hydro,1.14000,BG,Uzundzhovo,6000
1,519,hydro_pure_ps,11.64000,BE,Coo,10000
2,788,Geothermal,0.19000,IT,Montalto,0
3,774,fossil_brown_lignite,1.77000,HU,Tisza II,2400
4,599,Biomass,0.01902,FR,Beaucouze,1000
...,...,...,...,...,...,...
1078,732,nuclear,52.00000,FR,Cattenom,1600
1079,603,fossil_oil,14.15000,FR,Le Havre,10000
1080,414,other_nl,4.08000,RO,Brâila,1000
1081,710,fossil_coal_gas,1.53000,FR,Chapelle''d Arblay,11000


There are many "types" of generators and some are more descriptive than others. For instance, in some countries many generators are described as "Hydro", while in others the distinction is made between "hydro_pure_storage", "hydro_ror", and so on.

In [3]:
# Distinct generator type strings, exactly as spelled in the data
# (capitalisation is not normalised, e.g. both 'Biomass' and 'biomass' occur).
gen_types = set(gens_info['type'])
gen_types

{'Biomass',
 'Coal',
 'Gas',
 'Geothermal',
 'Hydro',
 'Nuclear',
 'Oil',
 'Waste',
 'biomass',
 'fossil_brown_lignite',
 'fossil_brown_lignite_cons',
 'fossil_coal_gas',
 'fossil_coal_gas_cons',
 'fossil_coal_hard',
 'fossil_mixed',
 'fossil_oil',
 'hydro_mixed',
 'hydro_mixed_cons',
 'hydro_pure_ps',
 'hydro_pure_storage',
 'hydro_pure_storage_cons',
 'hydro_ror',
 'nuclear',
 'nuclear_cons',
 'other_nl',
 'other_nrenew',
 'other_nrenew_cons',
 'waste_nr'}

Isolate all generators of hydraulic type:

In [4]:
# Keep every type whose name contains "hydro", ignoring case, so that both
# 'Hydro' and the 'hydro_*' variants are selected.
hydro_types = {gen_type for gen_type in gen_types if 'hydro' in gen_type.lower()}
hydro_types

{'Hydro',
 'hydro_mixed',
 'hydro_mixed_cons',
 'hydro_pure_ps',
 'hydro_pure_storage',
 'hydro_pure_storage_cons',
 'hydro_ror'}

In [5]:
# Subset of the generators whose type is one of the hydraulic types;
# the original row index of `gens_info` is preserved.
hydro_gens_info = gens_info[gens_info['type'].isin(hydro_types)]
hydro_gens_info

Unnamed: 0,id,type,pmax,country,name,cost
0,1,Hydro,1.14000,BG,Uzundzhovo,6000
1,519,hydro_pure_ps,11.64000,BE,Coo,10000
5,491,hydro_ror,0.72300,AT,Pyhrn,1000
6,228,Hydro,0.16400,DE,Wengerohr,6000
9,227,Hydro,0.16400,DE,Wengerohr,6000
...,...,...,...,...,...,...
1070,1008,hydro_ror,5.95000,ES,Narcea,1000
1072,434,hydro_ror,1.38000,RO,Iaz,1000
1073,604,hydro_ror,0.96000,FR,St. Chamas,1000
1075,563,Hydro,0.01105,FR,Le Hourat,6000


Generate noise series with mean one and standard deviation 0.1:

In [6]:
# One noise series per hydro generator, rescaled by 0.1 and shifted by 1.0.
# Assumes ts.generate_noise returns zero-mean, unit-variance series -- TODO confirm.
# NOTE(review): no random seed is set in this notebook; presumably the result
# differs between runs unless ts.generate_noise is seeded internally -- verify.
noise_time_series = 1.0 + 0.1 * ts.generate_noise(count=len(hydro_gens_info))

In [7]:
# Check the noise statistics: overall mean, and the standard deviation computed
# along axis 1 (per row) and then averaged over the rows.
noise_time_series.mean(), noise_time_series.std(axis=1).mean()

(1.0, 0.09833027375471375)

Multiply with the static cost for each generator to obtain time series:

In [28]:
# Turn the per-generator costs into a column vector so that each row of the
# noise array is scaled by the static cost of the corresponding generator.
cost_time_series = noise_time_series * hydro_gens_info["cost"].to_numpy()[:, np.newaxis]

Pack everything into a dataframe, with time steps as columns and generators as rows:

In [29]:
# Rows are indexed by the generator `id`; columns are the time steps, numbered from 0.
cost_time_series_df = pd.DataFrame(cost_time_series, index=hydro_gens_info["id"])
cost_time_series_df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,6813.632628,6537.902132,6616.115072,6646.810135,6708.281194,7012.367033,6863.560870,7170.755973,7251.605907,7082.297239,...,6720.327354,6228.113388,6171.181863,6605.610445,6659.855073,7061.661549,7089.057897,6772.315278,6684.074582,6697.218427
519,8612.761175,8265.581624,8451.942581,8843.480549,9504.028868,9597.547517,9263.409133,9319.726280,10196.095933,10284.171894,...,9469.818151,9247.846850,9555.423424,9685.726174,8590.586244,8687.996116,10492.546502,9549.034540,8451.943473,9092.390134
491,1011.328211,992.030917,945.784798,868.400883,940.180120,1022.586088,970.954296,1025.046706,1049.845299,1062.376401,...,983.199097,1014.866819,1084.661389,967.936081,950.418664,1021.087987,865.685451,939.778949,1012.463335,960.391449
228,5273.778137,5001.253833,5387.279117,5692.385069,5270.422563,5429.074793,5468.942573,4870.058604,5358.222681,5735.519973,...,5424.198727,5091.733919,5176.786433,5714.944087,5642.187332,5559.607453,5448.828316,5406.815390,5397.662760,5381.681212
227,6615.946780,6851.158602,7593.121658,6532.032046,6747.414263,6437.056317,6365.250905,6675.026542,5934.389404,6438.158716,...,6804.429325,6884.181842,6794.400908,6223.994106,6556.930456,7120.153673,6306.316343,6989.713105,6824.726759,6717.104287
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008,1017.868069,1022.803726,985.583252,967.611003,1128.996617,1030.644568,825.344751,907.676974,921.480550,918.291324,...,915.730954,941.261177,1147.659857,1020.691936,945.948535,1006.290396,991.152800,1131.776890,1096.223397,996.670453
434,1108.562103,1151.891431,1150.245269,1156.876072,1091.862526,1141.278538,1305.073682,1096.861631,995.965679,1043.684868,...,900.353070,938.046205,1047.915407,1012.515600,1068.855207,1034.267786,1060.850996,1068.619916,1005.774653,1069.434441
604,1001.303301,1103.756365,1025.668349,1144.819841,1085.565322,1012.260754,1161.922893,1108.814160,977.206524,922.328192,...,975.674581,982.827647,766.554543,908.452626,1047.173294,1041.772161,1059.275251,925.717789,1077.123181,1056.770104
563,5654.105963,5772.481302,5968.908659,5375.630535,5385.639226,5511.140731,5108.684415,4975.551622,4997.518324,5006.193099,...,6381.792548,5505.334852,5469.624327,6481.260567,5597.519975,4880.225908,6035.214832,5897.328085,5559.571055,6021.922943


Export to CSV format:

In [50]:
# Write the generator cost series to disk; an existing file at this path is overwritten.
cost_time_series_df.to_csv("../../data/pantagruel_gen_cost_series.csv")