# Format JSON data

The goal of this notebook is to build functions that extract the meaningful data from the JSON returned by the RTE API and reformat it so that it can be stored in CSV files.

In [2]:
from re_forecast.data.load_data import get_rte_data

## 1/ Explore JSON structure

First, we load a JSON response using our `load_data` module:

In [15]:
# Query window passed to the API call, as "YYYY-MM-DD HH:MM:SS" strings
start_date = "2015-06-01 06:00:00"
end_date = "2015-08-01 07:00:00"

# Resource number selecting which dataset to query:
# 1 -> Actual generation by production type
ressource_nb = 1

# Fetch the parsed response for the chosen resource and query window.
# NOTE(review): the name `json` would shadow the standard-library `json`
# module if that module is imported in this notebook; the name is kept
# because the following cells read this variable.
json = get_rte_data(ressource_nb, start_date, end_date)

Exploration of the JSON structure:

In [19]:
# Top-level keys of the response
display(json.keys())

# The single top-level entry is a list holding one element per production
# type (12 elements for this query)
generation_per_type = json['actual_generations_per_production_type']
display(len(generation_per_type))

# Take one production type as a sample and show it in full
_type = generation_per_type[2]
display(_type)

# Keys available on a single production-type entry
display(_type.keys())

# The window returned by the API differs from the requested start/end dates
display(_type['start_date'], _type['end_date'])

# The values are a list of dicts with three keys: start_date, end_date, value.
# Per the original author's note, each value is the energy produced over a
# one-hour interval, in MWh -- TODO confirm the unit against the RTE API docs.
generation_values = _type['values']
display(len(generation_values))

dict_keys(['actual_generations_per_production_type'])

12

{'start_date': '2015-06-02T00:00:00+02:00',
 'end_date': '2015-08-01T00:00:00+02:00',
 'production_type': 'FOSSIL_HARD_COAL',
 'values': [{'start_date': '2015-06-02T00:00:00+02:00',
   'end_date': '2015-06-02T01:00:00+02:00',
   'value': 15},
  {'start_date': '2015-06-02T01:00:00+02:00',
   'end_date': '2015-06-02T02:00:00+02:00',
   'value': 15},
  {'start_date': '2015-06-02T02:00:00+02:00',
   'end_date': '2015-06-02T03:00:00+02:00',
   'value': 15},
  {'start_date': '2015-06-02T03:00:00+02:00',
   'end_date': '2015-06-02T04:00:00+02:00',
   'value': 15},
  {'start_date': '2015-06-02T04:00:00+02:00',
   'end_date': '2015-06-02T05:00:00+02:00',
   'value': 15},
  {'start_date': '2015-06-02T05:00:00+02:00',
   'end_date': '2015-06-02T06:00:00+02:00',
   'value': 15},
  {'start_date': '2015-06-02T06:00:00+02:00',
   'end_date': '2015-06-02T07:00:00+02:00',
   'value': 15},
  {'start_date': '2015-06-02T07:00:00+02:00',
   'end_date': '2015-06-02T08:00:00+02:00',
   'value': 15},
  {'star

dict_keys(['start_date', 'end_date', 'production_type', 'values'])

'2015-06-02T00:00:00+02:00'

'2015-08-01T00:00:00+02:00'

1440

## 2/ Extract the list of generation units

We want to iterate over the `generation_per_type` list in order to extract the names of the generation units.

In [20]:
# Collect the production type label of every entry, in response order.
# A comprehension builds the list in a single expression and does not leave
# a loop variable behind in the notebook namespace.
units_names = [entry['production_type'] for entry in generation_per_type]

# Display the list
display(units_names)

['BIOMASS',
 'FOSSIL_GAS',
 'FOSSIL_HARD_COAL',
 'FOSSIL_OIL',
 'HYDRO_PUMPED_STORAGE',
 'HYDRO_RUN_OF_RIVER_AND_POUNDAGE',
 'HYDRO_WATER_RESERVOIR',
 'NUCLEAR',
 'SOLAR',
 'WASTE',
 'WIND_ONSHORE',
 'TOTAL']