# Get data notebook

This notebook covers the final step in completing the data package.

In [5]:
import time
import datetime
from copy import copy

from re_forecast.data.utils import slice_dates
from re_forecast.data.manage_data_storage import read_register
from re_forecast.data.get_data import download_and_format, get_rte_data

## 1/ Creating timing management decorators

The first step is to develop some timing management decorators to use with the 'get_data' functions. These timing management decorators will help respect the time delay between two API calls imposed by the API provider (RTE, to begin with).

In [2]:
# Sanity check: time.time() returns the current time as a float number of seconds
time.time()

1710184295.9665208

In [3]:
def delay(func,
          minimal_call_timedelta = 10,
          calls = None
          ):
    """Decorator that blocks the execution of a function when it is called
    again too soon after the previous call attempt.

    Parameters
    ----------
    func : callable
        The function to protect.
    minimal_call_timedelta : int or float
        Minimal number of seconds required between two consecutive calls.
    calls : list of float, optional
        List in which the timestamps of the call attempts (as returned by
        time.time()) are recorded. When omitted, a new list is created for
        each decorated function, so that decorated functions do not share
        their call history.

    Returns
    -------
    callable
        The wrapped function. It returns the result of func, or None (after
        printing a message) when the call is made too early.
    """
    # Local imports keep this cell self-contained
    import functools
    from typing import Any

    # The call history is created here rather than as a default argument:
    # a mutable default is built once and would be shared by every function
    # decorated with delay, so calling one would block the others
    if calls is None:
        calls = []

    # functools.wraps keeps the name and docstring of the decorated function
    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> Any:
        # Every call attempt is recorded, including the blocked ones: the
        # delay is therefore measured from the last attempt
        calls.append(time.time())

        # The function is not executed when a previous attempt exists and is
        # closer than the minimal timedelta. An error message is printed.
        if len(calls) > 1 and calls[-1] - calls[-2] < minimal_call_timedelta:
            print(f"You cannot make two following {func.__name__} calls less than {minimal_call_timedelta}s appart.")

            return None

        # First call, or enough time elapsed since the previous attempt:
        # positional and keyword arguments are both forwarded
        return func(*args, **kwargs)

    return wrapper


In [4]:
@delay
def f1():
    """Print a greeting; small function used to try out the delay decorator."""
    greeting = "Hello !"
    print(greeting)

In [5]:
# First call after the definition cell: no previous call is recorded, so f1 is executed
f1()

Hello !


## 2/ Create the slice dates function

In order to build datasets whose time range exceeds the timedelta limit fixed by the API (i.e. whose start and end dates are further apart than the timedelta limit), we have to slice a time range into multiple sub time ranges. This is the purpose of the slice_dates function, which is prototyped below.

In [6]:
# Set the ressource_nb
ressource_nb = 1

# Set the start and end dates
start_date = "2022-09-01 00:00:00"
end_date = "2024-12-01 00:00:00"

# Set the dt format
dt_format = "%Y-%m-%d %H:%M:%S"

# These are the timedeltas of one data point for each ressource called
ressource_datapoint_timedelta = {1: datetime.timedelta(hours = 1),
                                 2: datetime.timedelta(hours = 1),
                                 3: datetime.timedelta(minutes = 15)}

# Limit timedelta (time range, in days) for one API call for each ressource
ressource_time_delta = {1: 155, 2: 7, 3: 14}


def build_timeranges(start_dt, end_dt, limit_days, datapoint_td):
    """Slice the [start_dt, end_dt] time range into sub time ranges that are
    at most limit_days long.

    Parameters
    ----------
    start_dt, end_dt : datetime.datetime
        Bounds of the time range to slice.
    limit_days : int
        Maximal length, in days, of one sub time range.
    datapoint_td : datetime.timedelta
        Timedelta of one data point. Every sub time range but the first one
        starts one data point after the end of the previous one, so that two
        consecutive sub time ranges do not share a point.

    Returns
    -------
    list of dict
        One {"start_date": ..., "end_date": ...} dict per sub time range,
        in chronological order.
    """
    limit_td = datetime.timedelta(days = limit_days)

    # Number of times the limit fully fits in the time range
    intervals_nb = (end_dt - start_dt).days // limit_days

    # The time range fits in a single sub time range
    if intervals_nb < 1:
        return [{"start_date": start_dt, "end_date": end_dt}]

    subranges = []
    for interval in range(intervals_nb):
        sub_start = start_dt + interval * limit_td

        # Every start date but the first one is shifted by one data point, so
        # that there is no overlapping point between two sub time ranges
        if interval >= 1:
            sub_start = sub_start + datapoint_td

        subranges.append({"start_date": sub_start,
                          "end_date": start_dt + (interval + 1) * limit_td})

    # Append the remaining time range, unless it is empty: when the time range
    # is an exact multiple of the limit (or the remainder is shorter than one
    # data point), the shifted start date falls after the end date
    remaining_start = subranges[-1]["end_date"] + datapoint_td
    if remaining_start <= end_dt:
        subranges.append({"start_date": remaining_start,
                          "end_date": end_dt})

    return subranges


# We set the data point timedelta corresponding to the ressource called
datapoint_timedelta = ressource_datapoint_timedelta[ressource_nb]
# Set the timedelta limit depending on the ressource called
timedelta_limit = ressource_time_delta[ressource_nb]

# Transform start and end dates into datetime objects
start_date_dt = datetime.datetime.strptime(start_date, dt_format)
end_date_dt = datetime.datetime.strptime(end_date, dt_format)

# Create the timeranges list
timeranges = build_timeranges(start_date_dt,
                              end_date_dt,
                              timedelta_limit,
                              datapoint_timedelta)

# Show the resulting timeranges list (the last expression of a cell is displayed)
timeranges

[{'start_date': datetime.datetime(2022, 9, 1, 0, 0),
  'end_date': datetime.datetime(2023, 2, 3, 0, 0)},
 {'start_date': datetime.datetime(2023, 2, 3, 1, 0),
  'end_date': datetime.datetime(2023, 7, 8, 0, 0)},
 {'start_date': datetime.datetime(2023, 7, 8, 1, 0),
  'end_date': datetime.datetime(2023, 12, 10, 0, 0)},
 {'start_date': datetime.datetime(2023, 12, 10, 1, 0),
  'end_date': datetime.datetime(2024, 5, 13, 0, 0)},
 {'start_date': datetime.datetime(2024, 5, 13, 1, 0),
  'end_date': datetime.datetime(2024, 10, 15, 0, 0)},
 {'start_date': datetime.datetime(2024, 10, 15, 1, 0),
  'end_date': datetime.datetime(2024, 12, 1, 0, 0)}]

In [7]:
# Inputs of the check: ressource number, then start and end dates
ressource_nb, start_date, end_date = 1, "2022-09-01 00:00:00", "2023-12-01 00:00:00"

# Call the packaged slice_dates function; its result is the cell output
slice_dates(ressource_nb, start_date, end_date)

[{'start_date': datetime.datetime(2022, 9, 1, 0, 0),
  'end_date': datetime.datetime(2023, 2, 3, 0, 0)},
 {'start_date': datetime.datetime(2023, 2, 3, 1, 0),
  'end_date': datetime.datetime(2023, 7, 8, 0, 0)},
 {'start_date': datetime.datetime(2023, 7, 8, 1, 0),
  'end_date': datetime.datetime(2023, 12, 1, 0, 0)}]

## 3/ Testing the download_and_format function

We first test the function with dates that do not exceed the limits fixed by the API:

In [10]:
# Parameters of the call: ressource 1 over a two-month time range
ressource_nb = 1
start_date, end_date = "2022-09-01 00:00:00", "2022-11-01 00:00:00"

# Run download_and_format with positional arguments and keep the result
generation_values = download_and_format(ressource_nb, start_date, end_date)

# Show what was returned
display(generation_values)

[{'start_date': '2022-09-02T00:00:00+02:00',
  'end_date': '2022-09-02T01:00:00+02:00',
  'updated_date': '2022-09-01T23:51:02+02:00',
  'value': 301,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T01:00:00+02:00',
  'end_date': '2022-09-02T02:00:00+02:00',
  'updated_date': '2022-09-02T00:36:13+02:00',
  'value': 300,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T02:00:00+02:00',
  'end_date': '2022-09-02T03:00:00+02:00',
  'updated_date': '2022-09-02T01:36:12+02:00',
  'value': 300,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T03:00:00+02:00',
  'end_date': '2022-09-02T04:00:00+02:00',
  'updated_date': '2022-09-02T02:50:58+02:00',
  'value': 301,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T04:00:00+02:00',
  'end_date': '2022-09-02T05:00:00+02:00',
  'updated_date': '2022-09-02T03:36:01+02:00',
  'value': 301,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T05:00:00+02:00',
  'end_date': '2022-09-02T06:00:00+

We then test the function with dates that do exceed the limits fixed by the API:

In [11]:
# Parameters of the call: ressource 1 over a one-year time range
ressource_nb = 1
start_date, end_date = "2022-09-01 00:00:00", "2023-09-01 00:00:00"

# Run download_and_format with positional arguments and keep the result
generation_values = download_and_format(ressource_nb, start_date, end_date)

# Show what was returned
display(generation_values)

Please wait 15 minutes until another API call can be done...
Please wait 15 minutes until another API call can be done...


[{'start_date': '2022-09-02T00:00:00+02:00',
  'end_date': '2022-09-02T01:00:00+02:00',
  'updated_date': '2022-09-01T23:51:02+02:00',
  'value': 301,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T01:00:00+02:00',
  'end_date': '2022-09-02T02:00:00+02:00',
  'updated_date': '2022-09-02T00:36:13+02:00',
  'value': 300,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T02:00:00+02:00',
  'end_date': '2022-09-02T03:00:00+02:00',
  'updated_date': '2022-09-02T01:36:12+02:00',
  'value': 300,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T03:00:00+02:00',
  'end_date': '2022-09-02T04:00:00+02:00',
  'updated_date': '2022-09-02T02:50:58+02:00',
  'value': 301,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T04:00:00+02:00',
  'end_date': '2022-09-02T05:00:00+02:00',
  'updated_date': '2022-09-02T03:36:01+02:00',
  'value': 301,
  'production_type': 'BIOMASS'},
 {'start_date': '2022-09-02T05:00:00+02:00',
  'end_date': '2022-09-02T06:00:00+

In [12]:
# Check the length of the generation_values list
len(generation_values)

104455

## 4/ Test the get_rte_data function

Finally, we test the get_rte_data function.  
First with data that has not been downloaded yet:

In [3]:
# Parameters of the query: ressource 1, one year, solar production type,
# no EIC code and no production subtype
ressource_nb = 1
start_date, end_date = "2022-09-01 00:00:00", "2023-09-01 00:00:00"
production_type = "SOLAR"
eic_code = production_subtype = None

# Run get_rte_data with every parameter passed by keyword
solar_generation_df = get_rte_data(
    ressource_nb = ressource_nb,
    start_date = start_date,
    end_date = end_date,
    eic_code = eic_code,
    production_type = production_type,
    production_subtype = production_subtype,
)

# Read the register back to verify that it was filled correctly,
# then show it followed by the data
register = read_register()
display(register, solar_generation_df)

Please wait 15 minutes until another API call can be done...
Please wait 15 minutes until another API call can be done...
The register already exists


Unnamed: 0,creation_date,ressource,start_date,end_date,eic_code,production_type,production_subtype,file_name
0,2024-03-12_14:15:18,actual_generations_per_production_type,2022-09-01_00:00:00,2023-09-01_00:00:00,,all-units,,actual_generations_per_production_type__2022-0...


Unnamed: 0,start_date,end_date,updated_date,value,production_type
29560,2022-09-02T00:00:00+02:00,2022-09-02T01:00:00+02:00,2022-09-01T23:51:02+02:00,0,SOLAR
29561,2022-09-02T01:00:00+02:00,2022-09-02T02:00:00+02:00,2022-09-02T00:36:13+02:00,0,SOLAR
29562,2022-09-02T02:00:00+02:00,2022-09-02T03:00:00+02:00,2022-09-02T01:36:12+02:00,0,SOLAR
29563,2022-09-02T03:00:00+02:00,2022-09-02T04:00:00+02:00,2022-09-02T02:50:58+02:00,0,SOLAR
29564,2022-09-02T04:00:00+02:00,2022-09-02T05:00:00+02:00,2022-09-02T03:36:01+02:00,0,SOLAR
...,...,...,...,...,...
100304,2023-08-31T19:00:00+02:00,2023-08-31T20:00:00+02:00,2024-01-04T17:19:09+01:00,3128,SOLAR
100305,2023-08-31T20:00:00+02:00,2023-08-31T21:00:00+02:00,2024-01-04T17:19:09+01:00,1082,SOLAR
100306,2023-08-31T21:00:00+02:00,2023-08-31T22:00:00+02:00,2024-01-04T17:19:09+01:00,297,SOLAR
100307,2023-08-31T22:00:00+02:00,2023-08-31T23:00:00+02:00,2024-01-04T17:19:09+01:00,0,SOLAR


We can now test with the same dates, but with a different production type:

In [7]:
# Parameters of the query: same ressource and dates as for the solar query,
# but with the offshore wind production type
ressource_nb = 1
start_date, end_date = "2022-09-01 00:00:00", "2023-09-01 00:00:00"
production_type = "WIND_OFFSHORE"
eic_code = production_subtype = None

# Run get_rte_data with every parameter passed by keyword
# NOTE(review): the result is stored in solar_generation_df although the
# production type requested here is WIND_OFFSHORE; the name is kept as is
solar_generation_df = get_rte_data(
    ressource_nb = ressource_nb,
    start_date = start_date,
    end_date = end_date,
    eic_code = eic_code,
    production_type = production_type,
    production_subtype = production_subtype,
)

# Read the register back to verify that it was filled correctly,
# then show it followed by the data
register = read_register()
display(register, solar_generation_df)

Unnamed: 0,creation_date,ressource,start_date,end_date,eic_code,production_type,production_subtype,file_name
0,2024-03-12_14:15:18,actual_generations_per_production_type,2022-09-01_00:00:00,2023-09-01_00:00:00,,all-units,,actual_generations_per_production_type__2022-0...


Unnamed: 0,start_date,end_date,updated_date,value,production_type
81250,2023-03-27T08:00:00+02:00,2023-03-27T09:00:00+02:00,2023-03-27T10:20:43+02:00,-4,WIND_OFFSHORE
81251,2023-05-10T02:00:00+02:00,2023-05-10T03:00:00+02:00,2023-05-12T22:11:00+02:00,445,WIND_OFFSHORE
81252,2023-05-10T03:00:00+02:00,2023-05-10T04:00:00+02:00,2023-05-12T22:11:00+02:00,423,WIND_OFFSHORE
81253,2023-05-10T04:00:00+02:00,2023-05-10T05:00:00+02:00,2023-05-12T22:11:00+02:00,346,WIND_OFFSHORE
81254,2023-05-10T05:00:00+02:00,2023-05-10T06:00:00+02:00,2023-05-12T22:11:00+02:00,354,WIND_OFFSHORE
...,...,...,...,...,...
101861,2023-08-31T19:00:00+02:00,2023-08-31T20:00:00+02:00,2024-01-04T17:19:09+01:00,233,WIND_OFFSHORE
101862,2023-08-31T20:00:00+02:00,2023-08-31T21:00:00+02:00,2024-01-04T17:19:09+01:00,301,WIND_OFFSHORE
101863,2023-08-31T21:00:00+02:00,2023-08-31T22:00:00+02:00,2024-01-04T17:19:09+01:00,280,WIND_OFFSHORE
101864,2023-08-31T22:00:00+02:00,2023-08-31T23:00:00+02:00,2024-01-04T17:19:09+01:00,277,WIND_OFFSHORE


Finally, we test with another resource, and with dates that respect the timedelta limit:

In [8]:
# Parameters of the query: ressource 2 over one month, selected by EIC code,
# with no production type and no production subtype
ressource_nb = 2
start_date, end_date = "2022-09-01 00:00:00", "2022-10-01 00:00:00"
eic_code = "17W0000014455651"
production_type = production_subtype = None

# Run get_rte_data with every parameter passed by keyword
# NOTE(review): the result is stored in solar_generation_df although no
# production type is requested here; the name is kept as is.
# NOTE(review): the saved output of this cell shows the call being refused
# ("less than 3600s appart" message), in which case None is displayed
solar_generation_df = get_rte_data(
    ressource_nb = ressource_nb,
    start_date = start_date,
    end_date = end_date,
    eic_code = eic_code,
    production_type = production_type,
    production_subtype = production_subtype,
)

# Read the register back to verify that it was filled correctly,
# then show it followed by the data
register = read_register()
display(register, solar_generation_df)

You cannot make two following get_rte_data calls less than 3600s appart.


Unnamed: 0,creation_date,ressource,start_date,end_date,eic_code,production_type,production_subtype,file_name
0,2024-03-12_14:15:18,actual_generations_per_production_type,2022-09-01_00:00:00,2023-09-01_00:00:00,,all-units,,actual_generations_per_production_type__2022-0...


None