# Get data notebook

This notebook covers the final step in completing the data package.

In [30]:
import datetime
import functools
import time
from copy import copy

from re_forecast.data.utils import slice_dates

## 1/ Creating timing management decorators

The first step is to develop some timing management decorators to use with the 'get_data' functions. These timing management decorators will help respect the time delay between each API call imposed by the API provider (RTE in the first place).

In [78]:
# Quick check: time.time() returns the current time as a float number of
# seconds since the epoch
time.time()

1708209035.5059075

In [162]:
def delay(func,
          minimal_call_timedelta = 10,
          calls = None
          ):
    """Decorator that blocks the execution of a function when it is called
    again too soon after the previous call.

    Parameters
    ----------
    func : callable
        The function to protect.
    minimal_call_timedelta : int or float
        Minimal number of seconds required between two consecutive calls.
    calls : list or None
        List in which the call timestamps (time.time() values) are stored.
        When None (the default), a new list is created for each decorated
        function, so that each function has its own call history. Pass the
        same list explicitly to several decorations to make them share one
        history.

    Returns
    -------
    callable
        A wrapper that returns the result of func, or None (after printing
        a message) when the call is made less than minimal_call_timedelta
        seconds after the previous one.
    """
    # A default of `list()` in the signature would be evaluated only once and
    # therefore shared by every function decorated with @delay
    if calls is None:
        calls = []

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        """Call the wrapped function only when the previous call is at least
        minimal_call_timedelta seconds old."""

        # Every call attempt is recorded, including the blocked ones: a
        # blocked attempt therefore restarts the waiting period
        calls.append(time.time())

        # The function is not called when the timedelta between two following
        # calls is less than the minimal timedelta. An error message is printed.
        if len(calls) >= 2 and calls[-1] - calls[-2] < minimal_call_timedelta:
            print(f"You cannot make two following {func.__name__} calls less than {minimal_call_timedelta}s appart.")

            return None

        # First call, or previous call old enough: the function is executed
        return func(*args, **kwargs)

    return wrapper


In [163]:
@delay
def f1():
    """Print a greeting; toy function used to try out the delay decorator."""
    greeting = "Hello !"
    print(greeting)

In [167]:
# Call the decorated function: it prints its greeting, or the delay message
# when the previous call is too recent
f1()

Hello !


## 2/ Create the slice dates function

In order to build datasets whose start and end dates are further apart than the timedelta limit fixed by the API, we have to split a time range into multiple sub time ranges that each fit within that limit. This is the job of the slice_dates function.

In [29]:
# Set the ressource_nb
ressource_nb = 1

# Set the start and end dates
start_date = "2022-09-01 00:00:00"
end_date = "2024-12-01 00:00:00"

# Set the dt format
dt_format = "%Y-%m-%d %H:%M:%S"

# Timedelta between two consecutive data points, for each ressource called
ressource_datapoint_timedelta = {1: datetime.timedelta(hours = 1),
                                 2: datetime.timedelta(hours = 1),
                                 3: datetime.timedelta(minutes = 15)}

# Limit timedelta (time range, in days) for one API call for each ressource
ressource_time_delta = {1: 155, 2: 7, 3: 14}

# We set the data point timedelta corresponding to the ressource called
datapoint_timedelta = ressource_datapoint_timedelta[ressource_nb]
# Set the timedelta limit depending on the ressource called
timedelta_limit = ressource_time_delta[ressource_nb]

# Transform start and end dates into datetime objects
start_date_dt = datetime.datetime.strptime(start_date, dt_format)
end_date_dt = datetime.datetime.strptime(end_date, dt_format)

# Compute the timedelta between end and start date
timedelta = end_date_dt - start_date_dt

# Number of times the timedelta limit fully fits in the time range
intervals_nb = timedelta.days // timedelta_limit

# Transform timedelta_limit into a datetime timedelta object
timedelta_limit_dt = datetime.timedelta(days = timedelta_limit)

# Create the timeranges list
timeranges = list()

# Iterate over the intervals_nb to create the time subranges if the interval_nb is more than 0
if intervals_nb >= 1:
    for interval in range(intervals_nb):
        # Create a subrange dict
        subrange = dict()

        # Create the start_date and the end_date of this time range
        start_date_dt_sub = start_date_dt + interval * timedelta_limit_dt
        end_date_dt_sub = start_date_dt + (interval + 1) * timedelta_limit_dt

        # Fill the subrange dict
        # From the second subrange on, the start date is shifted by one data
        # point so that there is no overlapping point in the dataset
        # downloaded via the API
        if interval >= 1:
            subrange["start_date"] = start_date_dt_sub + datapoint_timedelta

        # If this is the first start date, there is no shift
        else:
            subrange["start_date"] = start_date_dt_sub

        subrange["end_date"] = end_date_dt_sub

        # Add the subrange dict to the timeranges list
        timeranges.append(subrange)

    # Append the remaining time range, only if there is something left after
    # the last full subrange. When the whole range is an exact multiple of the
    # limit (or the leftover is shorter than one data point), the shifted start
    # would fall after the end date and produce an invalid range.
    remaining_start_date_dt = timeranges[-1]["end_date"] + datapoint_timedelta
    if remaining_start_date_dt <= end_date_dt:
        timeranges.append({"start_date": remaining_start_date_dt,
                           "end_date": end_date_dt})

# If the intervals_nb is 0, just append the start and the end date
else:
    timeranges.append({"start_date": start_date_dt,
                       "end_date": end_date_dt})

# Show the resulting timeranges list (last expression of the cell)
timeranges

[{'start_date': datetime.datetime(2022, 9, 1, 0, 0),
  'end_date': datetime.datetime(2023, 2, 3, 0, 0)},
 {'start_date': datetime.datetime(2023, 2, 3, 1, 0),
  'end_date': datetime.datetime(2023, 7, 8, 0, 0)},
 {'start_date': datetime.datetime(2023, 7, 8, 1, 0),
  'end_date': datetime.datetime(2023, 12, 10, 0, 0)},
 {'start_date': datetime.datetime(2023, 12, 10, 1, 0),
  'end_date': datetime.datetime(2024, 5, 13, 0, 0)},
 {'start_date': datetime.datetime(2024, 5, 13, 1, 0),
  'end_date': datetime.datetime(2024, 10, 15, 0, 0)},
 {'start_date': datetime.datetime(2024, 10, 15, 1, 0),
  'end_date': datetime.datetime(2024, 12, 1, 0, 0)}]

In [36]:
# Ressource number to query
ressource_nb = 1

# Start and end dates of the requested time range
start_date, end_date = "2022-09-01 00:00:00", "2023-12-01 00:00:00"

# Check the packaged slice_dates function on this time range
slice_dates(ressource_nb, start_date, end_date)

[{'start_date': datetime.datetime(2022, 9, 1, 0, 0),
  'end_date': datetime.datetime(2023, 2, 3, 0, 0)},
 {'start_date': datetime.datetime(2023, 2, 3, 1, 0),
  'end_date': datetime.datetime(2023, 7, 8, 0, 0)},
 {'start_date': datetime.datetime(2023, 7, 8, 1, 0),
  'end_date': datetime.datetime(2023, 12, 1, 0, 0)}]