In [1]:
import os
import warnings
from urllib.parse import urlencode

import lxml.etree as et
import pandas as pd
import requests
from isodate import parse_duration
from tqdm import tqdm

from utils import DayCET

# From API to file

In [2]:
# --- ENTSO-E API configuration ----------------------------------------------
# The security token is a credential and must not live in the notebook: it is
# read from the ENTSOE_API_TOKEN environment variable instead.
# NOTE(review): the token that used to be hardcoded here has been exposed in
# the notebook history and should be considered leaked — rotate it.
TOKEN = os.environ.get("ENTSOE_API_TOKEN", "")
if not TOKEN:
    # Warn instead of raising so the file-parsing cells, which never use the
    # token, stay usable without credentials.
    warnings.warn(
        "ENTSOE_API_TOKEN is not set; API requests will be sent with an empty securityToken."
    )
ENDPOINT = "https://web-api.tp.entsoe.eu/api"
# Two-letter zone code -> EIC code (sent as in_Domain / out_Domain in requests).
f2C_tEIC = {
    'FR' : '10YFR-RTE------C'
}
# Reverse lookup: EIC code -> two-letter zone code.
fEIC_t2C = {v: k for k, v in f2C_tEIC.items()}
# Root folder under which XML files are written and read. Set DATAPIPELINE_ROOT
# to run on another machine; the default is the original location.
ROOT_PATH = os.environ.get(
    "DATAPIPELINE_ROOT",
    r'C:\Users\Raphaël Thireau\OneDrive - versoenergy\Documents\datapipeline',
)

In [3]:
def request_from_dict(dict):
    """Build the full API URL for a set of query parameters.

    Parameters
    ----------
    dict : mapping
        Query parameter names mapped to their values, emitted in iteration
        order. (The name shadows the builtin ``dict``; it is kept so that
        existing callers are unaffected.)

    Returns
    -------
    str
        ``ENDPOINT`` followed by the percent-encoded query string, with the
        ``securityToken`` parameter (value ``TOKEN``) appended last.
    """
    # urlencode escapes reserved characters (&, =, spaces, ...) that a plain
    # "k=v" join would inject verbatim and thereby corrupt the query string.
    # A list of pairs (rather than a merged dict) keeps the token last.
    query = urlencode([*dict.items(), ('securityToken', TOKEN)])
    return f'{ENDPOINT}?{query}'

In [4]:
def DA_price_request(start : pd.Timestamp, end : pd.Timestamp, bidding_zone):
    """Return the request URL for document type ``A44`` over ``[start, end]``.

    ``start`` and ``end`` are formatted as ``YYYYMMDDHHMM`` exactly as given
    (no timezone conversion happens here). ``bidding_zone`` is looked up in
    ``f2C_tEIC`` and the resulting EIC code is sent as both the out and the
    in domain; an unknown zone raises ``KeyError``.
    """
    zone_eic = f2C_tEIC[bidding_zone]
    stamp_format = '%Y%m%d%H%M'
    params = dict(
        documentType='A44',
        periodStart=start.strftime(format=stamp_format),
        periodEnd=end.strftime(format=stamp_format),
        out_Domain=zone_eic,
        in_Domain=zone_eic,
    )
    return request_from_dict(params)

In [5]:
def get_DA_price_xml(day : DayCET, bidding_zone, timeout : float = 30.0):
    """Download one day of day-ahead prices and save the XML response to disk.

    Parameters
    ----------
    day : DayCET
        Day to download. Its ``start_utc`` / ``end_utc`` attributes bound the
        request and its ``date_str`` (dashes removed) prefixes the file name.
    bidding_zone
        Zone key forwarded to ``DA_price_request`` (e.g. ``'FR'``); also used
        as the sub-folder and file-name suffix.
    timeout : float, optional
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection raises instead of blocking the notebook indefinitely.

    Returns
    -------
    The root element of the parsed, re-indented XML response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    url = DA_price_request(start=day.start_utc, end=day.end_utc, bidding_zone=bidding_zone)
    resp = requests.get(url=url, timeout=timeout)
    # Fail loudly on HTTP errors: without this, an error body would be parsed
    # and written to disk as if it were price data.
    resp.raise_for_status()
    root = et.fromstring(resp.content)
    et.indent(root, space="    ")
    # Computed outside the f-string: reusing the enclosing quote character
    # inside an f-string expression only parses on Python 3.12+.
    date_compact = day.date_str.replace('-', '')
    out_path = f'{ROOT_PATH}/process/data/ENTSOE/DayAhead/{bidding_zone}/{date_compact}_DA_price_{bidding_zone}.xml'
    # The per-zone folder may not exist yet (e.g. first download for a zone).
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    et.ElementTree(root).write(out_path)
    return root

In [6]:
# Download and store the 'FR' XML for every day of the range (both bounds included).
download_days = pd.date_range(start='2024-12-17', end='2024-12-19', freq='D', inclusive='both')
for date in tqdm(download_days):
    get_DA_price_xml(day=DayCET(date.strftime(format='%Y-%m-%d')), bidding_zone='FR')

100%|██████████| 3/3 [00:04<00:00,  1.52s/it]


In [12]:
# Download (and save) a single day for 'FR', keeping the returned XML root.
root = get_DA_price_xml(day=DayCET('2024-12-01'), bidding_zone='FR')

# From file to DataFrame

In [15]:
# Location of the XML file to load. ROOT_PATH is defined once, with the other
# constants near the top of the notebook, and is deliberately not redefined here.
bidding_zone = 'FR'
date = '2024-12-04'
# Computed outside the f-string: reusing the enclosing quote character inside
# an f-string expression only parses on Python 3.12+.
date_compact = date.replace('-', '')
# NOTE(review): this folder (data/entsoe/da_price) differs from the one
# get_DA_price_xml writes to (process/data/ENTSOE/DayAhead) — confirm which
# location is intended.
file = f'{ROOT_PATH}/data/entsoe/da_price/{bidding_zone}/{date_compact}_DA_price_{bidding_zone}.xml'

In [None]:
# Parse the XML document at `file` into `df`, a one-column ('price') DataFrame
# indexed by the start time of each interval.
tree = et.parse(file)
root = tree.getroot()
ns = root.nsmap  # namespace map of the root, passed to every find() below
# NOTE(review): only the first TimeSeries and its first Period are read; if a
# document can carry several of either, the remaining ones are ignored — confirm.
ts_elem = root.find('TimeSeries', namespaces=ns)
if ts_elem is None:
    # Without this check the failure would be an opaque AttributeError on None.
    raise ValueError(f'No TimeSeries element found in {file}')
country = fEIC_t2C[ts_elem.find('in_Domain.mRID', namespaces=ns).text]
period_elem = ts_elem.find('Period', namespaces=ns)
time_inteval_elem = period_elem.find('timeInterval', namespaces=ns)
start = pd.Timestamp(time_inteval_elem.find('start', namespaces=ns).text)
end = pd.Timestamp(time_inteval_elem.find('end', namespaces=ns).text)
resolution = parse_duration(period_elem.find('resolution', namespaces=ns).text)
# One timestamp per interval (period end excluded); tz_localize(None) drops the
# timezone information while keeping the same clock values.
date_range = pd.date_range(start=start, end=end, freq=resolution, inclusive='left').tz_localize(None)
# Start from NaN rather than 0: a position absent from the document must stay
# missing instead of being recorded as a genuine 0.0 price.
# NOTE(review): depending on the series' curveType, an omitted position may mean
# "same value as the previous position" — confirm and forward-fill if so.
val = [float('nan')] * len(date_range)
for pt in period_elem.findall('Point', namespaces=ns):
    pos = int(pt.find('position', namespaces=ns).text)
    # Positions are 1-based; without the check, position 0 would silently
    # overwrite the last interval through negative indexing.
    if not 1 <= pos <= len(val):
        raise ValueError(f'Point position {pos} outside 1..{len(val)} in {file}')
    price = float(pt.find('price.amount', namespaces=ns).text)
    val[pos-1] = price
df = pd.DataFrame(data=val, index=date_range, columns=['price'])

# From DataFrame to SQL

In [62]:
# Display the parsed price DataFrame.
df

Unnamed: 0,price
2024-12-03 23:00:00,121.12
2024-12-04 00:00:00,116.66
2024-12-04 01:00:00,112.79
2024-12-04 02:00:00,108.9
2024-12-04 03:00:00,106.98
2024-12-04 04:00:00,110.72
2024-12-04 05:00:00,132.55
2024-12-04 06:00:00,149.23
2024-12-04 07:00:00,200.49
2024-12-04 08:00:00,205.1
