In [1]:
import html
import json
import os
import sys
from datetime import datetime,timedelta,date

import numpy as np
import pandas as pd
import requests

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib import cm
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from matplotlib.dates import (YEARLY, MONTHLY, DateFormatter, WeekdayLocator, MonthLocator,DayLocator,
                              rrulewrapper, RRuleLocator, drange, num2date, date2num)
import matplotlib.patches as mpatches
import matplotlib.units as munits
from matplotlib.dates import num2date, date2num

import seaborn as sns





In [2]:
def catalogo_esios(token, timeout=60):
    """
    Download all the identifiers and their description of esios

    Parameters
    ----------
    token : str
        The esios token needed to make the API calls

    timeout : float or tuple, optional
        Value forwarded to ``requests.get`` as ``timeout`` so the call cannot
        block forever. Defaults to 60 seconds.

    Returns
    -------
    DataFrame
        Dataframe of pandas with the API id catalog. When the catalog has a
        ``description`` column, the ``<p>``/``<b>`` tags are removed from its
        text values and their HTML entities are unescaped; non-text values
        (for example a missing description) are left untouched.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    """

    headers = {'Accept':'application/json; application/vnd.esios-api-v2+json',
           'Content-Type':'application/json',
           'Host':'api.esios.ree.es',
           'Cookie' : '',
           'Authorization':f'Token token={token}',
           'x-api-key': f'{token}',
           'Cache-Control': 'no-cache',
           'Pragma': 'no-cache'
          }
    end_point = 'https://api.esios.ree.es/indicators'

    response = requests.get(end_point, headers=headers, timeout=timeout)
    # Fail here with the HTTP error instead of a confusing KeyError further down.
    response.raise_for_status()

    # the raw json result is converted to pandas
    indicadores_df = pd.json_normalize(data=response.json()['indicators'], errors='ignore')

    def _limpiar(descripcion):
        # Missing descriptions are not strings; leave them as they are.
        if not isinstance(descripcion, str):
            return descripcion
        # Tags are stripped first and entities unescaped afterwards.
        for etiqueta in ('<p>', '</p>', '<b>', '</b>'):
            descripcion = descripcion.replace(etiqueta, '')
        return html.unescape(descripcion)

    # An empty catalog has no columns at all, so there is nothing to clean.
    if 'description' not in indicadores_df.columns:
        return indicadores_df

    return indicadores_df.assign(description=indicadores_df['description'].map(_limpiar))


In [5]:
def download_esios(token,indicadores,fecha_inicio,fecha_fin,time_trunc='hour',timeout=60):
    """
    Download of data from the given identifiers and between two dates

    Parameters
    ----------
    token : str
        The esios token needed to make the API calls

    indicadores : list
        List with the identifiers of the indicators for which we want to download data.
        A single identifier (int or str) is also accepted and treated as a one-element list.

    fecha_inicio : str
        Date with format %Y-%m-%d, indicating the date from which you want to download the data.
        Example 2022-10-30, October 30, 2022.

    fecha_fin : str
        Date with format %Y-%m-%d, which indicates the date up to which you want to download the data.
        Example 2022-10-30, 30 October 2022.

    time_trunc : str, optional
        Additional field that allows us to choose the granularity of the data we want to download.

    timeout : float or tuple, optional
        Value forwarded to ``requests.get`` as ``timeout`` so a call cannot
        block forever. Defaults to 60 seconds.

    Returns
    -------
    DataFrame
        Dataframe of pandas with requested data: one row per returned value,
        plus the ``name`` and ``short_name`` of its indicator. An empty
        DataFrame is returned when no indicator is requested.

    Raises
    ------
    requests.HTTPError
        If the API answers any of the calls with an HTTP error status.

    """

    # we prepare the header to insert in the call.

    headers = {'Accept':'application/json; application/vnd.esios-api-v2+json',
           'Content-Type':'application/json',
           'Host':'api.esios.ree.es',
           'Cookie' : '',
           'Authorization':f'Token token={token}',
           'x-api-key': f'{token}',
           'Cache-Control': 'no-cache',
           'Pragma': 'no-cache'
          }

    # we prepare the basic url to which we will add the necessary fields

    url = 'https://api.esios.ree.es/indicators'

    # A bare identifier would otherwise fail (int) or be iterated character
    # by character (str), so it is wrapped in a list.
    if isinstance(indicadores, (str, int)):
        indicadores = [indicadores]

    # The procedure is simple:
    # a) for each of the indicators we configure the url (dates cover the whole
    #    first and last day; the geography is fixed to geo_id 8741).
    # b) We make the call, check the HTTP status and collect the data in json format.
    # c) We add the information to a list.

    lista=[]

    for indicador in indicadores:
        url_ = f'{url}/{indicador}?start_date={fecha_inicio}T00:00&end_date={fecha_fin}T23:59&time_trunc={time_trunc}&geo_limit=peninsular&geo_id=8741'
        response = requests.get(url_, headers=headers, timeout=timeout)
        response.raise_for_status()
        lista.append(pd.json_normalize(data=response.json()['indicator'],record_path=['values'], meta=['name','short_name'], errors='ignore'))

    # pd.concat raises on an empty list, so an empty request yields an empty frame.
    if not lista:
        return pd.DataFrame()

    # We return as output of the function a df resulting from the concatenation of the elements of the list.
    # This procedure, with a single concatenation at the end, is much more efficient than doing multiple concatenations.

    return pd.concat(lista, ignore_index=True )

In [4]:
def download_ree(indicador,fecha_inicio,fecha_fin,time_trunc='year',timeout=60):
    """
    Downloading data from apidatos.ree.es between two given dates

    Parameters
    ----------

    indicador : str
        Path of the data set to download, appended to the base end point.
        Example 'generacion/estructura-generacion'.

    fecha_inicio : str
        Date with format %Y-%m-%d, indicating the date from which you want to download the data.
        Example 2022-10-30, October 30, 2022.

    fecha_fin : str
        Date with format %Y-%m-%d, which indicates the date up to which you want to download the data.
        Example 2022-10-30, 30 October 2022.

    time_trunc : str, optional
        Additional field that allows us to choose the granularity of the data we want to download.
        Hour, Day, Month... depending on the end point this order will be applied or not.

    timeout : float or tuple, optional
        Value forwarded to ``requests.get`` as ``timeout`` so the call cannot
        block forever. Defaults to 60 seconds.

    Returns
    -------
    DataFrame
        Dataframe of pandas with requested data: one row per value of every
        element of ``included``, plus the columns ``type`` and ``attributes.type``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    """

    headers = {'Accept': 'application/json',
               'Content-Type': 'application/json',
               'Host': 'apidatos.ree.es'}

    end_point = 'https://apidatos.ree.es/es/datos/'

    # The url is built from adjacent f-strings: a backslash continuation inside
    # the literal would embed the indentation of the next line in the query string.
    url = (f'{end_point}{indicador}'
           f'?start_date={fecha_inicio}T00:00&end_date={fecha_fin}T23:59'
           f'&time_trunc={time_trunc}')
    # The requested url is printed so the call can be reproduced by hand.
    print (url)

    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    return pd.json_normalize(data=response.json()['included'],
                                   record_path=['attributes','values'],
                                   meta=['type',['attributes','type' ]],
                                   errors='ignore')

In [3]:
#Download with my token all the identifiers and their description from ESIOS

#The personal ESIOS token is a credential, so it is read from the ESIOS_TOKEN
#environment variable instead of being written in the notebook.
token1 = os.environ.get('ESIOS_TOKEN')
if not token1:
    raise RuntimeError('Set the ESIOS_TOKEN environment variable to your personal ESIOS API token')

catalogo = catalogo_esios(token1)
catalogo.head()

Unnamed: 0,name,description,short_name,id
0,Generación programada PBF Hidráulica UGH,"Es el programa de energía diario, con desglose...",Hidráulica UGH,1
1,Generación programada PBF Hidráulica no UGH,"Es el programa de energía diario, con desglose...",Hidráulica no UGH,2
2,Generación programada PBF Turbinación bombeo,"Es el programa de energía diario, con desglose...",Turbinación bombeo,3
3,Generación programada PBF Nuclear,"Es el programa de energía diario, con desglose...",Nuclear,4
4,Generación programada PBF Hulla antracita Anex...,"Es el programa de energía diario, con desglose...",Hulla antracita RD 134/2010,5


In [10]:
#Look up the identifier of a variable from a fragment of its name:
#every catalogue entry whose name contains the fragment is listed as "id -> name"

coincidencias = catalogo.loc[catalogo['name'].str.contains('esiduos reno'), ['id', 'name']]
for id_variable, nombre_variable in coincidencias.itertuples(index=False, name=None):
    print (f"{id_variable} -> {nombre_variable}")

1491 -> Potencia instalada de generación residuos renovables
10062 -> Generación medida Residuos renovables


In [16]:
#Here we can observe the collection of solar photovoltaic energy on an hourly basis, having previously
#searched that its identifier is 84.

identificadores = [84]

#It is collected from year to year as no more data
#can be collected at the same time: one call per calendar year, 2014 to 2021 both included.
#token1 is the ESIOS token already defined in the catalogue cell.

datos_anuales = []
for anio in range(2014, 2022):
    inicio = f'{anio}-01-01'
    fin = f'{anio}-12-31'
    datos_anuales.append(download_esios(token1, identificadores, inicio, fin, time_trunc='hour'))


#All collected data are pooled (each yearly frame keeps its own row index)
fotov1=pd.concat(datos_anuales,axis=0)
fotov1

Unnamed: 0,value,datetime,datetime_utc,tz_time,geo_id,geo_name,name,short_name
0,13.6,2014-01-01T00:00:00.000+01:00,2013-12-31T23:00:00Z,2013-12-31T23:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
1,13.3,2014-01-01T01:00:00.000+01:00,2014-01-01T00:00:00Z,2014-01-01T00:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
2,13.3,2014-01-01T02:00:00.000+01:00,2014-01-01T01:00:00Z,2014-01-01T01:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
3,12.2,2014-01-01T03:00:00.000+01:00,2014-01-01T02:00:00Z,2014-01-01T02:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
4,7.8,2014-01-01T04:00:00.000+01:00,2014-01-01T03:00:00Z,2014-01-01T03:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
...,...,...,...,...,...,...,...,...
7113,6.9,2021-12-31T19:00:00.000+01:00,2021-12-31T18:00:00Z,2021-12-31T18:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
7114,3.5,2021-12-31T20:00:00.000+01:00,2021-12-31T19:00:00Z,2021-12-31T19:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
7115,2.1,2021-12-31T21:00:00.000+01:00,2021-12-31T20:00:00Z,2021-12-31T20:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
7116,1.4,2021-12-31T22:00:00.000+01:00,2021-12-31T21:00:00Z,2021-12-31T21:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica


In [17]:
#The remaining periods are downloaded the same way: the whole of 2022 and January 2023
periodos = [('2022-01-01', '2022-12-31'),
            ('2023-01-01', '2023-01-31')]

datos_recientes = []
for inicio, fin in periodos:
    datos_recientes.append(download_esios(token1, identificadores, inicio, fin, time_trunc='hour'))

fotov2=pd.concat(datos_recientes,axis=0)

#All data is pooled in a dataset
fotov=pd.concat([fotov1,fotov2],axis=0)
fotov

Unnamed: 0,value,datetime,datetime_utc,tz_time,geo_id,geo_name,name,short_name
0,13.6,2014-01-01T00:00:00.000+01:00,2013-12-31T23:00:00Z,2013-12-31T23:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
1,13.3,2014-01-01T01:00:00.000+01:00,2014-01-01T00:00:00Z,2014-01-01T00:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
2,13.3,2014-01-01T02:00:00.000+01:00,2014-01-01T01:00:00Z,2014-01-01T01:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
3,12.2,2014-01-01T03:00:00.000+01:00,2014-01-01T02:00:00Z,2014-01-01T02:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
4,7.8,2014-01-01T04:00:00.000+01:00,2014-01-01T03:00:00Z,2014-01-01T03:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
...,...,...,...,...,...,...,...,...
739,44.0,2023-01-31T19:00:00.000+01:00,2023-01-31T18:00:00Z,2023-01-31T18:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
740,24.6,2023-01-31T20:00:00.000+01:00,2023-01-31T19:00:00Z,2023-01-31T19:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
741,10.1,2023-01-31T21:00:00.000+01:00,2023-01-31T20:00:00Z,2023-01-31T20:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica
742,14.7,2023-01-31T22:00:00.000+01:00,2023-01-31T21:00:00Z,2023-01-31T21:00:00.000Z,8741,Península,Generación programada P48 Solar fotovoltaica,Solar fotovoltaica


In [None]:
#The pooled dataset is written to an Excel workbook (single sheet, without the
#row index) so that it can be read later in R

fotov.to_excel(excel_writer='fotov.xlsx', sheet_name='sheet1', index=False)

In [13]:
#Search for the description of each identifier we want

identificadores = [1293]
#The loop variable is not called `id`, which would shadow the builtin for the rest of the notebook
for id_buscado in identificadores:
    # the catalogue is filtered once per identifier
    filas = catalogo.loc[catalogo['id'] == id_buscado]
    if filas.empty:
        print(f"{id_buscado}-->not found in the catalogue" + '\n')
        continue
    registro = filas.iloc[0]
    print(f"{registro['id']}-->{registro['name']}-{registro['description']}" + '\n')

1293-->Demanda real-Es el valor real de la demanda de energía eléctrica medida en tiempo real.
Los datos representados en este indicador se refieren a datos Peninsulares.
Publicación: cada 5 minutos con la información de los 5 minutos anteriores.



In [15]:
#List "id -> name" for every ESIOS variable whose name contains 'otovol'

coincidencias = catalogo.loc[catalogo['name'].str.contains('otovol'), ['id', 'name']]
for id_variable, nombre_variable in coincidencias.itertuples(index=False, name=None):
    print (f"{id_variable} -> {nombre_variable}")

14 -> Generación programada PBF Solar fotovoltaica
49 -> Generación programada PVP Solar fotovoltaica
84 -> Generación programada P48 Solar fotovoltaica
119 -> Generación programada PHF1 Solar fotovoltaica
154 -> Generación programada PHF2 Solar fotovoltaica
189 -> Generación programada PHF3 Solar fotovoltaica
224 -> Generación programada PHF4 Solar fotovoltaica
259 -> Generación programada PHF5 Solar fotovoltaica
294 -> Generación programada PHF6 Solar fotovoltaica
329 -> Generación programada PHF7 Solar fotovoltaica
434 -> Programa bilateral PBF Solar fotovoltaica
542 -> Generación prevista Solar fotovoltaica
1161 -> Generación medida Solar fotovoltaica
1295 -> Generación T.Real Solar fotovoltaica
1413 -> Generación programada PHFC Solar fotovoltaica
1486 -> Potencia instalada de generación solar fotovoltaica
1748 -> Generación T.Real solar fotovoltaica SNP
1779 -> Previsión diaria D+1 fotovoltaica
1780 -> Previsión intradiaria H+3 fotovoltaica
10358 -> Previsión diaria D+1 eólica má

The following example shows how to obtain annual generation values by technology from the REE API (apidatos.ree.es), where data downloads are more limited than with the ESIOS API used above.

In [6]:
#Date window of the query and path of the REE data set to download
inicio, fin = '2018-01-10', '2020-01-14'
identificador = 'generacion/estructura-generacion'

In [7]:
raw = download_ree(identificador,inicio,fin)
#A fixed seed makes the displayed sample row the same on every run
raw.sample(random_state=0)

https://apidatos.ree.es/es/datos/generacion/estructura-generacion?start_date=2018-01-10T00:00&end_date=2020-01-14T23:59&    time_trunc=year


Unnamed: 0,value,percentage,datetime,type,attributes.type
37,65873.839,0.006782,2020-01-01T00:00:00.000+01:00,Solar térmica,Renovable


In [8]:
#Yearly generation of the four technologies of interest, in a tidy frame
#with the columns fecha / tipo / generacion
generacion = (raw
              # the timestamp is parsed as UTC, moved to Madrid local time and
              # then made timezone-naive
              .assign(fecha=lambda df_: pd
                      .to_datetime(df_['datetime'],utc=True)
                      .dt
                      .tz_convert('Europe/Madrid')
                      .dt
                      .tz_localize(None)
                      )
              .query('type in ["Nuclear","Solar fotovoltaica","Eólica","Hidráulica"]')
              # each source column is renamed exactly once (a repeated dict key
              # would silently discard the first mapping)
              .rename(columns={'type':'tipo','value':'generacion'})
              # the explicit selection already leaves out the unused columns
              [['fecha','tipo','generacion']]
            )
generacion

Unnamed: 0,fecha,tipo,generacion
0,2018-01-01,Hidráulica,33503750.0
1,2019-01-01,Hidráulica,24719020.0
2,2020-01-01,Hidráulica,1901650.0
6,2018-01-01,Nuclear,51757980.0
7,2019-01-01,Nuclear,55824230.0
8,2020-01-01,Nuclear,2388719.0
29,2018-01-01,Eólica,47725240.0
30,2019-01-01,Eólica,54245060.0
31,2020-01-01,Eólica,1300311.0
32,2018-01-01,Solar fotovoltaica,7671875.0
