# Use the Entso-e API to download the data programatically

Download the "week-ahead" data for Germany for: 
- Minimum load forecast per day of week 
- Maximum load forecast per day of week

All units are in MW (Megawatts). 

"Week-ahead" data provides the min and max forecast-values for each day of a given week. This means 14 values per week

In [1]:
import requests
import pandas as pd
import datetime
from bs4 import BeautifulSoup as bs

# Import dictionaries, security_token and week_ahead loading script

from Name_convention_dictionaries import DocumentTypeDict, ProcessTypeDict, AreaDict, PsrTypeDict
from security_token import security_token
from Load_data_entsoe_week import Load_data_week_ahead

# Reverse dictionaries for easier understanding

zone_dict = {v:k for k,v in AreaDict.items()}
document_type_dict = {v:k for k,v in DocumentTypeDict.items()}
process_type_dict = {v:k for k,v in ProcessTypeDict.items()}
generation_type_dict = {v:k for k,v in PsrTypeDict.items()}

In [2]:
# support functions
def str_to_datetime(date_str):
    return datetime.datetime.strptime(date_str, '%Y%m%d%H%M')

def datetime_to_str(date):
    return datetime.datetime.strftime(date, "%Y%m%d%H%M")

## Specify region (country name or bidding zone according to Entso-e) and time horizon
Check Name_convention_dictionaries.py if unsure how the country or bidding zone is named by Entso-e.

In [3]:
region = 'Germany'

# The Entso-e API only allows to download 380 days per call. So create a loop to download all data 
# Time-format: Year, Month, Day, Hour, Minute

num_of_years_to_download = 7
most_recent_date = '202010252300'
list_for_loop = [365]*num_of_years_to_download

## Download Load Data

In [5]:
# Define parameters to download the data 
process_type = 'Week ahead'
document_type = 'System total load'

In [6]:
last_date_to_download = most_recent_date

file_list = []

for year in list_for_loop:
    begin_time = datetime_to_str(str_to_datetime(last_date_to_download) - datetime.timedelta(days=year))
    end_time = last_date_to_download

    # generate url for API call

    url = "https://transparency.entsoe.eu/api?securityToken="+security_token\
          +'&documentType='+document_type_dict[document_type]\
          +'&processType='+process_type_dict[process_type]\
          +'&outBiddingZone_Domain='+zone_dict[region]\
          +'&periodStart='+begin_time\
          +'&periodEnd='+end_time
    
    # load and save data
    
    try:
        file_name = Load_data_week_ahead(url)
        file_list.append(file_name)
        last_date_to_download = begin_time
    except:
        print('\n No more data available')

You downloaded data for Zone "10Y1001A1001A83F" which is "Germany"
--------------------------------------------------------------------------------------------------
The downloaded time period is from 2019-10-20 22:00:00 until 2020-10-25 23:00:00 
--------------------------------------------------------------------------------------------------
The loaded document type is "A65" which corresponds to: "System total load"
The loaded process type is "A31" which corresponds to: "Week ahead" , other examples: day Ahead, week ahead
######################################################
Sucessfully saved to:
Week_ahead_Germany_2019-10-20_2200to2020-10-25_2300.csv
######################################################



You downloaded data for Zone "10Y1001A1001A83F" which is "Germany"
--------------------------------------------------------------------------------------------------
The downloaded time period is from 2018-10-21 22:00:00 until 2019-10-27 23:00:00 
------------------------------

Check the data was loaded properly and concatinate it to get the only one database

In [4]:
dataframes = [pd.read_csv('./data/'+i) for i in file_list]

In [5]:
for i in dataframes:
    print(i.shape)

(364, 4)
(364, 4)
(371, 4)
(371, 4)
(350, 4)
(266, 4)


In [6]:
# concatenate all dataframes
df = dataframes[0]
for i in dataframes:
    df = df.append(i)

In [7]:
# remove duplicates
print(df.shape)
df.drop_duplicates(inplace=True)
print(df.shape)

(2450, 4)
(2051, 4)


In [8]:
df.sample(10)

Unnamed: 0,min_date,max_date,min_forecast_in_MAW,max_forecast_in_MAW
108,2017-02-09 23:00:00,2017-02-09 23:00:00,51880,73711
31,2016-11-24 23:00:00,2016-11-24 23:00:00,47396,72116
340,2020-10-02 22:00:00,2020-10-02 22:00:00,42209,65667
211,2016-06-07 22:00:00,2016-06-07 22:00:00,41128,66337
83,2019-01-13 23:00:00,2019-01-13 23:00:00,42158,56714
354,2020-10-16 22:00:00,2020-10-16 22:00:00,45497,68632
57,2017-12-19 23:00:00,2017-12-19 23:00:00,49916,72335
239,2017-06-20 22:00:00,2017-06-20 22:00:00,43493,68365
246,2017-06-27 22:00:00,2017-06-27 22:00:00,43374,68948
3,2018-10-25 22:00:00,2018-10-25 22:00:00,44550,68497


In [9]:
#import matplotlib.pyplot as plt
#plt.figure(figsize=(20,10))
#plt.plot(df.min_date, df.min_forecast_in_MAW)
#plt.xticks(rotation=90);