In [1]:
import datetime
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

# Import dictionaries, security_token and week_ahead loading script

from Name_convention_dictionaries import DocumentTypeDict, ProcessTypeDict,\
AreaDict, PsrTypeDict
from security_token import security_token
from load_data_entsoe_week import load_data_week_ahead

# Invert the imported lookup tables (swap keys and values) so the parameters below can be
# given as readable names and translated back into the codes used in the API URL.

zone_dict = dict(zip(AreaDict.values(), AreaDict.keys()))
document_type_dict = dict(zip(DocumentTypeDict.values(), DocumentTypeDict.keys()))
process_type_dict = dict(zip(ProcessTypeDict.values(), ProcessTypeDict.keys()))
generation_type_dict = dict(zip(PsrTypeDict.values(), PsrTypeDict.keys()))

In [2]:
# support functions
def str_to_datetime(date_str):
    """Parse a compact ``YYYYMMDDHHMM`` string into a naive ``datetime.datetime``.

    Raises ``ValueError`` (from ``strptime``) when the string does not match the format.
    """
    compact_format = '%Y%m%d%H%M'
    parsed = datetime.datetime.strptime(date_str, compact_format)
    return parsed

def datetime_to_str(date):
    """Format a ``datetime.datetime`` as a compact ``YYYYMMDDHHMM`` string."""
    compact_format = "%Y%m%d%H%M"
    formatted = datetime.datetime.strftime(date, compact_format)
    return formatted

In [3]:
# Download the data in one-year windows, walking backwards in time from `most_resent_date`.
# The later cells read the saved CSV files from ./data, so that folder is created here if it
# is missing. The data only goes back until 2014, so requests for older windows are expected
# to fail; the loop stops at the first failed request.

# Define parameters to download the data
country = 'Germany'
process_type = 'Week ahead'
document_type = 'System total load'

# The Entso-e API only allows downloading 380 days per call, so loop over windows of 365 days.
# Time-format: Year, Month, Day, Hour, Minute

num_of_years_to_download = 7
most_resent_date = '202010252300'
list_for_loop = [365]*num_of_years_to_download

file_list = []

os.makedirs('./data', exist_ok=True)

for days_back in list_for_loop:
    # Each window ends where the previously downloaded (more recent) window began.
    end_time = most_resent_date
    begin_time = datetime_to_str(str_to_datetime(end_time) - datetime.timedelta(days=days_back))

    # generate url for API call

    url = (
        "https://transparency.entsoe.eu/api?securityToken=" + security_token
        + '&documentType=' + document_type_dict[document_type]
        + '&processType=' + process_type_dict[process_type]
        + '&outBiddingZone_Domain=' + zone_dict[country]
        + '&periodStart=' + begin_time
        + '&periodEnd=' + end_time
    )

    # load and save data

    try:
        file_name = load_data_week_ahead(url)
    except Exception as err:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        # Only the error type is reported: the error text can contain the request URL,
        # and that URL includes the security token.
        print('No more data available')
        print('Download stopped after:', type(err).__name__)
        # The window is not advanced on failure, so continuing would only repeat the
        # same failing request for every remaining iteration.
        break

    file_list.append(file_name)
    most_resent_date = begin_time

You downloaded data for Zone "10Y1001A1001A83F" which is "Germany"
--------------------------------------------------------------------------------------------------
The downloaded time period is from 2019-10-20 22:00:00 until 2020-10-25 23:00:00 
--------------------------------------------------------------------------------------------------
The loaded document type is "A65" which corresponds to: "System total load"
The loaded process type is "A31" which corresponds to: "Week ahead" , other examples: day Ahead, week ahead
######################################################
Sucessfully saved to:
Week_ahead_Germany_2019-10-20_2200to2020-10-25_2300.csv
######################################################



None


HTTPError: 500 Server Error: Internal Server Error for url: https://transparency.entsoe.eu/api?securityToken=<REDACTED>&documentType=A65&processType=A31&outBiddingZone_Domain=10Y1001A1001A83F&periodStart=201810262300&periodEnd=201910262300

Check that the data was loaded properly, then concatenate it into a single dataset.

In [None]:
# Read every downloaded CSV file from ./data into its own DataFrame, in download order.
dataframes = []
for csv_name in file_list:
    csv_path = './data/' + csv_name
    dataframes.append(pd.read_csv(csv_path))

In [None]:
# Show the (rows, columns) shape of each downloaded window as a quick sanity check.
for window_frame in dataframes:
    window_shape = window_frame.shape
    print(window_shape)

In [None]:
# concatenate all dataframes
# A single pd.concat stacks every downloaded window exactly once. It also works when only
# one window was downloaded, and avoids DataFrame.append, which was removed in pandas 2.0.
# ignore_index=True gives the combined frame one continuous row index.
df = pd.concat(dataframes, ignore_index=True)

In [None]:
# Drop fully identical rows in place and print the frame's shape before and after,
# so the number of removed rows is visible.
shape_before = df.shape
print(shape_before)
df.drop_duplicates(inplace=True)
shape_after = df.shape
print(shape_after)

In [None]:
# Display ten randomly chosen rows as a spot check of the combined data.
df.sample(n=10)

In [None]:
import matplotlib.pyplot as plt
# Plot the min_forecast_in_MAW column against min_date using the explicit Axes interface,
# with a title and axis labels so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(df.min_date, df.min_forecast_in_MAW)
ax.set(
    title=f'{process_type} forecast: {document_type} ({country})',
    xlabel='min_date',
    ylabel='min_forecast_in_MAW',
)
# Rotate the x tick labels so the date labels do not overlap.
ax.tick_params(axis='x', labelrotation=90);