In [2]:
import pytz
import yaml
import requests
import logging
logger = logging.getLogger('log')
logger.setLevel('INFO')
import os
from dateutil.relativedelta import *
#from datetime import datetime
import datetime
import numpy as np
import pandas as pd

In [3]:
# Working directories and output file name used by the cells below.
# All paths are relative to the notebook's working directory.
downloadpath = 'downloads1/'
archivepath = 'archive1/'
outputpath = 'output1/'
outputfile = 'output1.csv'

# Only the download and output folders are created here.
for required_dir in (downloadpath, outputpath):
    if not os.path.exists(required_dir):
        os.makedirs(required_dir)

In [4]:
# Download configuration, nested as source -> dataset ("tech") -> parameters.
#   url_template / url_params : endpoint and query parameters sent with each request
#   x_Month / x_Year          : format templates filled with the period start (u_start)
#   frequency                 : pandas offset alias used to split start..end into periods
#   start / end               : first date to fetch; 'recent' is resolved by the download cell
conf_yaml = """
    ENTSO-E: 
        Data_Portal: 
            url_template: https://www.entsoe.eu/fileadmin/template/other/statistical_database/excel.php
            url_params:
                pid: 136
                opt_period: 0
                opt_Month: ''
                opt_Year: ''
                send: send
                opt_Response: 99
                dataindx: 0
            url_dates:
                opt_Month: '{u_start.month}'
                opt_Year: '{u_start.year}'
            x_Month: '{u_start.month}'
            x_Year: '{u_start.year}'
            frequency: M
            start: 2006-01-01
            end: recent
            filetype: xls           
"""
# safe_load only builds plain Python objects (dict, str, int, date, ...).
# A bare yaml.load() without a Loader can construct arbitrary objects and is
# rejected with a TypeError by PyYAML >= 6.
conf = yaml.safe_load(conf_yaml)

In [5]:
def make_url(url_template, filetype, source, tech, start, end, session, url_params):
    """Download the data for one period and store it in ``downloadpath``.

    The target file is named ``<source>_<tech>_<start>_<end>.<filetype>``.
    If that file already exists nothing is requested from the server.

    Parameters
    ----------
    url_template : str
        URL the GET request is sent to.
    filetype : str
        File extension (without the dot) used for the stored file.
    source, tech : str
        Keys into the module-level ``conf``; also used to build the file name.
    start, end : date-like
        First and last day of the period; must support ``strftime`` and
        expose ``month`` / ``year`` attributes for the templates.
    session : requests.Session
        Session used to perform the request.
    url_params : dict
        Base query parameters. The dict is copied, not modified.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.

    Notes
    -----
    Relies on the module-level names ``conf``, ``downloadpath`` and ``logger``.
    """
    filename = source+'_'+tech+'_'+start.strftime('%Y-%m-%d')+'_'+end.strftime('%Y-%m-%d')
    work_file = downloadpath+filename+'.'+filetype

    # Check the local cache first so that re-runs do not hit the server again.
    if os.path.exists(work_file):
        logger.info('Filename already exists. Skip to next.')
        return

    # Fill month and year into a per-request copy of the parameters instead of
    # writing them back into the shared configuration.
    templates = conf[source][tech]
    request_params = dict(url_params)
    request_params['opt_Month'] = templates['x_Month'].format(u_start=start, u_end=end)
    request_params['opt_Year'] = templates['x_Year'].format(u_start=start, u_end=end)

    resp = session.get(url_template, params=request_params)
    # Fail loudly rather than saving an HTTP error page as a data file.
    resp.raise_for_status()

    # The header may be absent; it is only used for logging.
    disposition = resp.headers.get('content-disposition', '')
    original_filename = disposition.split('filename=')[-1].replace('"','').replace(';','')
    logger.info('Attempting download of: %s \n From URL: %s \n original filename: %s', filename, resp.url, original_filename)

    with open(work_file, 'wb') as output_file:
        for chunk in resp.iter_content(1024):
            output_file.write(chunk)

In [7]:
# Download every configured dataset, one request per period.
for source, techs in conf.items():
    for tech, parameter in techs.items():
        g_start = parameter['start']
        if parameter['end'] == 'recent':
            # 'recent' is currently hard-coded to the end of 2015.
            g_end = datetime.date(2015,12,31)
        else:
            g_end = parameter['end']

        frequency = parameter['frequency']
        if frequency not in ('M', 'Y'):
            # Without this check p_start/p_end would be undefined (or stale) below.
            raise ValueError('Unsupported frequency %r for %s/%s' % (frequency, source, tech))

        # One session per dataset, closed again once all periods are fetched.
        with requests.session() as session:
            # date_range yields the period *end* dates for these frequencies.
            break_dates = pd.date_range(start=g_start, end=g_end, freq=frequency)
            for date in break_dates:
                if frequency == 'M':
                    p_start = date.replace(day = 1)
                    p_end = p_start + relativedelta(months = 1, days = -1)
                else:
                    # Year-end break dates fall in December, so the month has to
                    # be reset as well to get the first day of that year.
                    p_start = date.replace(month = 1, day = 1)
                    p_end = p_start + relativedelta(years = 1, days = -1)

                make_url(parameter['url_template'], parameter['filetype'], source, tech, p_start, p_end, session, parameter['url_params'])


INFO:log:Attempting download of: ENTSO-E_Data_Portal_2015-12-01_2015-12-31 
 From URL: https://www.entsoe.eu/fileadmin/template/other/statistical_database/excel.php?opt_Month=12&opt_Year=2015&pid=136&send=send&dataindx=0&opt_period=0&opt_Response=99 
 original filename: Statistics.xls


In [27]:
def readData(filePath, source, tech):
    """Read one downloaded Excel file and return it as an hourly time series.

    Parameters
    ----------
    filePath : str
        Path of the Excel file to read.
    source, tech : str
        Not used inside the function; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        Indexed by a 'Europe/Berlin'-localized DatetimeIndex, with one column
        per country named ``load_<country>``.

    Notes
    -----
    Assumes the sheet has its header in row 10 (``header=9``), the first two
    columns forming a (Country, Day) index and one column per hour -- inferred
    from the column names used below; verify against an actual file.
    """
    # All columns are parsed (the read_excel default). The former `parse_cols`
    # keyword was removed from pandas and raises a TypeError there.
    data = pd.read_excel(
        io = filePath,
        header=9,
        skiprows = None,
        index_col = [0,1]
        )
    
    # Create a list of the DST-transition hours.
    # NOTE(review): `_utc_transition_times` is a private pytz attribute; the
    # first entry is skipped and each remaining timestamp is moved to hour 2.
    dst_transition_times = [d.replace(hour=2) for d in pytz.timezone('Europe/Berlin')._utc_transition_times[1:]]
    
    # The original data has days and countries in the rows and hours in the columns.
    # This rearranges the table, mapping hours onto the rows and countries onto the columns.
    data = data.stack(level=None).unstack(level='Country').reset_index()    
    # DataFrame.stack() puts the former column names into a new index level,
    # which reset_index() exposes as 'level_1'.
    data.rename(columns={'level_1': 'raw_hour'}, inplace=True)
    
    # Truncate the hours column and strip the letters (indicating which is which
    # during the autumn DST transition).
    # Hours are indexed 1-24 rather than 0-23, so we subtract 1.
    data['hour'] = (data['raw_hour'].str[:2].str.replace('A','').str.replace('B','').astype(int) - 1).astype(str)    
    data['dt_index'] = pd.to_datetime(data['Day']+' '+data['hour']+':00', infer_datetime_format = True)
    data.set_index('dt_index', inplace=True)    
    
    # Drop the 2nd occurrence of 03:00 appearing in October data, except on the
    # autumn DST-transition day.
    data = data[~((data['raw_hour'] == '3B:00:00') & ~(data.index.isin(dst_transition_times)))]
    # Drop 03:00 on the (spring) DST-transition day. October data is unaffected
    # because its format is 3A:00/3B:00.
    data = data[~((data['raw_hour'] == '03:00:00') & (data.index.isin(dst_transition_times)))]
    
    # ambiguous='infer' resolves the repeated autumn hour from the row order.
    data.index = data.index.tz_localize('Europe/Berlin', ambiguous='infer')
    data.drop(['Day', 'hour', 'raw_hour'], axis=1, inplace = True)
    data.rename(columns=lambda x: 'load_'+x, inplace=True)
    return data

In [30]:
# Read every downloaded file that belongs to a configured source/dataset and
# merge all of them into one frame.
resultDataSet = pd.DataFrame()
for source, techs in conf.items():
    for tech, param in techs.items():
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # processing order (and thus combine_first precedence) reproducible.
        for filename in sorted(os.listdir(downloadpath)):
            if source in filename and tech in filename:
                logger.info('reading %s', filename)
                dataToAdd = readData(downloadpath + filename, source, tech)
                # Values already present in resultDataSet take precedence;
                # dataToAdd only fills in what is still missing.
                resultDataSet = resultDataSet.combine_first(dataToAdd)

INFO:log:reading ENTSO-E_Data_Portal_2006-01-01_2006-01-31.xls
INFO:log:reading ENTSO-E_Data_Portal_2006-02-01_2006-02-28.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-03-01_2006-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-04-01_2006-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-05-01_2006-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-06-01_2006-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-07-01_2006-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-08-01_2006-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-09-01_2006-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-10-01_2006-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-11-01_2006-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2006-12-01_2006-12-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-01-01_2007-01-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-02-01_2007-02-28.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-03-01_2007-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-04-01_2007-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-05-01_2007-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-06-01_2007-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-07-01_2007-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-08-01_2007-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-09-01_2007-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-10-01_2007-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-11-01_2007-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2007-12-01_2007-12-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-01-01_2008-01-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-02-01_2008-02-29.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-03-01_2008-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-04-01_2008-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-05-01_2008-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-06-01_2008-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-07-01_2008-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-08-01_2008-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-09-01_2008-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-10-01_2008-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-11-01_2008-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2008-12-01_2008-12-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-01-01_2009-01-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-02-01_2009-02-28.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-03-01_2009-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-04-01_2009-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-05-01_2009-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-06-01_2009-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-07-01_2009-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-08-01_2009-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-09-01_2009-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-10-01_2009-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-11-01_2009-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2009-12-01_2009-12-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-01-01_2010-01-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-02-01_2010-02-28.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-03-01_2010-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-04-01_2010-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-05-01_2010-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-06-01_2010-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-07-01_2010-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-08-01_2010-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-09-01_2010-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-10-01_2010-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-11-01_2010-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2010-12-01_2010-12-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-01-01_2011-01-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-02-01_2011-02-28.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-03-01_2011-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-04-01_2011-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-05-01_2011-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-06-01_2011-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-07-01_2011-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-08-01_2011-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-09-01_2011-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-10-01_2011-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-11-01_2011-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2011-12-01_2011-12-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-01-01_2012-01-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-02-01_2012-02-29.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-03-01_2012-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-04-01_2012-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-05-01_2012-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-06-01_2012-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-07-01_2012-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-08-01_2012-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-09-01_2012-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-10-01_2012-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-11-01_2012-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2012-12-01_2012-12-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-01-01_2013-01-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-02-01_2013-02-28.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-03-01_2013-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-04-01_2013-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-05-01_2013-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-06-01_2013-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-07-01_2013-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-08-01_2013-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-09-01_2013-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-10-01_2013-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-11-01_2013-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2013-12-01_2013-12-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-01-01_2014-01-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-02-01_2014-02-28.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-03-01_2014-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-04-01_2014-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-05-01_2014-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-06-01_2014-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-07-01_2014-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-08-01_2014-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-09-01_2014-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-10-01_2014-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-11-01_2014-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2014-12-01_2014-12-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-01-01_2015-01-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-02-01_2015-02-28.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-03-01_2015-03-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-04-01_2015-04-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-05-01_2015-05-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-06-01_2015-06-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-07-01_2015-07-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-08-01_2015-08-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-09-01_2015-09-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-10-01_2015-10-31.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-11-01_2015-11-30.xls




INFO:log:reading ENTSO-E_Data_Portal_2015-12-01_2015-12-31.xls





In [32]:
# Preview only the first rows instead of rendering the whole hourly frame.
resultDataSet.head(10)

Country,load_AT,load_BA,load_BE,load_BG,load_CH,load_CS,load_CY,load_CZ,load_DE,load_DK,...,load_NL,load_NO,load_PL,load_PT,load_RO,load_RS,load_SE,load_SI,load_SK,load_UA_W
dt_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-01-01 00:00:00+01:00,6297,1425,9829,4293,n.a.,5781,,6631,47823,,...,10215,,14268,5159,5957,,,1175,3332,719
2006-01-01 01:00:00+01:00,6005,1358,9447,4186,n.a.,5747,,6346,47823,,...,9979,,13602,4985,5837,,,1149,3165,695
2006-01-01 02:00:00+01:00,5743,1283,9062,4026,n.a.,5380,,6365,43444,,...,9460,,13027,4717,5592,,,1098,3040,654
2006-01-01 03:00:00+01:00,5397,1175,8589,3877,n.a.,4978,,6134,41432,,...,8833,,12606,4356,5383,,,1047,3021,644
2006-01-01 04:00:00+01:00,5213,1134,8247,3724,n.a.,4735,,6361,40508,,...,8525,,12393,4086,5201,,,1018,2959,638
2006-01-01 05:00:00+01:00,5190,1155,8045,3589,n.a.,4554,,6186,38865,,...,8458,,12231,3917,5073,,,1018,2907,685
2006-01-01 06:00:00+01:00,4916,1175,7941,3475,n.a.,4423,,5963,35817,,...,8526,,12119,3839,5005,,,1044,2917,645
2006-01-01 07:00:00+01:00,5158,1194,7554,3420,n.a.,4416,,5900,35555,,...,8760,,11926,3811,4946,,,1079,2874,651
2006-01-01 08:00:00+01:00,5405,1292,7525,3468,n.a.,4617,,6070,36263,,...,9171,,11787,3634,4828,,,1115,3014,669
2006-01-01 09:00:00+01:00,5731,1377,7649,3641,n.a.,4976,,6421,38569,,...,9600,,12098,3610,4680,,,1189,3121,672


In [649]:
# Export the merged data as a semicolon-separated CSV with decimal commas
# and two decimal places for floats.
output_csv_path = outputpath + outputfile
resultDataSet.to_csv(
    output_csv_path,
    sep=';',
    float_format='%.2f',
    decimal=',',
)