In [10]:
from datetime import date, timedelta
from functools import reduce
import time
from urllib.error import HTTPError
from urllib.request import urlopen

from bs4 import BeautifulSoup
import pandas as pd

In [2]:
def entsoe_scrapper(url: str, tech: str):
    """Scrape one table page and return its values as 24 hourly rows.

    The page is downloaded, every ``<td>`` element is collected, and the
    cells at odd positions (2nd, 4th, ...) are read as numbers. If the page
    yields more than one value per hour (e.g. 96 quarter-hourly values), the
    values are averaged hour by hour.

    Args:
        url: Address of the table page to download.
        tech: Name of the value column in the returned frame.

    Returns:
        A DataFrame with 24 rows and two columns: ``"MTU"`` (labels ``t1`` to
        ``t24``) and ``tech`` (one float per hour).

    Raises:
        ValueError: If the number of values found is zero or not a whole
            multiple of 24, so it cannot be mapped onto 24 hours.
        urllib.error.URLError: If the page cannot be downloaded (this
            includes ``HTTPError`` for non-success status codes).
    """
    hours_per_day = 24

    # The context manager closes the HTTP response even if reading fails.
    with urlopen(url) as page:
        html = page.read().decode("utf-8")

    cells = BeautifulSoup(html, "html.parser").find_all("td")

    values = []
    # Only every second cell carries a value; the cells in between are
    # skipped (presumably the time-interval labels - confirm against the page).
    for cell in cells[1::2]:
        try:
            values.append(float(cell.text.strip()))
        except ValueError:
            # Best effort: a cell that is not a number counts as zero. A float
            # zero keeps the column dtype float even when every cell is missing.
            values.append(0.0)

    per_hour, leftover = divmod(len(values), hours_per_day)
    if per_hour == 0 or leftover:
        raise ValueError(
            f"expected a multiple of {hours_per_day} values for column "
            f"{tech!r}, found {len(values)} at {url}"
        )

    if per_hour == 1:
        hourly = values
    else:
        # Collapse sub-hourly readings into one mean per hour.
        hourly = [
            sum(values[start:start + per_hour]) / per_hour
            for start in range(0, len(values), per_hour)
        ]

    mtu_labels = [f"t{hour}" for hour in range(1, hours_per_day + 1)]
    return pd.DataFrame({"MTU": mtu_labels, tech: hourly})

In [3]:
# Table view of the "actualGenerationPerProductionType" page for one area,
# one production type and one day. Placeholders: {data}, {area},
# {production_type}.
_GENERATION_URL = (
    "https://transparency.entsoe.eu/generation/r2/actualGenerationPerProductionType/show"
    "?name=&defaultValue=false&viewType=TABLE&areaType=CTY&atch=false"
    "&datepicker-day-offset-select-dv-date-from_input=D"
    "&dateTime.dateTime={data}+00:00|CET|DAYTIMERANGE"
    "&dateTime.endDateTime={data}+00:00|CET|DAYTIMERANGE"
    "&area.values=CTY|{area}!CTY|{area}"
    "&productionType.values={production_type}"
    "&dateTime.timezone=CET_CEST"
    "&dateTime.timezone_input=CET+(UTC+1)+/+CEST+(UTC+2)"
)

# (production type code in the URL, output column name), in output column
# order: solar, wind offshore, wind onshore.
_PRODUCTION_TYPES = (("B16", "i10"), ("B18", "i8"), ("B19", "i3"))


def _generation_frame(data, areas):
    """Fetch solar/offshore/onshore generation for ``areas`` and sum them per hour."""
    total = None
    for area in areas:
        columns = [
            entsoe_scrapper(
                _GENERATION_URL.format(data=data, area=area, production_type=code),
                tech,
            ).set_index("MTU")
            for code, tech in _PRODUCTION_TYPES
        ]
        # Every scraped frame carries the same t1..t24 labels, so aligning on
        # the index keeps them in chronological order. An outer merge is
        # documented to sort its keys lexicographically (t1, t10, t11, ...).
        area_frame = pd.concat(columns, axis=1)
        total = area_frame if total is None else total + area_frame
    return total


def germany(data: str):
    """Return hourly solar, offshore and onshore generation for Germany.

    Args:
        data: Day to query, formatted as the URL expects it
            (e.g. ``"06.01.2021"``).

    Returns:
        A DataFrame indexed by ``MTU`` (``t1`` to ``t24``) with the columns
        ``i10`` (solar), ``i8`` (offshore) and ``i3`` (onshore).
    """
    return _generation_frame(data, ("10Y1001A1001A83F",))


def france(data: str):
    """Return hourly solar, offshore and onshore generation for France.

    Args:
        data: Day to query, formatted as the URL expects it
            (e.g. ``"06.01.2021"``).

    Returns:
        A DataFrame indexed by ``MTU`` (``t1`` to ``t24``) with the columns
        ``i10`` (solar), ``i8`` (offshore) and ``i3`` (onshore).
    """
    return _generation_frame(data, ("10YFR-RTE------C",))


def benelux(data: str):
    """Return hourly generation summed over Belgium and the Netherlands.

    Only the Belgian and Dutch areas are queried; Luxembourg is not part of
    the sum.

    Args:
        data: Day to query, formatted as the URL expects it
            (e.g. ``"06.01.2021"``).

    Returns:
        A DataFrame indexed by ``MTU`` (``t1`` to ``t24``) with the columns
        ``i10`` (solar), ``i8`` (offshore) and ``i3`` (onshore), each holding
        the Belgian plus the Dutch value for that hour.
    """
    return _generation_frame(data, ("10YBE----------2", "10YNL----------L"))

In [4]:
def all_country(data: str):
    """Collect the generation tables of all regions for one day, side by side.

    Each regional frame gets a suffix on its column names (``_de``, ``_fr``,
    ``_bnl``) and the frames are then joined on their shared index.

    Args:
        data: Day to query, passed unchanged to the regional functions.

    Returns:
        One DataFrame holding the suffixed columns of Germany, France and
        Benelux, in that order.
    """
    sources = (
        (germany, '_de'),
        (france, '_fr'),
        (benelux, '_bnl'),
    )
    tagged = [fetch(data).add_suffix(suffix) for fetch, suffix in sources]

    combined = tagged[0]
    for extra in tagged[1:]:
        combined = combined.join(extra)
    return combined

In [44]:
# Trial run on two consecutive days; the stacked frame repeats the
# t1..t24 index once per day.
df1, df2 = (all_country(day) for day in ("06.01.2021", "07.01.2021"))
df = pd.concat([df1, df2])
df

Unnamed: 0_level_0,i10_de,i8_de,i3_de,i10_fr,i8_fr,i3_fr,i10_bnl,i8_bnl,i3_bnl
MTU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
t1,0.0,5152.5,9810.0,0.0,0,1594.0,0.0,2071.75,731.5
t2,0.0,4979.25,9557.5,0.0,0,1582.0,0.0,2050.0,727.5
t3,0.0,4817.75,9305.75,0.0,0,1586.0,0.0,1832.0,741.0
t4,0.0,4749.75,8993.25,0.0,0,1483.0,0.0,1725.75,711.5
t5,0.0,4750.0,8229.5,0.0,0,1476.0,0.0,1710.75,678.25
t6,0.0,4480.0,7593.75,0.0,0,1444.0,0.0,2172.75,601.5
t7,0.0,4808.25,6994.5,0.0,0,1374.0,0.0,2692.75,527.75
t8,0.25,4703.75,6355.75,0.0,0,1309.0,0.0,2340.5,475.5
t9,98.25,4747.75,6005.25,189.0,0,1182.0,0.0,1865.5,464.75
t10,377.0,4296.25,5514.0,549.0,0,1141.0,30.25,1557.75,388.75


In [5]:
def benelux_demand(data: str):
    """Return the hourly total load of Belgium, the Netherlands and Luxembourg.

    One "totalLoadR2" table page is scraped per country for the requested day.

    Args:
        data: Day to query, formatted as the URL expects it
            (e.g. ``"06.01.2021"``).

    Returns:
        A DataFrame indexed by ``MTU`` (``t1`` to ``t24``) with one column per
        country: ``bel``, ``ne`` and ``lux``. Use ``.sum(axis=1)`` on the
        result for the combined demand.
    """
    url_template = (
        "https://transparency.entsoe.eu/load-domain/r2/totalLoadR2/show"
        "?name=&defaultValue=false&viewType=TABLE&areaType=CTY&atch=false"
        "&dateTime.dateTime={data}+00:00|CET|DAY"
        "&biddingZone.values=CTY|{area}!CTY|{area}"
        "&dateTime.timezone=CET_CEST"
        "&dateTime.timezone_input=CET+(UTC+1)+/+CEST+(UTC+2)"
    )
    # (output column name, area code used in the URL)
    countries = (
        ("bel", "10YBE----------2"),
        ("ne", "10YNL----------L"),
        ("lux", "10YLU-CEGEDEL-NQ"),
    )

    columns = [
        entsoe_scrapper(url_template.format(data=data, area=area), label).set_index("MTU")
        for label, area in countries
    ]
    # All frames share the same t1..t24 labels, so index alignment keeps the
    # rows in chronological order.
    return pd.concat(columns, axis=1)

In [13]:
sdate = date(2021, 1, 1)   # first day to download (inclusive)
edate = date(2022, 1, 1)   # day after the last one to download (exclusive)
# One "dd.mm.YYYY" label per calendar day in [sdate, edate); 'D' is the
# documented offset alias for daily frequency.
d = pd.date_range(sdate, edate - timedelta(days=1), freq='D').strftime("%d.%m.%Y")

In [14]:
# Download one frame per day label in `d` and stack them.
# The server rejects rapid polling with HTTP 429, so every attempt is preceded
# by a pause and a throttled day is retried with a progressively longer wait.
MAX_ATTEMPTS = 5
daily_frames = []
for i in d:
    for attempt in range(1, MAX_ATTEMPTS + 1):
        time.sleep(3 * attempt)
        try:
            daily_frames.append(all_country(i))
            break
        except HTTPError as err:
            # Only throttling is worth retrying; any other HTTP error, or
            # running out of attempts, is re-raised so the failure stays visible.
            if err.code != 429 or attempt == MAX_ATTEMPTS:
                raise

# Concatenate once at the end instead of growing the frame inside the loop.
df = pd.concat(daily_frames) if daily_frames else pd.DataFrame()
df

HTTPError: HTTP Error 429: Too Many Requests

In [37]:
# Swap rows and columns so each series becomes a row and each MTU a column.
# Not idempotent: running this cell a second time flips the frame back.
df = df.T

MTU,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,...,t15,t16,t17,t18,t19,t20,t21,t22,t23,t24
solar_de,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,194.0,1363.75,...,685.75,222.25,8.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
offshore_de,383.0,394.5,305.25,319.25,296.25,432.0,510.25,389.25,359.5,248.25,...,5728.0,5760.5,5761.0,5862.25,5867.5,5833.0,5889.5,5940.75,5952.0,6014.0
onshore_de,3934.5,3541.5,3217.0,2787.5,2492.5,2140.75,2010.75,1975.0,2087.5,2123.5,...,10632.25,10743.75,10800.5,10725.75,10600.75,10470.75,10119.75,9587.75,9112.25,9077.25
solar_fr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179.0,505.0,...,1529.0,998.0,441.0,180.0,0.0,0.0,0.0,0.0,0.0,0.0
offshore_fr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
onshore_fr,1082.0,1075.0,1086.0,1149.0,1170.0,1190.0,1261.0,1275.0,1323.0,1294.0,...,1888.0,2170.0,2428.0,2478.0,2596.0,2790.0,2922.0,2906.0,2786.0,2757.0
solar_benelux,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,110.75,...,126.25,58.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
offshore_benelux,474.0,549.5,558.5,580.75,615.75,658.0,719.0,605.5,550.25,607.25,...,4317.25,4290.25,4275.25,4252.25,4217.25,4199.5,4160.0,4020.5,3946.25,3884.0
onshore_benelux,166.75,174.25,190.75,223.5,256.75,291.5,315.25,333.75,321.5,292.0,...,1639.0,1653.0,1662.5,1660.0,1646.0,1635.75,1571.0,1530.0,1426.5,1322.75


In [None]:
# Write the generation table to the 'generation' sheet of the input workbook.
# NOTE(review): get_excel_writer is not defined or imported in the cells shown
# here - confirm it is available before this cell runs.
input_gen_writer = get_excel_writer('ew3-p-03_input_generated.xlsx')
try:
    df.to_excel(excel_writer = input_gen_writer, sheet_name = 'generation', merge_cells = False)
finally:
    # Close the writer even if the export raises, so it is never left open.
    input_gen_writer.close()

In [10]:
# Display the day labels that the download loop iterates over.
d

Index(['01.01.2021', '02.01.2021', '03.01.2021', '04.01.2021', '05.01.2021',
       '06.01.2021', '07.01.2021', '08.01.2021', '09.01.2021', '10.01.2021',
       '11.01.2021', '12.01.2021', '13.01.2021', '14.01.2021', '15.01.2021',
       '16.01.2021', '17.01.2021', '18.01.2021', '19.01.2021', '20.01.2021',
       '21.01.2021', '22.01.2021', '23.01.2021', '24.01.2021', '25.01.2021',
       '26.01.2021', '27.01.2021', '28.01.2021', '29.01.2021', '30.01.2021',
       '31.01.2021'],
      dtype='object')