In [117]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
from functools import reduce

In [151]:
def entsoe_scrapper(url: str, tech: str):
    """Scrape one ENTSO-E table page and return its values as 24 hourly rows.

    The page at ``url`` is downloaded, every ``<td>`` cell is collected and the
    cells at odd positions (2nd, 4th, ...) are parsed as numbers.  When the page
    delivers more than one value per hour (e.g. 96 quarter-hourly values), the
    values are averaged per hour.

    Parameters
    ----------
    url : str
        Address of the page to scrape.
    tech : str
        Name of the value column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``"MTU"`` with the labels ``t1`` .. ``t24`` and ``tech``
        with one value per label.

    Raises
    ------
    ValueError
        If the page yields no values, or a number of values that is not a
        whole multiple of 24 and therefore cannot be mapped onto 24 hours.
    """
    hours_per_day = 24

    # Use the response as a context manager so the connection is closed even
    # if reading or decoding fails.
    with urlopen(url) as page:
        html = page.read().decode("utf-8")

    cells = BeautifulSoup(html, "html.parser").find_all("td")

    readings = []
    # Only odd positions are read; presumably the even positions hold the
    # time-interval label of each row -- confirm against the page layout.
    for cell in cells[1::2]:
        try:
            readings.append(float(cell.text.strip()))
        except ValueError:
            # Deliberate best effort: non-numeric cells count as zero.
            readings.append(0)

    per_hour, leftover = divmod(len(readings), hours_per_day)
    if per_hour == 0 or leftover:
        raise ValueError(
            f"expected a positive multiple of {hours_per_day} values for "
            f"{tech!r}, got {len(readings)}"
        )

    if per_hour > 1:
        # Sub-hourly resolution (4 values per hour for quarter-hourly data,
        # 2 for half-hourly, ...): collapse each hour to its mean.
        readings = [
            sum(float(value) for value in readings[start:start + per_hour]) / per_hour
            for start in range(0, len(readings), per_hour)
        ]

    mtu_labels = [f"t{hour}" for hour in range(1, hours_per_day + 1)]
    return pd.DataFrame({"MTU": mtu_labels, tech: readings})

In [211]:
def germany(data: str):
    """Collect solar, offshore and onshore generation for area 10Y1001A1001A83F.

    One ENTSO-E ``actualGenerationPerProductionType`` URL is built per
    production type (B16 -> ``solar``, B18 -> ``offshore``, B19 -> ``onshore``),
    each is scraped with ``entsoe_scrapper`` and the three result columns are
    placed side by side.

    Parameters
    ----------
    data : str
        Date text inserted verbatim into the ``dateTime`` query parameters,
        e.g. ``"06.01.2021"``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``MTU`` with the columns ``solar``, ``offshore`` and
        ``onshore``; rows stay in the order returned by ``entsoe_scrapper``.
    """
    area = "10Y1001A1001A83F"
    production_types = (("solar", "B16"), ("offshore", "B18"), ("onshore", "B19"))

    frames = []
    for tech, code in production_types:
        url = (
            "https://transparency.entsoe.eu/generation/r2/actualGenerationPerProductionType/show"
            "?name=&defaultValue=false&viewType=TABLE&areaType=CTY&atch=false"
            "&datepicker-day-offset-select-dv-date-from_input=D"
            f"&dateTime.dateTime={data}+00:00|CET|DAYTIMERANGE"
            f"&dateTime.endDateTime={data}+00:00|CET|DAYTIMERANGE"
            f"&area.values=CTY|{area}!CTY|{area}"
            f"&productionType.values={code}"
            "&dateTime.timezone=CET_CEST"
            "&dateTime.timezone_input=CET+(UTC+1)+/+CEST+(UTC+2)"
        )
        frames.append(entsoe_scrapper(url, tech).set_index("MTU"))

    # Concatenate on the shared MTU index instead of an outer merge: an outer
    # merge on the string key is documented to sort keys lexicographically
    # (t1, t10, t11, ..., t2), which pandas >= 2.2 enforces and which would
    # scramble the hourly order.
    return pd.concat(frames, axis=1)
    
def france(data: str):
    """Collect solar, offshore and onshore generation for area 10YFR-RTE------C.

    One ENTSO-E ``actualGenerationPerProductionType`` URL is built per
    production type (B16 -> ``solar``, B18 -> ``offshore``, B19 -> ``onshore``),
    each is scraped with ``entsoe_scrapper`` and the three result columns are
    placed side by side.

    Parameters
    ----------
    data : str
        Date text inserted verbatim into the ``dateTime`` query parameters,
        e.g. ``"06.01.2021"``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``MTU`` with the columns ``solar``, ``offshore`` and
        ``onshore``; rows stay in the order returned by ``entsoe_scrapper``.
    """
    area = "10YFR-RTE------C"
    production_types = (("solar", "B16"), ("offshore", "B18"), ("onshore", "B19"))

    frames = []
    for tech, code in production_types:
        url = (
            "https://transparency.entsoe.eu/generation/r2/actualGenerationPerProductionType/show"
            "?name=&defaultValue=false&viewType=TABLE&areaType=CTY&atch=false"
            "&datepicker-day-offset-select-dv-date-from_input=D"
            f"&dateTime.dateTime={data}+00:00|CET|DAYTIMERANGE"
            f"&dateTime.endDateTime={data}+00:00|CET|DAYTIMERANGE"
            f"&area.values=CTY|{area}!CTY|{area}"
            f"&productionType.values={code}"
            "&dateTime.timezone=CET_CEST"
            "&dateTime.timezone_input=CET+(UTC+1)+/+CEST+(UTC+2)"
        )
        frames.append(entsoe_scrapper(url, tech).set_index("MTU"))

    # Concatenate on the shared MTU index instead of an outer merge: an outer
    # merge on the string key is documented to sort keys lexicographically
    # (t1, t10, t11, ..., t2), which pandas >= 2.2 enforces and which would
    # scramble the hourly order.
    return pd.concat(frames, axis=1)
    
def benelux(data: str):
    """Sum solar, offshore and onshore generation over the BE and NL areas.

    For each of the areas ``10YBE----------2`` and ``10YNL----------L`` one
    ENTSO-E ``actualGenerationPerProductionType`` URL is built per production
    type (B16 -> ``solar``, B18 -> ``offshore``, B19 -> ``onshore``) and
    scraped with ``entsoe_scrapper``.  The two areas are then added together
    per production type and the three sums are placed side by side.

    Parameters
    ----------
    data : str
        Date text inserted verbatim into the ``dateTime`` query parameters,
        e.g. ``"06.01.2021"``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``MTU`` with the columns ``solar``, ``offshore`` and
        ``onshore``; rows stay in the order returned by ``entsoe_scrapper``.
    """
    production_types = (("solar", "B16"), ("offshore", "B18"), ("onshore", "B19"))

    def _scrape_area(area: str):
        # One MTU-indexed single-column frame per production type.
        frames = {}
        for tech, code in production_types:
            url = (
                "https://transparency.entsoe.eu/generation/r2/actualGenerationPerProductionType/show"
                "?name=&defaultValue=false&viewType=TABLE&areaType=CTY&atch=false"
                "&datepicker-day-offset-select-dv-date-from_input=D"
                f"&dateTime.dateTime={data}+00:00|CET|DAYTIMERANGE"
                f"&dateTime.endDateTime={data}+00:00|CET|DAYTIMERANGE"
                f"&area.values=CTY|{area}!CTY|{area}"
                f"&productionType.values={code}"
                "&dateTime.timezone=CET_CEST"
                "&dateTime.timezone_input=CET+(UTC+1)+/+CEST+(UTC+2)"
            )
            frames[tech] = entsoe_scrapper(url, tech).set_index("MTU")
        return frames

    belgium = _scrape_area("10YBE----------2")
    netherlands = _scrape_area("10YNL----------L")

    # Addition aligns on the MTU index, so each hour is summed with its
    # counterpart from the other area.
    totals = [belgium[tech] + netherlands[tech] for tech, _ in production_types]

    # Concatenate on the shared MTU index instead of an outer merge: an outer
    # merge on the string key is documented to sort keys lexicographically
    # (t1, t10, t11, ..., t2), which pandas >= 2.2 enforces and which would
    # scramble the hourly order.
    return pd.concat(totals, axis=1)

In [237]:
def all_country(data: str):
    """Build one wide table with the Germany, France and Benelux figures.

    Each regional frame gets its column names suffixed (``_de``, ``_fr``,
    ``_benelux``) and the frames are then joined on their index, Germany
    first.

    Parameters
    ----------
    data : str
        Date text passed unchanged to ``germany``, ``france`` and ``benelux``.

    Returns
    -------
    pandas.DataFrame
        The German columns followed by the French and the Benelux columns.
    """
    regional_frames = [
        germany(data).add_suffix('_de'),
        france(data).add_suffix('_fr'),
        benelux(data).add_suffix('_benelux'),
    ]

    combined, *remaining = regional_frames
    for frame in remaining:
        # Index-on-index join; the rows of the left (German) frame are kept.
        combined = combined.join(frame)
    return combined

In [238]:
# Build the combined table for the date string "06.01.2021"
# (presumably DD.MM.YYYY, i.e. 6 January 2021 -- confirm against the ENTSO-E URL format).
df = all_country("06.01.2021")
# Bare last expression so the notebook renders the resulting DataFrame.
df

Unnamed: 0_level_0,solar_de,offshore_de,onshore_de,solar_fr,offshore_fr,onshore_fr,solar_benelux,offshore_benelux,onshore_benelux
MTU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
t1,0.0,5152.5,9810.0,0.0,0,1594.0,0.0,2071.75,731.5
t2,0.0,4979.25,9557.5,0.0,0,1582.0,0.0,2050.0,727.5
t3,0.0,4817.75,9305.75,0.0,0,1586.0,0.0,1832.0,741.0
t4,0.0,4749.75,8993.25,0.0,0,1483.0,0.0,1725.75,711.5
t5,0.0,4750.0,8229.5,0.0,0,1476.0,0.0,1710.75,678.25
t6,0.0,4480.0,7593.75,0.0,0,1444.0,0.0,2172.75,601.5
t7,0.0,4808.25,6994.5,0.0,0,1374.0,0.0,2692.75,527.75
t8,0.25,4703.75,6355.75,0.0,0,1309.0,0.0,2340.5,475.5
t9,98.25,4747.75,6005.25,189.0,0,1182.0,0.0,1865.5,464.75
t10,377.0,4296.25,5514.0,549.0,0,1141.0,30.25,1557.75,388.75
