In [4]:
import calendar
import glob
import os
import re
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, NoSuchWindowException, TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from datetime import date, datetime
from dateutil import relativedelta, rrule


def website_opener(website_url):
    """Launch a Chrome WebDriver session and load ``website_url``.

    The driver is returned still open; this function never closes it.
    """
    browser = webdriver.Chrome(options=Options())
    # One-second pauses on either side of the navigation call.
    time.sleep(1)
    browser.get(website_url)
    time.sleep(1)
    return browser




def cookies_rejection(driver):
    """Click the "deny all" button of the cookie-consent banner.

    The banner's button sits inside the shadow DOM of the element with id
    ``usercentrics-root``, so the shadow root is fetched first and the button
    is searched for inside it.

    Args:
        driver: WebDriver whose current page shows the consent banner.

    Raises:
        NoSuchElementException: if the host element or the button is missing.
    """
    # Resolve the shadow root once, through the host element (the previous
    # version did the same lookup twice and discarded the first result).
    banner_host = driver.find_element(By.ID, 'usercentrics-root')
    shadow_root = driver.execute_script('return arguments[0].shadowRoot', banner_host)

    reject_button = shadow_root.find_element(By.CSS_SELECTOR, 'button[data-testid="uc-deny-all-button"]')
    # Bring the button into the viewport before clicking it.
    driver.execute_script("arguments[0].scrollIntoView();", reject_button)
    time.sleep(1)
    reject_button.click()
    # Pause after the click before the caller continues.
    time.sleep(2)
    
    
    
    
    
def date_entry(driver, calendar_id, date_str):
    """Overwrite the input element with id ``calendar_id`` with ``date_str``."""
    field = driver.find_element(By.ID, calendar_id)
    # Empty the field first so the new text replaces the old value.
    field.clear()
    field.send_keys(date_str)
    
    
    
    
def download_button(driver):
    """Click the CSV download button, pausing 1 s before and 3 s after."""
    time.sleep(1)
    driver.find_element(By.ID, "dnn_ctr1578_View_btnDownloadGridCsv").click()
    time.sleep(3)
    
    

    
def time_format(date_):
    """Return the first and last day of a month as ``d.m.yyyy`` strings.

    ``date_`` is a ``'mm.yyyy'`` string. Day and month are written without
    zero padding, e.g. ``'03.2023'`` gives ``('1.3.2023', '31.3.2023')``.
    """
    parts = date_.split('.')
    month, year = int(parts[0]), int(parts[1])
    # monthrange returns (weekday of day 1, number of days in the month).
    last_day = calendar.monthrange(year, month)[1]
    return f'1.{month}.{year}', f'{last_day}.{month}.{year}'




def last_downloaded_finder():
    """Load the newest reBAP CSV export found in ``~/Downloads``.

    Files are recognised by the ``reBAP unterdeckt [YYYY-MM-DD HH-MM-SS]``
    stamp in their name; the one with the greatest stamp is read. The stamp
    format sorts chronologically as plain text, so no date parsing is needed.

    Returns:
        The file parsed by ``pandas.read_csv`` with ``;`` as delimiter, or
        ``None`` (after printing a message) when the directory holds no CSV
        file or none whose name carries a reBAP stamp.
    """
    downloads_dir = os.path.expanduser('~/Downloads')
    pattern = re.compile(r'reBAP unterdeckt \[(\d{4}-\d{2}-\d{2} \d{2}-\d{2}-\d{2})\]')

    csv_files = glob.glob(os.path.join(downloads_dir, '*.csv'))
    if not csv_files:
        print("No CSV files found in the directory.")
        return None

    # Keep (stamp, path) pairs so the file that actually matched is the one
    # opened, instead of a name rebuilt from the stamp that may not exist
    # (e.g. when the browser saved a duplicate as "... [stamp] (1).csv").
    stamped = []
    for csv_file in csv_files:
        match = pattern.search(os.path.basename(csv_file))
        if match:
            stamped.append((match.group(1), csv_file))

    if not stamped:
        print("No reBAP CSV files found in the directory.")
        return None

    _, csv_file_path = max(stamped)
    print(f'Last downloaded rebap file is: {os.path.basename(csv_file_path)}')

    return pd.read_csv(csv_file_path, delimiter=';')




def get_summer_time_dates(year):
    """Return the last Sunday of March and of October of ``year``.

    Both values are ``datetime`` objects at midnight. Each is obtained by
    moving from the 31st of the month back to the nearest Sunday (the 31st
    itself when it already is one).
    """
    back_to_sunday = relativedelta.relativedelta(weekday=relativedelta.SU(-1))
    start_date, end_date = (
        datetime(year, month, 31) + back_to_sunday for month in (3, 10)
    )
    return start_date, end_date



def web_scrapping_rebap(date_):
    """Download the reBAP CSV for one month from netztransparenz.de.

    Opens the reBAP page in Chrome, rejects the cookie banner, enters the
    first and last day of the requested month, and clicks the CSV download
    button. If the page then shows an alert window, its text is printed and
    the run stops; otherwise the newest downloaded file is looked up via
    ``last_downloaded_finder``.

    Args:
        date_: month to download as ``'mm.yyyy'``, e.g. ``'04.2023'``.

    Returns:
        None. Results are reported through printed messages.
    """
    url = "https://www.netztransparenz.de/Daten-zur-Regelenergie/reBAP/reBAP"

    # Parse the date before starting the browser so a malformed value fails
    # without launching Chrome.
    start_date, end_date = time_format(date_)

    driver = website_opener(url)
    try:
        time.sleep(1)
        cookies_rejection(driver)

        date_entry(driver, 'dnn_ctr1578_View_rdpGridDownloadStartDate_dateInput', start_date)
        date_entry(driver, 'dnn_ctr1578_View_rdpGridDownloadEndDate_dateInput', end_date)
        download_button(driver)

        try:
            # Wait up to 10 s for an alert window to appear in the page.
            pop_up = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, '//*[starts-with(@id, "RadWindowWrapper_alert")]'))
            )
            # Drop the button captions so only the message lines remain.
            error_message = [
                line for line in pop_up.text.split('\n')
                if line.lower() not in ('ok', 'close')
            ]
        except (TimeoutException, NoSuchElementException, NoSuchWindowException):
            # No alert showed up; treat the request as accepted.
            error_message = []

        if error_message:
            print('##########################')
            for each in error_message:
                print(each)
            print('##########################')
            print("\nThere is an error. Check the date entry. Process finished unsuccessfully.\n")
            return

        last_downloaded_finder()
    finally:
        # Always close the browser, including on the error-return path and
        # when any step above raises.
        driver.quit()


In [6]:
# Month to download, written as 'mm.yyyy' (for example '04.2023' for April 2023).
date_ = '03.2023'

web_scrapping_rebap(date_)

Last downloaded rebap file is: reBAP unterdeckt [2023-06-22 10-47-46].csv
csv file saved to the path rebap_curve_03.2023.csv


In [7]:
# Month to download, written as 'mm.yyyy' (for example '04.2023' for April 2023).
# NOTE(review): the recorded output for this run shows the site answering with a
# start/end-date error, so this month was presumably not yet available — confirm.
date_ = '06.2023'

web_scrapping_rebap(date_)

##########################
Fehler
Start/End-Datum muss angegeben sein und sich innerhalb des freigegebenen Zeitraumes befinden.
##########################

There is an error. Check the date entry. Process finished unsuccessfully.

