In [1]:
import os
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, WebDriverException, NoSuchElementException
from selenium.common.exceptions import TimeoutException, NoSuchElementException





In [4]:

def clear_cache_and_cookies(driver):
    driver.get("about:blank")
    driver.execute_cdp_cmd('Network.clearBrowserCache', {})
    print("Browser cache cleared.")
    driver.delete_all_cookies()
    print("Browser cookies cleared.")


def initialize_and_login_chrome_driver(download_directory):
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_experimental_option("prefs", {
            "download.default_directory": download_directory,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True
        })
        
        driver = webdriver.Chrome(options=chrome_options)
        print("Chrome driver initialized successfully.")
        
        clear_cache_and_cookies(driver)
        
        driver.get('https://dataonline.bmkg.go.id/data_iklim')
        print("Navigated to BMKG website")

        language = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH,"/html/body/div[1]/div/div/form/div/button/i")))
        language.click()
        print("Language button clicked")

        english = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH,"/html/body/div[1]/div/div/form/div/ul/li[2]/button/i")))
        english.click()
        print("English language selected")

        email = driver.find_element(By.XPATH, "/html/body/div[2]/div/div/div/div[1]/div[1]/div/form[1]/div[1]/div/input")
        email.click()
        email.send_keys('noy@vespertool.com')
        print("Email entered")

        passw = driver.find_element(By.XPATH, "/html/body/div[2]/div/div/div/div[1]/div[1]/div/form[1]/div[2]/div/input")
        passw.click()
        passw.send_keys('IndonesiaWeather15')
        print("Password entered")

        capcha = driver.find_element(By.ID, "captcha")
        capcha.send_keys('X')
        print("Captcha entered")

        time.sleep(5)

        sign_in = driver.find_element(By.XPATH,"//button[@type='submit' and contains(@class, 'btn btn-success pull-right') and contains(text(), 'Sign in')]")
        sign_in.click()
        print("Sign in clicked")

        time.sleep(10)
     
        return driver
    except TimeoutException as e:
        print(f"Error initializing and logging into the driver: Timeout occurred - {e}")
    except NoSuchElementException as e:
        print(f"Error initializing and logging into the driver: Element not found - {e}")
    except Exception as e:
        print(f"Error initializing and logging into the driver: {e}")
    return None


def hard_refresh(driver):
    driver.get(driver.current_url + "?cache=false")


stations_mapping = [
    {"Stasiun Meteorologi Iskandar":"/Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Data/Raw Data/Weather Data/Iskandar"},
    {"Stasiun Meteorologi Japura":"/Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Data/Raw Data/Weather Data/Japura"},
    {"Stasiun Meteorologi Nunukan":"/Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Data/Raw Data/Weather Data/Nunukan"},
    {"Stasiun Meteorologi Rahadi Oesman":"/Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Data/Raw Data/Weather Data/Oesman"},
    {"Stasiun Klimatologi Sumatera Selatan":"/Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Data/Raw Data/Weather Data/Sumatera Selatan"}
]

for station in stations_mapping:


    def select_station(driver, station):
        max_attempts = 3
        attempt = 0

        while attempt < max_attempts:
            try:
                
                select_data = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                    (By.XPATH, "//a[contains(text(), 'Climate Data') and @data-hover='dropdown' and @data-toggle='dropdown']")))
                select_data.click()

                select_daily = driver.find_element(By.XPATH, "//a[@href='https://dataonline.bmkg.go.id/data_iklim' and text()='Daily Data']")
                select_daily.click()
                
                station_type_select = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                    (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[1]/div[2]/div/div[2]/form/div[1]/span/span[1]/span/span[1]")))
                station_type_select.click()

                upt_option = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                    (By.XPATH, "/html/body/span/span/span[2]/ul/li[2]")))
                upt_option.click()

                parameter_checkboxes = driver.find_elements(By.XPATH, "//input[@type='checkbox' and @name='parameter[]']")
                for checkbox in parameter_checkboxes:
                    checkbox.click()

                station_name_select = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                    (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[1]/div[2]/div/div[2]/form/div[5]/span/span[1]/span/span[1]")))
                station_name_select.click()

                station_name_select_2 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                    (By.XPATH, "/html/body/span/span/span[1]/input")))
                station_name_select_2.click()

                station_option = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                    (By.XPATH, f"//li[contains(text(), '{station}')]")))
                station_option.click()

                print(f"Station {station} selected successfully")
                break
            except (NoSuchElementException, WebDriverException) as e:
                print(f"Station selection attempt {attempt + 1} failed: {e}. Performing a hard refresh and retrying...")
                hard_refresh(driver)
                attempt += 1

        if attempt == max_attempts:
            print(f"All station selection attempts exhausted. Exiting without successful station selection.")

        def download_data(driver, from_date_str, to_date_str, download_directory, max_download_attempts=3):
            attempt = 0
            
            while attempt < max_download_attempts:
                try:
                    from_date = driver.find_element(By.ID, 'from')
                    from_date.clear()
                    from_date.send_keys(from_date_str)

                    time.sleep(3)

                    to_date = driver.find_element(By.ID, 'to')
                    to_date.clear()
                    to_date.send_keys(to_date_str)

                    time.sleep(3)

                    body = driver.find_element(By.TAG_NAME, 'body')
                    body.click()

                    time.sleep(3)

                    process_button = WebDriverWait(driver, 30).until(EC.element_to_be_clickable(
                        (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[1]/div[2]/div/div[2]/form/div[7]/div/div/button/span[1]")))
                    process_button.click()
                    process_button.click()

                    time.sleep(20)

                    excellent_rating_1 = WebDriverWait(driver, 30).until(EC.element_to_be_clickable(
                        (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[2]/div/div/div[2]/div[3]/form/div[1]/div/div[1]/ul/a[5]/i")))
                    excellent_rating_1.click()

                    easiness_label = driver.find_element(By.XPATH, "//label[contains(text(), 'Easiness of data access')]")
                    excellent_rating_2 = driver.find_element(By.XPATH, "//a[@class='star' and @title='Excellent' and @data-value='5']")
                    excellent_rating_2.click()

                    completeness_data = driver.find_element(By.XPATH, "//label[contains(text(), 'Data has relevancy with your work')]")
                    excellent_rating_3 = driver.find_element(By.XPATH, "//a[@class='star' and @title='Excellent' and @data-value='5']")
                    excellent_rating_3.click()

                    data_online = driver.find_element(By.XPATH, "//label[contains(text(), 'DataOnline services help your work ')]")
                    excellent_rating_4 = driver.find_element(By.XPATH, "//a[@class='star' and @title='Excellent' and @data-value='5']")
                    excellent_rating_4.click()

                    time.sleep(2)

                    send_selection = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                        (By.XPATH, "//i[@class='fa fa-send-o']")))
                    send_selection.click()

                    time.sleep(20)

                    download_button = WebDriverWait(driver, 30).until(EC.element_to_be_clickable(
                        (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[2]/div/div/div[2]/div[4]/div/form/button[1]")))
                    download_button.click()

                    time.sleep(10)

                    rename_downloaded_file(download_directory, from_date_str, to_date_str)

                    print("Download successful")
                    break
                except (NoSuchElementException, WebDriverException, Exception) as e:
                    print(f"Download attempt {attempt + 1} failed: {e}. Refreshing the page and retrying...")
                    driver.execute_script("location.reload();")
                    attempt += 1

                    select_station(driver, "Stasiun Klimatologi Sumatera Selatan")

            if attempt == max_download_attempts:
                print(f"All download attempts exhausted. Exiting without successful download.")
        
        import shutil

def rename_downloaded_file(download_directory, from_date_str, to_date_str):
    try:
        # Add a delay to ensure download is complete
        time.sleep(10)

        # List files in the directory for debugging
        files = os.listdir(download_directory)
        print(f"Files in the download directory: {files}")

        # Check if the file exists before attempting to rename
        latest_file = max([os.path.join(download_directory, f) for f in files], key=os.path.getctime)
        print(f"Identified latest file: {latest_file}")

        if not os.path.exists(latest_file):
            print(f"File does not exist: {latest_file}")
            return

        # Check if we have permissions to rename the file
        if not os.access(latest_file, os.W_OK):
            print(f"No write permission for the file: {latest_file}")
            return

        # Ensure the new filename is valid
        from_date_str_safe = from_date_str.replace('/', '-')
        to_date_str_safe = to_date_str.replace('/', '-')
        new_filename = f"{to_date_str_safe}.xlsx"
        new_filepath = os.path.join(download_directory, new_filename)
        print(f"New file path: {new_filepath}")

        if os.path.exists(new_filepath):
            print(f"Target file already exists: {new_filepath}")
            return

        shutil.move(latest_file, new_filepath)
        print(f"Renamed {latest_file} to {new_filename}")
    except FileNotFoundError as fnf_error:
        print(f"File not found error: {fnf_error}")
    except PermissionError as perm_error:
        print(f"Permission error: {perm_error}")
    except Exception as e:
        print(f"Error reading or renaming the Excel file: {e}")

def main():
    driver = None

    try:
        driver = initialize_and_login_chrome_driver()
        
        if driver:
            for station_name, download_directory in stations_mapping.items():
                select_station(driver, station_name)

                months_to_download =  [
                ('01/01/2016', '31/01/2016')
            ]

                for from_date_str, to_date_str in months_to_download:
                    download_data(driver, from_date_str, to_date_str, download_directory, station_name)

    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if driver:
            driver.quit()
            print("Chrome driver closed successfully.")

if __name__ == "__main__":
    main()

        
    

An error occurred: initialize_and_login_chrome_driver() missing 1 required positional argument: 'download_directory'
