In [4]:
import os
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, WebDriverException, NoSuchElementException
from selenium.common.exceptions import TimeoutException, NoSuchElementException




def clear_cache_and_cookies(driver):
    """Reset the browser state: load a blank page, then drop cache and cookies.

    Args:
        driver: A WebDriver exposing ``get``, ``execute_cdp_cmd`` and
            ``delete_all_cookies``.
    """
    driver.get("about:blank")

    # Each step is run in order and confirmed on stdout right after it finishes.
    cleanup_steps = (
        (lambda: driver.execute_cdp_cmd('Network.clearBrowserCache', {}),
         "Browser cache cleared."),
        (lambda: driver.delete_all_cookies(),
         "Browser cookies cleared."),
    )
    for run_step, confirmation in cleanup_steps:
        run_step()
        print(confirmation)

def initialize_and_login_chrome_driver(download_directory):
    """Start Chrome, log into the BMKG data portal and open the Daily Data page.

    Args:
        download_directory: Directory Chrome is configured to save downloads to.

    Returns:
        The logged-in WebDriver on success, or ``None`` if any step failed.
        On failure the browser that was started is closed before returning,
        so a failed login does not leave a Chrome process behind.
    """
    driver = None
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_experimental_option("prefs", {
            "download.default_directory": download_directory,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True
        })

        driver = webdriver.Chrome(options=chrome_options)
        print("Chrome driver initialized successfully.")

        clear_cache_and_cookies(driver)

        driver.get('https://dataonline.bmkg.go.id/data_iklim')
        print("Navigated to BMKG website")

        language = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, "/html/body/div[1]/div/div/form/div/button/i")))
        language.click()
        print("Language button clicked")

        english = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, "/html/body/div[1]/div/div/form/div/ul/li[2]/button/i")))
        english.click()
        print("English language selected")

        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from the environment or a config file kept out of version control.
        email = driver.find_element(By.XPATH, "/html/body/div[2]/div/div/div/div[1]/div[1]/div/form[1]/div[1]/div/input")
        email.click()
        email.send_keys('noy@vespertool.com')
        print("Email entered")

        passw = driver.find_element(By.XPATH, "/html/body/div[2]/div/div/div/div[1]/div[1]/div/form[1]/div[2]/div/input")
        passw.click()
        passw.send_keys('IndonesiaWeather15')
        print("Password entered")

        # A placeholder character is typed into the captcha field.
        # NOTE(review): the pause below presumably leaves time to type the real
        # captcha by hand before the form is submitted - confirm.
        capcha = driver.find_element(By.ID, "captcha")
        capcha.send_keys('X')
        print("Captcha entered")

        time.sleep(5)

        sign_in = driver.find_element(By.XPATH, "//button[@type='submit' and contains(@class, 'btn btn-success pull-right') and contains(text(), 'Sign in')]")
        sign_in.click()
        print("Sign in clicked")

        time.sleep(10)

        select_data = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, "//a[contains(text(), 'Climate Data') and @data-hover='dropdown' and @data-toggle='dropdown']")))
        select_data.click()
        print("Climate Data selected")

        select_daily = driver.find_element(By.XPATH, "//a[@href='https://dataonline.bmkg.go.id/data_iklim' and text()='Daily Data']")
        select_daily.click()
        print("Daily Data selected")

        return driver
    except TimeoutException as e:
        print(f"Error initializing and logging into the driver: Timeout occurred - {e}")
    except NoSuchElementException as e:
        print(f"Error initializing and logging into the driver: Element not found - {e}")
    except Exception as e:
        print(f"Error initializing and logging into the driver: {e}")

    # Only reached on failure. The caller receives None and therefore cannot
    # quit the browser itself, so it has to be closed here.
    if driver is not None:
        try:
            driver.quit()
        except Exception as quit_error:
            print(f"Error closing the driver after failed login: {quit_error}")
    return None

def hard_refresh(driver):
    """Re-request the current page with ``cache=false`` in its query string.

    The parameter is merged into the existing URL rather than concatenated:
    an existing query string is extended with ``&``, a fragment stays at the
    end, and a ``cache=false`` that is already present is not duplicated, so
    repeated refreshes do not keep growing the URL.

    Args:
        driver: A WebDriver exposing ``current_url`` and ``get``.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urlsplit, urlunsplit

    parts = urlsplit(driver.current_url)
    # Work on the raw query text so existing parameters keep their encoding.
    params = [p for p in parts.query.split("&") if p and p != "cache=false"]
    params.append("cache=false")
    driver.get(urlunsplit(parts._replace(query="&".join(params))))

def select_station(driver, station_name):
    """Fill in the station form: station type, all parameters and the station.

    Up to three attempts are made. After a failed attempt the page is
    re-requested via ``hard_refresh`` before trying again.

    Args:
        driver: WebDriver positioned on the Daily Data page.
        station_name: Text used to find the station entry in the dropdown list.

    Returns:
        ``True`` once the station was selected, ``False`` if every attempt failed.
    """
    max_attempts = 3

    for attempt in range(max_attempts):
        try:
            station_type_select = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[1]/div[2]/div/div[2]/form/div[1]/span/span[1]/span/span[1]")))
            station_type_select.click()

            # Second entry of the station-type dropdown.
            upt_option = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/span/span/span[2]/ul/li[2]")))
            upt_option.click()

            parameter_checkboxes = driver.find_elements(By.XPATH, "//input[@type='checkbox' and @name='parameter[]']")
            for checkbox in parameter_checkboxes:
                # Clicking toggles the box, so only click the ones that are not
                # ticked yet; otherwise a box that is already ticked (e.g. form
                # state kept across a reload) would be switched off again.
                if not checkbox.is_selected():
                    checkbox.click()

            station_name_select = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[1]/div[2]/div/div[2]/form/div[5]/span/span[1]/span/span[1]")))
            station_name_select.click()

            station_name_select_2 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/span/span/span[1]/input")))
            station_name_select_2.click()

            # NOTE: station_name is interpolated into the XPath literal as-is,
            # so a name containing a single quote would break the expression.
            station_option = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, f"//li[contains(text(), '{station_name}')]")))
            station_option.click()

            print(f"Station {station_name} selected successfully")
            return True
        except (NoSuchElementException, WebDriverException) as e:
            print(f"Station selection attempt {attempt + 1} failed: {e}. Performing a hard refresh and retrying...")
            hard_refresh(driver)

    print("All station selection attempts exhausted. Exiting without successful station selection.")
    return False

def download_data(driver, from_date_str, to_date_str, download_directory, max_download_attempts=3,
                  station_name="Stasiun Klimatologi Sumatera Selatan"):
    """Request, rate and download the data for one date range, with retries.

    Each attempt fills in the date range, presses the process button, answers
    the rating form, submits it, clicks the download button and finally calls
    ``rename_downloaded_file``. When an attempt fails the page is reloaded and
    the station is selected again.

    Args:
        driver: WebDriver on the Daily Data page with a station already selected.
        from_date_str: Start date typed into the ``from`` field.
        to_date_str: End date typed into the ``to`` field.
        download_directory: Directory the browser downloads into.
        max_download_attempts: Number of attempts before giving up.
        station_name: Station to re-select after a failed attempt. Defaults to
            the station that was previously hard-coded here.

    Returns:
        ``True`` if an attempt ran to completion, ``False`` otherwise.
    """
    for attempt in range(max_download_attempts):
        try:
            from_date = driver.find_element(By.ID, 'from')
            from_date.clear()
            from_date.send_keys(from_date_str)

            time.sleep(3)

            to_date = driver.find_element(By.ID, 'to')
            to_date.clear()
            to_date.send_keys(to_date_str)

            time.sleep(3)

            # Click on the page body after typing the dates.
            # NOTE(review): presumably this dismisses the date picker - confirm.
            body = driver.find_element(By.TAG_NAME, 'body')
            body.click()

            time.sleep(3)

            process_button = WebDriverWait(driver, 30).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[1]/div[2]/div/div[2]/form/div[7]/div/div/button/span[1]")))
            # The button is deliberately clicked twice, as in the original flow.
            process_button.click()
            process_button.click()

            time.sleep(20)

            excellent_rating_1 = WebDriverWait(driver, 30).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[2]/div/div/div[2]/div[3]/form/div[1]/div/div[1]/ul/a[5]/i")))
            excellent_rating_1.click()

            # The label lookups below are presence checks only: a missing label
            # raises NoSuchElementException and sends the attempt to the retry path.
            # NOTE(review): the same star locator is used for every remaining
            # question and find_element returns the first match, so this relies
            # on an already-rated star no longer matching - confirm against the DOM.
            star_xpath = "//a[@class='star' and @title='Excellent' and @data-value='5']"

            driver.find_element(By.XPATH, "//label[contains(text(), 'Easiness of data access')]")
            excellent_rating_2 = driver.find_element(By.XPATH, star_xpath)
            excellent_rating_2.click()

            driver.find_element(By.XPATH, "//label[contains(text(), 'Data has relevancy with your work')]")
            excellent_rating_3 = driver.find_element(By.XPATH, star_xpath)
            excellent_rating_3.click()

            driver.find_element(By.XPATH, "//label[contains(text(), 'DataOnline services help your work ')]")
            excellent_rating_4 = driver.find_element(By.XPATH, star_xpath)
            excellent_rating_4.click()

            time.sleep(2)

            send_selection = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                (By.XPATH, "//i[@class='fa fa-send-o']")))
            send_selection.click()

            time.sleep(20)

            download_button = WebDriverWait(driver, 30).until(EC.element_to_be_clickable(
                (By.XPATH, "/html/body/div[2]/div[2]/div[3]/div/div/div[2]/div/div/div[2]/div[4]/div/form/button[1]")))
            download_button.click()

            time.sleep(10)

            rename_downloaded_file(download_directory, from_date_str, to_date_str)

            print("Download successful")
            return True
        except Exception as e:
            print(f"Download attempt {attempt + 1} failed: {e}. Refreshing the page and retrying...")
            driver.execute_script("location.reload();")

            # Done after the final attempt as well, so the page is left with a
            # station selected for whatever the caller requests next.
            select_station(driver, station_name)

    print("All download attempts exhausted. Exiting without successful download.")
    return False


import shutil

def rename_downloaded_file(download_directory, from_date_str, to_date_str, wait_seconds=10):
    """Rename the newest downloaded ``.xlsx`` file to ``<to_date>.xlsx``.

    Only regular, non-hidden files ending in ``.xlsx`` are considered, so
    unrelated directory entries (``.DS_Store``, notebooks, partial
    ``.crdownload`` files) are never renamed by mistake.

    Args:
        download_directory: Directory that contains the downloaded file.
        from_date_str: Start date of the download; kept for interface
            compatibility, it is not part of the file name.
        to_date_str: End date of the download; ``/`` is replaced with ``-``
            to form the new file name.
        wait_seconds: Delay before inspecting the directory, giving the
            browser time to finish the download. ``0`` skips the wait.

    Returns:
        The new file path if a file was renamed, otherwise ``None``. Errors are
        reported on stdout and never raised.
    """
    try:
        # Add a delay to ensure download is complete
        if wait_seconds > 0:
            time.sleep(wait_seconds)

        # List files in the directory for debugging
        files = os.listdir(download_directory)
        print(f"Files in the download directory: {files}")

        # Only real Excel files qualify as "the download".
        candidates = [
            path
            for path in (
                os.path.join(download_directory, f)
                for f in files
                if f.lower().endswith(".xlsx") and not f.startswith(".")
            )
            if os.path.isfile(path)
        ]
        if not candidates:
            print(f"No downloaded .xlsx file found in: {download_directory}")
            return None

        latest_file = max(candidates, key=os.path.getctime)
        print(f"Identified latest file: {latest_file}")

        # Check if we have permissions to rename the file
        if not os.access(latest_file, os.W_OK):
            print(f"No write permission for the file: {latest_file}")
            return None

        # Ensure the new filename is valid
        to_date_str_safe = to_date_str.replace('/', '-')
        new_filename = f"{to_date_str_safe}.xlsx"
        new_filepath = os.path.join(download_directory, new_filename)
        print(f"New file path: {new_filepath}")

        # Never overwrite a file saved by an earlier run.
        if os.path.exists(new_filepath):
            print(f"Target file already exists: {new_filepath}")
            return None

        shutil.move(latest_file, new_filepath)
        print(f"Renamed {latest_file} to {new_filename}")
        return new_filepath
    except FileNotFoundError as fnf_error:
        print(f"File not found error: {fnf_error}")
    except PermissionError as perm_error:
        print(f"Permission error: {perm_error}")
    except Exception as e:
        print(f"Error reading or renaming the Excel file: {e}")
    return None




def _month_ranges(first_year, first_month, last_year, last_month):
    """Return ('01/MM/YYYY', 'DD/MM/YYYY') pairs for each month in the inclusive range."""
    import calendar

    ranges = []
    year, month = first_year, first_month
    while (year, month) <= (last_year, last_month):
        # monthrange handles month lengths and leap years.
        last_day = calendar.monthrange(year, month)[1]
        ranges.append((f"01/{month:02d}/{year}", f"{last_day:02d}/{month:02d}/{year}"))
        year, month = (year + 1, 1) if month == 12 else (year, month + 1)
    return ranges


def main():
    """Log in, select the station and download every month from 01/2016 to 05/2024.

    The browser is always closed at the end, including when an error occurs.
    """
    driver = None
    download_directory = "/Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Data/Raw Data/Weather Data/Sumatera Selatan"

    try:
        driver = initialize_and_login_chrome_driver(download_directory)

        if driver:
            station_name = "Stasiun Klimatologi Sumatera Selatan"
            select_station(driver, station_name)

            # One (first day, last day) pair per month, January 2016 to May 2024.
            months_to_download = _month_ranges(2016, 1, 2024, 5)

            for from_date_str, to_date_str in months_to_download:
                download_data(driver, from_date_str, to_date_str, download_directory)

    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if driver:
            driver.quit()
            print("Chrome driver closed successfully.")

# Start the scraper only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Chrome driver initialized successfully.
Browser cache cleared.
Browser cookies cleared.
Navigated to BMKG website
Language button clicked
English language selected
Email entered
Password entered
Captcha entered
Sign in clicked
Climate Data selected
Daily Data selected
Station Stasiun Klimatologi Sumatera Selatan selected successfully
Download attempt 1 failed: Message: 
. Refreshing the page and retrying...
Station Stasiun Klimatologi Sumatera Selatan selected successfully
Download attempt 2 failed: Message: 
. Refreshing the page and retrying...
Station Stasiun Klimatologi Sumatera Selatan selected successfully
Files in the download directory: ['.DS_Store', 'laporan_iklim_harian.xlsx', 'Downloader.ipynb']
Identified latest file: /Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Data/Raw Data/Weather Data/Sumatera Selatan/laporan_iklim_harian.xlsx
New file path: /Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Data/Raw Da

KeyboardInterrupt: 