In [1]:
!pip install selenium

In [2]:
pip install webdriver-manager==3.0.0

In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException, StaleElementReferenceException
import os
import time
import logging
import sys
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys

# Configure root logging once for the whole script: INFO level and above,
# each record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)

def wait_for_element(driver, by, value, timeout=20, condition="clickable"):
    """Wait for an element to reach the requested state and return it.

    Args:
        driver: WebDriver instance used for the lookup.
        by: Locator strategy (for example ``By.XPATH``).
        value: Locator expression matching the element.
        timeout: Maximum number of seconds to wait.
        condition: State to wait for: ``"clickable"``, ``"present"`` or
            ``"visible"``.

    Returns:
        The element handed back by the wait once the condition holds.

    Raises:
        ValueError: If ``condition`` is not one of the supported names.
        TimeoutException: If the condition is not met within ``timeout``.
        StaleElementReferenceException: If the element goes stale while
            waiting.
    """
    # Supported condition names mapped to the expected_conditions factory
    # that implements them.
    condition_factories = {
        "clickable": "element_to_be_clickable",
        "present": "presence_of_element_located",
        "visible": "visibility_of_element_located",
    }
    # Reject unknown names up front; previously they fell through the
    # if/elif chain and crashed later on an unbound ``element`` variable.
    if condition not in condition_factories:
        supported = ", ".join(sorted(condition_factories))
        raise ValueError(
            f"Unsupported wait condition {condition!r}; expected one of: {supported}"
        )
    expected = getattr(EC, condition_factories[condition])((by, value))

    try:
        element = WebDriverWait(driver, timeout).until(expected)
    except TimeoutException:
        logger.error(f"Timeout waiting for element: {value}")
        raise
    except StaleElementReferenceException:
        logger.error(f"Element became stale: {value}")
        raise
    logger.info(f"Found element: {value}")
    return element

def _xpath_string_literal(text):
    """Return ``text`` as a correctly quoted XPath 1.0 string literal."""
    text = str(text)
    if "'" not in text:
        return f"'{text}'"
    if '"' not in text:
        return f'"{text}"'
    # XPath 1.0 has no escape sequences, so a value containing both quote
    # kinds is rebuilt with concat() from single-quoted pieces joined by "'".
    pieces = text.split("'")
    return "concat(" + ", \"'\", ".join(f"'{piece}'" for piece in pieces) + ")"


def select_from_mui_dropdown(driver, dropdown_selector, option_text, is_xpath=True):
    """Open a MUI dropdown and click the option whose text matches exactly.

    Args:
        driver: WebDriver instance controlling the page.
        dropdown_selector: Locator for the dropdown element.
        option_text: Text of the ``li.MuiMenuItem-root`` entry to select.
            The text may contain quote characters.
        is_xpath: When true, ``dropdown_selector`` is an XPath expression;
            otherwise it is a CSS selector.

    Returns:
        True if the option was clicked, False if any step failed (the
        failure is logged).
    """
    try:
        dropdown = wait_for_element(
            driver,
            By.XPATH if is_xpath else By.CSS_SELECTOR,
            dropdown_selector,
            condition="clickable"
        )

        # Ways of opening the dropdown, tried in order until one succeeds.
        open_attempts = (
            ("direct click", dropdown.click),
            (
                "JavaScript click",
                lambda: driver.execute_script("arguments[0].click();", dropdown),
            ),
            (
                "action chain click",
                lambda: ActionChains(driver).move_to_element(dropdown).click().perform(),
            ),
        )
        for label, attempt in open_attempts:
            try:
                attempt()
            except WebDriverException as exc:
                # Only WebDriver failures trigger the next fallback, so
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                logger.debug(f"Dropdown open via {label} failed: {exc}")
            else:
                break
        else:
            # Last resort: a failure here propagates to the outer handler.
            dropdown.send_keys(Keys.SPACE)

        time.sleep(1)  # Wait for dropdown animation

        # Quote the option text so names containing apostrophes or double
        # quotes still yield a valid XPath expression.
        option_xpath = (
            "//li[contains(@class, 'MuiMenuItem-root') and "
            f"normalize-space(text())={_xpath_string_literal(option_text)}]"
        )
        option = wait_for_element(
            driver,
            By.XPATH,
            option_xpath,
            condition="clickable"
        )
        driver.execute_script("arguments[0].click();", option)

        return True
    except Exception as e:
        logger.error(f"Failed to select from dropdown: {str(e)}")
        return False

def setup_driver(download_dir, brave_path=None, chromedriver_path=None):
    """Create a headless Chrome WebDriver that launches the Brave browser.

    Args:
        download_dir: Directory the browser should save downloads into. It
            is converted to an absolute path before being passed to the
            browser preferences.
        brave_path: Path to the Brave executable. When omitted, the Windows
            path hard-coded below is used.
        chromedriver_path: Path to the ChromeDriver executable. When
            omitted, the Windows path hard-coded below is used.

    Returns:
        The initialized ``webdriver.Chrome`` instance.

    Raises:
        FileNotFoundError: If the browser or ChromeDriver executable does
            not exist at the resolved path.
        WebDriverException: If the driver fails to start.
    """
    if brave_path is None:
        brave_path = r"C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe"
    if chromedriver_path is None:
        chromedriver_path = r"C:\ChromeDriver\chromedriver.exe"

    options = Options()
    # NOTE(review): some older headless Chromium builds are reported to block
    # file downloads; confirm downloads work with the installed browser.
    options.add_argument('--headless')  # Enable headless mode
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--window-size=1920,1080')

    # Configure download settings. The directory is made absolute so it does
    # not depend on how the browser process resolves relative paths.
    prefs = {
        "download.default_directory": os.path.abspath(download_dir),
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True,
        "plugins.always_open_pdf_externally": True
    }
    options.add_experimental_option("prefs", prefs)

    # Point the Chrome driver at the Brave binary instead of Chrome.
    if not os.path.exists(brave_path):
        logger.error(f"Brave browser not found at: {brave_path}")
        raise FileNotFoundError(f"Brave browser not found at: {brave_path}")
    options.binary_location = brave_path

    # Set up ChromeDriver service
    if not os.path.exists(chromedriver_path):
        logger.error(f"ChromeDriver not found at: {chromedriver_path}")
        raise FileNotFoundError(f"ChromeDriver not found at: {chromedriver_path}")
    service = Service(executable_path=chromedriver_path)

    try:
        return webdriver.Chrome(service=service, options=options)
    except WebDriverException as e:
        logger.error(f"Failed to initialize WebDriver: {str(e)}")
        raise

def _clear_directory(directory):
    """Delete every regular file directly inside ``directory`` (best effort)."""
    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        try:
            if os.path.isfile(entry_path):
                os.unlink(entry_path)
        except OSError as e:
            logger.warning(f"Error deleting file {entry_path}: {str(e)}")


def _wait_for_download(file_path, timeout):
    """Poll once per second until ``file_path`` exists or ``timeout`` runs out."""
    remaining = timeout
    while remaining > 0 and not os.path.exists(file_path):
        time.sleep(1)
        remaining -= 1
        sys.stdout.write(f"\rWaiting for CSV download... {remaining} seconds remaining")
        sys.stdout.flush()
    return os.path.exists(file_path)


def download_soil_health_data(state="ANDHRA PRADESH", district="ANANTAPUR",
                              download_dir=None, download_timeout=30):
    """Download the soil health pie-chart data for a district as a CSV file.

    Opens the soil health pie-chart page, switches to the
    "MacroNutrient(% View)" tab, selects ``state`` and ``district`` and
    clicks the "Export to CSV" link, then waits for ``my-file.csv`` to
    appear in the download directory.

    Args:
        state: State name exactly as shown in the state dropdown.
        district: District name exactly as shown in the district dropdown.
        download_dir: Directory to download into. Defaults to a
            ``downloads`` folder under the current working directory.
            WARNING: all regular files already in this directory are deleted.
        download_timeout: Seconds to wait for the CSV file to appear.

    Returns:
        True if the CSV file appeared within the timeout, False on timeout
        or on any error (errors are logged, not raised).
    """
    driver = None
    try:
        # Set up download directory
        if download_dir is None:
            download_dir = os.path.join(os.getcwd(), "downloads")
        # Use one absolute path for both the browser preference and the
        # file check below, even if the caller passed a relative path.
        download_dir = os.path.abspath(download_dir)

        os.makedirs(download_dir, exist_ok=True)
        logger.info(f"Download directory set to: {download_dir}")

        # Start from an empty directory; presumably so a stale my-file.csv
        # from an earlier run cannot be mistaken for the new download.
        _clear_directory(download_dir)

        logger.info("Initializing WebDriver...")
        driver = setup_driver(download_dir)

        logger.info("Navigating to website...")
        driver.get('https://soilhealth.dac.gov.in/piechart')

        # Wait for page to load completely
        time.sleep(5)

        # Click on MacroNutrient(% View) tab
        logger.info("Clicking MacroNutrient tab...")
        macro_tab = wait_for_element(
            driver,
            By.XPATH,
            "//button[contains(@class, 'MuiTab-root') and contains(text(), 'MacroNutrient(% View)')]"
        )
        driver.execute_script("arguments[0].click();", macro_tab)
        logger.info("Clicked on MacroNutrient(% View) tab")

        time.sleep(2)  # Wait for tab switch

        # Select state
        logger.info(f"Selecting state: {state}")
        state_xpath = "//div[contains(@class, 'MuiFormControl-root')]//div[contains(@class, 'MuiSelect-select') and contains(@class, 'MuiOutlinedInput-input') and contains(text(), 'Select a state')]"
        if not select_from_mui_dropdown(driver, state_xpath, state):
            raise RuntimeError("Failed to select state")

        time.sleep(3)  # Wait for district dropdown to be populated

        # Select district
        logger.info(f"Selecting district: {district}")
        district_xpath = "//div[@class='MuiSelect-select MuiSelect-outlined MuiInputBase-input MuiOutlinedInput-input css-qiwgdb' and @role='combobox' and contains(text(), 'Select a district')]"
        if not select_from_mui_dropdown(driver, district_xpath, district):
            raise RuntimeError("Failed to select district")

        # Wait for data to load
        time.sleep(3)

        logger.info("Clicking 'Export to CSV' button...")
        export_button = wait_for_element(
            driver,
            By.CSS_SELECTOR,
            "a.downloadbtn[download='my-file.csv']"
        )

        # The link must point at a blob: URL before it is worth clicking.
        blob_url = export_button.get_attribute('href')
        if not blob_url or not blob_url.startswith('blob:'):
            raise RuntimeError("Invalid blob URL for download")

        # Click the download link
        driver.execute_script("arguments[0].click();", export_button)
        logger.info("Clicked 'Export to CSV' button")

        # Wait for CSV download
        expected_file = os.path.join(download_dir, "my-file.csv")
        if _wait_for_download(expected_file, download_timeout):
            logger.info("\nCSV file downloaded successfully!")
            return True
        logger.error("\nDownload timed out!")
        return False

    except Exception as e:
        # logger.exception keeps the traceback, which the plain error
        # message used to discard.
        logger.exception(f"An error occurred: {str(e)}")
        return False
    finally:
        if driver:
            driver.quit()
            logger.info("Browser closed")

if __name__ == "__main__":
    # Script entry point: download into ./soil_health_data and report the
    # outcome through the process exit status (0 = success, 1 = failure).
    exit_code = 1
    try:
        download_dir = os.path.join(os.getcwd(), "soil_health_data")
        success = download_soil_health_data(download_dir=download_dir)
    except Exception as e:
        logger.error(f"Script failed: {str(e)}")
    else:
        if success:
            exit_code = 0
    sys.exit(exit_code)