Import Packages

In [None]:
from bs4 import BeautifulSoup
import pandas as pd

from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time

Define Scraping Function

In [None]:
# ADNOC scraping function
def _parse_station_block(block, region_name):
    """Build the record for one station block, or return None if it has no name tag.

    Args:
        block: Parsed element for one ``div.station-map__inner-wrapper``.
        region_name: Label stored in the record's "Region" field.

    Returns:
        A dict with the station's columns, or ``None`` when the block has no
        ``h6.trigger-showOnMap`` element (which carries the name, coordinates
        and id).
    """
    h6_tag = block.find("h6", class_="trigger-showOnMap")
    if h6_tag is None:
        # Without this tag there is no name, position or id to record.
        return None

    address_tag = block.find("p", class_="station-map__text")
    services_list = block.find("ul", class_="station-map__list")
    distance_tag = block.find("span", class_="station-map__time-span")
    directions_tag = block.find("a", class_="station-map__btn-direction")

    services = []
    if services_list is not None:
        services = [
            item.get_text(strip=True)
            for item in services_list.find_all("li", class_="station-map__item")
        ]

    return {
        "Station Name": h6_tag.text.strip(),
        "Latitude": h6_tag.get("data-latitude"),
        "Longitude": h6_tag.get("data-longitude"),
        "Address": address_tag.text.strip() if address_tag is not None else "",
        "Services": ", ".join(services),
        "Distance": distance_tag.text.strip() if distance_tag is not None else "",
        "Station ID": h6_tag.get("data-id"),
        "Google Maps URL": directions_tag.get("href", "") if directions_tag is not None else "",
        "Region": region_name,
    }


def scrape_station_data(driver, region_name, filename):
    """Parse the station list from the loaded page and save it to Excel.

    Reads ``driver.page_source``, extracts one record per
    ``div.station-map__inner-wrapper`` block and writes all records to
    ``filename`` with ``DataFrame.to_excel``.

    Args:
        driver: Object exposing ``page_source`` (the HTML of the loaded page),
            e.g. the Selenium WebDriver after the results have loaded.
        region_name: Label stored in the "Region" column of every row.
        filename: Destination passed to ``DataFrame.to_excel``. When it is a
            path, missing parent directories are created first.

    Returns:
        None. Progress, skipped blocks and parse errors are reported with
        ``print``.
    """
    import os  # local import: only needed for the output-directory check

    print("ok go")
    # Get HTML from the loaded browser session
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    station_blocks = soup.find_all("div", class_="station-map__inner-wrapper")

    stations = []
    for index, block in enumerate(station_blocks, start=1):
        try:
            record = _parse_station_block(block, region_name)
        except Exception as e:
            # Best-effort: one malformed block must not abort the whole export.
            print(f"Error parsing a block: {e}")
            continue

        if record is None:
            print(f"Skipping block {index}: no 'h6.trigger-showOnMap' tag found.")
            continue
        stations.append(record)

    if not stations:
        # Still write the file (as before), but make the empty result visible.
        print("Warning: no stations were parsed; the saved file will have no rows.")

    # Create the output directory up front so a missing folder does not
    # discard the scraped data at the very last step.
    if isinstance(filename, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filename))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    # Save to Excel
    df = pd.DataFrame(stations)
    df.to_excel(filename, index=False)
    print(f"Saved to {filename}")

In [None]:
# Define the region name {"Abu Dhabi", "Al Ain", "Al Dhafra"}
region_name = "Abu Dhabi"

# Define the filename output
filename = "../Data/ADNOC_Scrapping/Abudhabi_Adnoc.xlsx"

# CSS class of a single station card in the results list
station_class = "station-map__inner-wrapper"

# Start Chrome session
driver = webdriver.Chrome()
try:
    # NOTE(review): the query string contains "Longtitude&=55.96424", which looks
    # like a typo for "Longtitude=55.96424". Left unchanged - confirm against the site.
    driver.get("https://www.adnocdistribution.ae/en/find-station?Latitude=25.788335&Longtitude&=55.96424&Keyword=&Region=&Services=")
    driver.maximize_window()

    # Wait for the dropdown to be present
    wait = WebDriverWait(driver, 10)
    dropdown_element = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "options-region")))

    # Create a Select object
    select = Select(dropdown_element)

    # Find the value that matches the visible region name (case-insensitive)
    value_to_select = None
    for option in select.options:
        if option.text.strip().lower() == region_name.lower():
            value_to_select = option.get_attribute("value")
            break

    if not value_to_select:
        print(f"Region '{region_name}' not found in dropdown.")
        # SystemExit instead of exit(): exit() is a site/IPython helper and may
        # shut down a notebook kernel. The finally clause below closes the browser.
        raise SystemExit(1)

    select.select_by_value(value_to_select)
    print(f"Selected region: {region_name} with value: {value_to_select}")

    # Give a brief pause after selecting the region (to avoid layout shift issues)
    time.sleep(1)

    # Fetch the button only now (it may be re-rendered or moved by the selection)
    search_button = wait.until(EC.presence_of_element_located((By.ID, "btnSearchLocations")))

    # Scroll to it (safe practice in case DOM reflowed)
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", search_button)

    # Optional: brief pause to let any layout animations finish
    time.sleep(2)

    # Remember a currently displayed station BEFORE triggering the search, so
    # the staleness wait below really tracks the pre-search content.
    # find_elements returns an empty list instead of raising when nothing matches.
    current_cards = driver.find_elements(By.CLASS_NAME, station_class)
    old_station = current_cards[0] if current_cards else None

    # Click via JavaScript (bypasses interception from overlays)
    driver.execute_script("arguments[0].click();", search_button)
    print("Clicked the search button via JS.")

    # Wait for the old station list to disappear
    if old_station is not None:
        try:
            wait.until(EC.staleness_of(old_station))
        except TimeoutException:
            print("Old content did not disappear. Proceeding anyway.")

    # Wait for new filtered results
    try:
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, station_class)))
        print(f"Station data for '{region_name}' loaded successfully.")
    except TimeoutException:
        print("Timeout while waiting for region-specific data to load.")

    # Call the scraping function (now that the page is loaded)
    scrape_station_data(driver, region_name, filename)
finally:
    # Always close the browser, even when a wait or the scrape fails
    driver.quit()


Selected region: Abu Dhabi with value: 1ACBB48FA33649FF902E28FB4F8A7F9B
Clicked the search button via JS.
Station data for 'Abu Dhabi' loaded successfully.
ok go
Saved to ../Data/Selenium_Data/Abudhabi_Adnoc415.xlsx
