In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import csv
import time

def setup_driver():
    """Create a headless Chrome WebDriver.

    Returns:
        A Chrome WebDriver started with the --headless, --no-sandbox and
        --disable-dev-shm-usage command-line flags.
    """
    chrome_options = webdriver.ChromeOptions()
    # Flags are applied in the same order as they are listed here.
    for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
        chrome_options.add_argument(flag)
    return webdriver.Chrome(options=chrome_options)

def set_display_100(driver):
    """Switch the table's page-length control to 100 entries per page.

    Clicks the element named 'datatable_responsive_length' and then the
    option whose text is "100", waiting up to 10 seconds for each to become
    clickable. A timeout or missing element is reported on stdout and
    otherwise ignored.

    Args:
        driver: WebDriver with the listing page already loaded.
    """
    click_targets = (
        (By.NAME, 'datatable_responsive_length'),
        (By.XPATH, '//option[. = "100"]'),
    )
    try:
        wait = WebDriverWait(driver, 10)
        for locator in click_targets:
            wait.until(EC.element_to_be_clickable(locator)).click()
    except (TimeoutException, NoSuchElementException) as e:
        print(f"Error setting display: {e}")

def scrape_ids_on_page(driver):
    """Collect the text of every Tameem ID label on the current page.

    Args:
        driver: WebDriver positioned on a listing page.

    Returns:
        A list with the text of each <span> whose id starts with
        'datatable_responsive_Label_TameemID_'. An empty list is returned
        when a NoSuchElementException is raised (after printing the error).
    """
    selector = "span[id^='datatable_responsive_Label_TameemID_']"
    try:
        return [span.text for span in driver.find_elements(By.CSS_SELECTOR, selector)]
    except NoSuchElementException as e:
        print(f"Error finding ID elements: {e}")
        return []

def navigate_and_scrape(driver, main_page_url, max_pages=4):
    """Load the listing page and scrape IDs from the first `max_pages` pages.

    Args:
        driver: WebDriver used for navigation.
        main_page_url: URL of the listing page to open.
        max_pages: Maximum number of pages to scrape. Defaults to 4.

    Returns:
        A list of the ID strings collected from every visited page, in
        page order.
    """
    results = []
    driver.get(main_page_url)
    set_display_100(driver)  # Set 100 entries on the first page

    for page_number in range(1, max_pages + 1):
        results.extend(scrape_ids_on_page(driver))

        # Nothing is scraped after the last requested page, so do not spend
        # time clicking "Next" (and waiting for it) once it has been read.
        if page_number == max_pages:
            break

        try:
            next_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.LINK_TEXT, 'التالي'))
            )
            next_button.click()
            time.sleep(3)  # Allow time for the next page to load fully
        except (TimeoutException, NoSuchElementException):
            print("No more pages to navigate or 'Next' button not clickable.")
            break

    return results

def save_results_to_csv(data, filepath='tameem_ids.csv'):
    """Write the scraped IDs to a single-column CSV file.

    The file starts with a 'TameemID' header row, followed by one row per
    item in `data`.

    Args:
        data: Iterable of ID values to write.
        filepath: Destination path. Defaults to 'tameem_ids.csv'.
    """
    # The encoding is explicit so the output does not depend on the platform
    # default and can be read back reliably as UTF-8.
    with open(filepath, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['TameemID'])  # Header row
        writer.writerows([tameem_id] for tameem_id in data)

def main():
    """Scrape the Tameem ID listing and store the IDs in a CSV file.

    The browser is shut down in all cases, including when scraping or
    saving raises.
    """
    main_page_url = "https://portaleservices.moj.gov.sa/TameemPortal/TameemList.aspx?id"
    driver = setup_driver()
    try:
        save_results_to_csv(navigate_and_scrape(driver, main_page_url))
        print("Data scraping complete. Results saved to 'tameem_ids.csv'.")
    finally:
        driver.quit()

# Run the ID scraper only when this code is executed directly, not on import.
if __name__ == "__main__":
    main()


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import csv
import json
from tqdm import tqdm

def setup_driver():
    """Create a Chrome WebDriver that runs headless.

    Returns:
        A Chrome WebDriver started with the --no-sandbox,
        --disable-dev-shm-usage and --headless command-line flags.
    """
    chrome_options = Options()
    # Flags are applied in the same order as they are listed here.
    for flag in ('--no-sandbox', '--disable-dev-shm-usage', '--headless'):
        chrome_options.add_argument(flag)
    return webdriver.Chrome(options=chrome_options)

def navigate_and_scrape(driver, base_url, search_id):
    """Open the page for `search_id`, run the search, and read the details.

    Args:
        driver: WebDriver used for navigation.
        base_url: URL prefix; `search_id` is appended to it.
        search_id: ID to load and to type into the search box.

    Returns:
        A dict with the text of the number, subject, date and body labels,
        or None if any step times out, an element is missing, or another
        error occurs (a message is printed in each case).
    """
    # Output key -> id of the page element whose text supplies the value.
    label_ids = {
        'رقم التعميم': "lbl_TameemNo",
        'موضوعه': "lbl_SubjectText",
        'تاريخه': "lbl_Hdate",
        'نص التعميم': "lbl_Text",
    }
    driver.get(f"{base_url}{search_id}")
    try:
        wait = WebDriverWait(driver, 10)

        search_box = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "input[type='search']"))
        )
        search_box.clear()
        search_box.send_keys(search_id)
        driver.find_element(By.ID, "Button_search").click()

        # The number label becoming visible signals that results are shown.
        wait.until(EC.visibility_of_element_located((By.ID, "lbl_TameemNo")))

        return {
            label: driver.find_element(By.ID, element_id).text
            for label, element_id in label_ids.items()
        }
    except TimeoutException:
        print(f"Timeout occurred while trying to access the page with ID {search_id}.")
    except NoSuchElementException:
        print(f"Some elements were not found on the page with ID {search_id}.")
    except Exception as e:
        print(f"An error occurred with ID {search_id}: {e}")
    return None

def process_ids_from_csv(driver, filepath, base_url):
    """Scrape the details for every ID listed in a CSV file.

    The first row is treated as a header and skipped; the first column of
    each remaining non-empty row is used as the ID. Each successful result
    is also printed as indented JSON.

    Args:
        driver: WebDriver passed through to navigate_and_scrape.
        filepath: Path of the UTF-8 CSV file containing the IDs.
        base_url: URL prefix passed through to navigate_and_scrape.

    Returns:
        A list of the truthy results returned by navigate_and_scrape.
    """
    scraped = []
    with open(filepath, newline='', encoding='utf-8') as csvfile:
        rows = csv.reader(csvfile)
        next(rows, None)  # Skip header
        for row in tqdm(rows, desc="Processing IDs"):
            if not row:
                continue
            details = navigate_and_scrape(driver, base_url, row[0])
            if not details:
                continue
            scraped.append(details)
            print(json.dumps(details, ensure_ascii=False, indent=4))
    return scraped

def main():
    """Scrape details for every ID in 'tameem_ids.csv' and save them as JSON.

    The results are written to 'tameem.json' as UTF-8 with non-ASCII
    characters preserved.
    """
    base_url = "https://portaleservices.moj.gov.sa/TameemPortal/TameemList.aspx?id="
    csv_file_path = 'tameem_ids.csv'  # CSV file containing IDs
    driver = setup_driver()
    try:
        results = process_ids_from_csv(driver, csv_file_path, base_url)
    finally:
        # Always shut the browser down, even if the CSV is missing or
        # processing fails, so no Chrome process is left running.
        driver.quit()
    with open('tameem.json', 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

# Run the detail scraper only when this code is executed directly, not on import.
if __name__ == "__main__":
    main()
