In [4]:
pip install bs4 selenium

Collecting selenium
  Downloading selenium-4.31.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.29.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Collecting PySocks!=1.5.7,<2.0,>=1.5.6 (from urllib3[socks]<3,>=1.26->selenium)
  Downloading PySocks-1.7.1-py3-none-any.whl.metadata (13 kB)
Downloading selenium-4.31.0-py3-none-any.whl (9.4 MB)
   ---------------------------------------- 0.0/9.4 MB ? eta -:--:--
   --- ------------------------------------ 0.8/9.4 MB 6.

# Import necessary libraries

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import time
import os
import shutil
import re

In [121]:
# --- Configuration ---
# Absolute path of the folder the PDFs are saved into; created if missing.
DOWNLOAD_DIR = os.path.abspath("downloads")
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Lower-case alias kept for code that still refers to `download_dir`.
# It points at the very same absolute path, so listing/renaming files can never
# look at a different folder than the one the browser is told to download into.
download_dir = DOWNLOAD_DIR

# Location of the locally unpacked ChromeDriver binary.
service = Service("./chromedriver-win64/chromedriver-win64/chromedriver.exe")

# NOTE: no browser is started in this cell. The driver is created in the
# "Setup Chrome Options" cell; starting one here as well left an extra Chrome
# window/chromedriver process running once `driver` was rebound there.

In [122]:
# --- Setup Chrome Options ---
options = Options()
options.add_argument("--headless=new")
options.add_argument("--window-size=1920,1080")
options.add_experimental_option("prefs", {
    "download.default_directory": DOWNLOAD_DIR,
    "plugins.always_open_pdf_externally": True,
})

driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 20)

In [123]:
# --- Search the e-judgment portal and download every judgment PDF ---
SEARCH_URL = "https://ejudgment.kehakiman.gov.my/ejudgmentweb/searchpage.aspx?JurisdictionType=ALL"
DOWNLOAD_URL_TEMPLATE = "https://efs.kehakiman.gov.my/EFSWeb/DocDownloader.aspx?DocumentID={doc_id}&Inline=true"
PAGE_LOAD_WAIT_SECONDS = 60    # blunt wait for a result page to finish rendering
DOWNLOAD_TIMEOUT_SECONDS = 15  # max seconds to wait for one PDF to appear on disk


def _sanitize_case_number(raw_text):
    """Return the case-number text with characters that are illegal in file names replaced by '_'."""
    return re.sub(r"[\\/:*?\"<>|\n\r]+", "_", raw_text.strip()).replace(" ", "_")


def _find_view_button(row, columns):
    """Return the row's `[data-action='viewdoc']` element, or None if the row has none.

    When the last cell holds a nested table, the button is looked up in the
    second cell of each nested row; otherwise it is looked up anywhere in the row.
    `find_elements` is used throughout so a missing button yields an empty list
    instead of an exception.
    """
    last_cell = columns[-1]
    if last_cell.find_elements(By.TAG_NAME, "table"):
        nested_rows = last_cell.find_elements(By.CSS_SELECTOR, "table.innerTable.gridView > tbody > tr")
        for nested_row in nested_rows:
            nested_cells = nested_row.find_elements(By.TAG_NAME, "td")
            # Make sure there are at least two cells before accessing index 1
            if len(nested_cells) >= 2:
                buttons = nested_cells[1].find_elements(By.CSS_SELECTOR, "[data-action='viewdoc']")
                if buttons:
                    return buttons[0]
        return None

    buttons = row.find_elements(By.CSS_SELECTOR, "[data-action='viewdoc']")
    return buttons[0] if buttons else None


def _wait_for_new_pdf(directory, before_files, timeout=DOWNLOAD_TIMEOUT_SECONDS):
    """Poll `directory` once per second for a `.pdf` not in `before_files`.

    Returns the new file's name, or None if nothing appeared within `timeout` seconds.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        new_pdfs = sorted(
            name for name in set(os.listdir(directory)) - before_files
            if name.endswith(".pdf")
        )
        if new_pdfs:
            return new_pdfs[0]
        time.sleep(1)  # check every second
    return None


def _close_extra_tabs(browser, main_handle):
    """Close every tab except `main_handle` and hand focus back to it."""
    try:
        for handle in browser.window_handles:
            if handle != main_handle:
                browser.switch_to.window(handle)
                browser.close()
    finally:
        browser.switch_to.window(main_handle)


# Defined before `try` so the summary in `finally` can always be printed,
# even when the failure happens before the first result page is read.
total_row = 0
all_pages_done = False

try:
    driver.get(SEARCH_URL)

    # Wait for the search form to load
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div[data-id='divEJudgmentPortalSearchPageControl']")))

    # Click "Jenis Kes" dropdown and select "Sivil"
    driver.find_element(By.XPATH, "//span[@data-type='ddlCaseType']").click()
    time.sleep(1)
    driver.find_element(By.XPATH, "//li[contains(text(), 'Sivil')]").click()

    # Scroll down before filling the 'Tarikh Keputusan' from/to fields
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1)

    # Fill the 'from' date picker
    from_picker = driver.find_element(By.XPATH, "//span[@data-type='dpFromDateOfResult']//input")
    from_picker.clear()
    from_picker.send_keys("01 Dis 2024")

    # Fill the 'to' date picker
    to_picker = driver.find_element(By.XPATH, "//span[@data-type='dpDateOfResult']//input")
    to_picker.clear()
    to_picker.send_keys("31 Dis 2024")

    time.sleep(1)

    # Click the search button
    driver.find_element(By.XPATH, "//input[@data-type='btnSearch']").click()

    # Get the total number of pages; wait until the attribute actually holds a number
    total_pages_locator = (By.XPATH, "//span[@data-type='TotalPage']")
    wait.until(
        lambda d: (d.find_element(*total_pages_locator).get_attribute("data-totalpage") or "").strip().isdigit()
    )
    total_pages = int(driver.find_element(*total_pages_locator).get_attribute("data-totalpage"))

    # Every download opens a new tab; remember the results tab so we can return to it.
    main_handle = driver.current_window_handle

    current_page = 1
    while current_page <= total_pages:
        time.sleep(PAGE_LOAD_WAIT_SECONDS)  # Wait for the page to load
        rows = driver.find_elements(By.CSS_SELECTOR, "table[data-id='tblAPList'] > tbody > tr")

        if len(rows) == 1 and "NoRecordFound" in rows[0].get_attribute("innerHTML"):
            print("❌ No results found for the selected criteria.")
            break

        print(f"✅ Page {current_page}: Found {len(rows)} row(s)")

        processed_rows_on_page = 0  # rows on this page whose PDF was actually saved

        for i, row in enumerate(rows):
            # Fallback label so the error message below never refers to a
            # previous row (or an undefined name) when the failure is early.
            nombor_kes = f"row {i+1}"
            try:
                columns = row.find_elements(By.TAG_NAME, "td")

                # Skip if row doesn't have enough columns
                if len(columns) < 2:
                    print(f"⚠️ Skipping row {i+1}: insufficient columns ({len(columns)})")
                    continue

                nombor_kes = _sanitize_case_number(columns[1].text)

                # Looked up fresh for every row, so a row without a link can
                # never reuse the previous row's button.
                view_btn = _find_view_button(row, columns)
                doc_id = view_btn.get_attribute("data-documentid") if view_btn is not None else None

                if not doc_id:
                    print(f"ℹ️ Skipping row {i+1} (Nombor Kes: {nombor_kes}): No download link found.")
                    continue  # Skip to the next row if there is nothing to download

                # Snapshot the folder Chrome downloads into, then trigger the download.
                before_files = set(os.listdir(DOWNLOAD_DIR))
                download_url = DOWNLOAD_URL_TEMPLATE.format(doc_id=doc_id)
                # Pass the URL as a script argument instead of splicing it into the JS source.
                driver.execute_script("window.open(arguments[0], '_blank');", download_url)

                try:
                    new_file = _wait_for_new_pdf(DOWNLOAD_DIR, before_files)
                finally:
                    # Do not let one tab per document pile up in the browser.
                    _close_extra_tabs(driver, main_handle)

                if new_file:
                    new_path = os.path.join(DOWNLOAD_DIR, new_file)
                    renamed_path = os.path.join(DOWNLOAD_DIR, f"{nombor_kes}.pdf")

                    shutil.move(new_path, renamed_path)
                    print(f"📥 Downloaded and renamed: {nombor_kes}.pdf")
                    processed_rows_on_page += 1  # count only files that really arrived
                else:
                    print(f"⚠️ PDF not detected for: {nombor_kes}")
            except Exception as e:
                # Best effort: one bad row must not abort the whole run.
                print(f"⚠️ Error processing {nombor_kes}: {e}")

        print(f"✅ Processed {processed_rows_on_page} rows on page {current_page}.")
        total_row += processed_rows_on_page

        # Move to next page only if more pages remain
        if current_page < total_pages:
            try:
                driver.find_element(By.XPATH, "//span[@class='fa fa-forward']").click()
                current_page += 1
            except Exception as e:
                print(f"⚠️ Could not click Next: {e}")
                break
        else:
            print("🚩 Reached last page.")
            all_pages_done = True
            break

except Exception as e:
    print(f"❌ Error: {e}")
finally:
    # Always release the browser first, whatever happened above.
    driver.quit()
    # Only claim success when every result page was walked through.
    if all_pages_done:
        print("All file downloaded successfully")
    print(f"Total files: {total_row}")

✅ Page 1: Found 20 row(s)
📥 Downloaded and renamed: C-02(IM)-1957-11_2023_(Mahkamah_Rayuan).pdf
📥 Downloaded and renamed: BA-22NCvC-513-11_2019_(Mahkamah_Tinggi).pdf
📥 Downloaded and renamed: WA-12BNCvC-174-12_2022_(Mahkamah_Tinggi).pdf
📥 Downloaded and renamed: KB-B53KP-1-09_2020_(Mahkamah_Sesyen).pdf
📥 Downloaded and renamed: BA-12B-49-05_2024_(Mahkamah_Tinggi).pdf
📥 Downloaded and renamed: W-02(IM)(IPCv)-32-01_2024_(Mahkamah_Rayuan).pdf
📥 Downloaded and renamed: PA-23NCvC-8-03_2023_(Mahkamah_Tinggi).pdf
📥 Downloaded and renamed: BD-A54-7-07_2024_(Mahkamah_Sesyen).pdf
📥 Downloaded and renamed: JA-B52NCC-145-10_2022_(Mahkamah_Sesyen).pdf
📥 Downloaded and renamed: AA-22NCC-9-09_2022_(Mahkamah_Tinggi).pdf
📥 Downloaded and renamed: DA-21NCvC-17-10_2020_(Mahkamah_Tinggi).pdf
📥 Downloaded and renamed: WA-A52NCvC-307-04_2021_(Mahkamah_Sesyen).pdf
📥 Downloaded and renamed: AA-A72NCvC-51-02_2023_(Mahkamah_Majistret).pdf
📥 Downloaded and renamed: DA-23NCvC-2-12_2020_(Mahkamah_Tinggi).pdf
📥 Dow