# In [None]:
# Install dependencies first
# pip install selenium pandas requests

import os
import time
from datetime import datetime

import pandas as pd
import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait, Select

# ------------------- CONFIG -------------------
# Search page of the judgment portal, plus the local output locations.
BASE_URL = "https://hcraj.nic.in/cishcraj-jdp/JudgementFilters/"
DOWNLOAD_DIR, CSV_FILE = "judgments_pdfs", "downloaded_judgments.csv"

# Make sure the PDF folder exists up front; an existing folder is not an error.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# ------------------- DATES -------------------
# Search window typed into the form's from/to date fields.
# NOTE(review): presumably DD/MM/YYYY -- confirm against the site's date inputs.
today = datetime.today()  # not referenced elsewhere in the visible script
from_date_str, to_date_str = "01/09/2025", "11/09/2025"

# ------------------- CHROME OPTIONS -------------------
# Start a headless Chrome session and open the search page.
chrome_options = Options()
for chrome_flag in (
    "--headless",  # Run without GUI
    "--no-sandbox",
    "--disable-dev-shm-usage",
):
    chrome_options.add_argument(chrome_flag)

# The driver location can be overridden with the CHROMEDRIVER_PATH environment
# variable so the script runs on other machines without editing; the original
# hard-coded path remains the default, so existing setups behave as before.
CHROMEDRIVER_PATH = os.environ.get(
    "CHROMEDRIVER_PATH",
    r"C:\Users\shubham\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe",
)

driver = webdriver.Chrome(
    service=Service(CHROMEDRIVER_PATH),
    options=chrome_options,
)
# Shared explicit wait (40 s timeout) used by the later steps of the script.
wait = WebDriverWait(driver, 40)

try:
    driver.get(BASE_URL)
except Exception:
    # Do not leave a headless Chrome process behind if the first page load fails.
    driver.quit()
    raise

# ------------------- FILL FORM -------------------
# Type the search window into the two date inputs, waiting for each one to be
# present in the DOM before sending keys (from-date first, then to-date).
for field_id, date_text in (
    ("partyFromDate", from_date_str),
    ("partyToDate", to_date_str),
):
    date_input = wait.until(EC.presence_of_element_located((By.ID, field_id)))
    date_input.send_keys(date_text)

# ------------------- REPORTABLE -------------------
reportable_yes = driver.find_element(By.ID, "rpjudgeA")
reportable_yes.click()  # Reportable = YES

# ------------------- CAPTCHA -------------------
# The captcha is solved by a person: the image element is saved to disk and the
# code is then read from standard input and typed into the captcha text box.
captcha_locator = (By.ID, "captcha")
captcha_img = wait.until(EC.presence_of_element_located(captcha_locator))
captcha_img.screenshot("captcha.png")
captcha_code = input("Enter captcha code from captcha.png: ")
captcha_box = driver.find_element(By.ID, "txtCaptcha")
captcha_box.send_keys(captcha_code)

# ------------------- SUBMIT -------------------
submit_btn = driver.find_element(By.ID, "btncasedetail1_1")
submit_btn.click()

# ------------------- SET "All" RECORDS -------------------
# Switch the results table's page-length dropdown so every record is rendered
# on one page. On any Selenium failure (dropdown never appears, element is not
# a <select>, option missing) the browser is closed and the script exits with
# a non-zero status.
try:
    select_element = WebDriverWait(driver, 40).until(
        EC.presence_of_element_located((By.NAME, "sample_1_length"))
    )
    select = Select(select_element)

    # Pick the option whose label is "All" when the dropdown offers one. The
    # previous code always chose value "10", which contradicts the stated
    # intent; "10" is kept only as the fallback when no "All" option exists.
    all_option = next(
        (opt for opt in select.options if opt.text.strip().lower() == "all"),
        None,
    )
    if all_option is not None:
        select.select_by_visible_text(all_option.text.strip())
    else:
        select.select_by_value("10")

    time.sleep(7)  # wait for table to refresh
except WebDriverException:
    # Narrowed from a bare `except:` so Ctrl-C and real programming errors are
    # no longer reported as a missing dropdown.
    print("Dropdown for setting all records not found or not loaded.")
    driver.quit()
    # SystemExit instead of the interactive-only `exit()` helper.
    raise SystemExit(1)
# ----- WAIT FOR TABLE -------------------
# Wait for the results table to be present and report how many rows it holds.
# On a Selenium failure (typically the wait timing out) the browser is closed
# and the script exits with a non-zero status.
try:
    table = wait.until(EC.presence_of_element_located((By.ID, "sample_1")))
    rows = table.find_elements(By.TAG_NAME, "tr")
    # One row is discounted as the header; clamp so an empty table reports 0
    # rather than -1.
    print(f"Total records found: {max(len(rows) - 1, 0)}")
except WebDriverException:
    # Narrowed from a bare `except:` so Ctrl-C and real programming errors are
    # no longer reported as a missing table.
    print("No results table found. Either no judgments or captcha was incorrect.")
    driver.quit()
    # SystemExit instead of the interactive-only `exit()` helper.
    raise SystemExit(1)

# ------------------- LOAD PREVIOUSLY DOWNLOADED -------------------
# Read the log written by earlier runs, if there is one. Without a log file the
# history is an empty frame and an empty id set.
has_history = os.path.exists(CSV_FILE)
df_prev = pd.read_csv(CSV_FILE) if has_history else pd.DataFrame()
# Serial numbers from the log, normalised to strings.
downloaded_ids = set(df_prev['Sr.No.'].astype(str).tolist()) if has_history else set()

# ------------------- SCRAPE AND DOWNLOAD -------------------
# Wait until at least one PDF download button is present, then fetch the table
# rows again. If no button appears within the wait timeout (or the table is
# gone), close the browser and exit instead of leaving Chrome running behind
# an unhandled traceback -- the same failure handling the earlier steps use.
try:
    wait.until(EC.presence_of_element_located(
        (By.XPATH, "//button[contains(@onclick,\"DownloadOrdJud(this,'D')\")]")
    ))

    # Now fetch table rows
    table = driver.find_element(By.ID, "sample_1")
    rows = table.find_elements(By.TAG_NAME, "tr")
except WebDriverException:
    print("No PDF download buttons found in the results table.")
    driver.quit()
    raise SystemExit(1)

# One row is discounted as the header.
print(f"Total records found: {len(rows) - 1}")

# One [sr_no, case_details, order_date, pdf_filename] entry per PDF that was
# actually saved; consumed by the CSV step at the end of the script.
data_list = []

# Download endpoint; the query parameters are taken from the data-* attributes
# of each row's download button.
PDF_ENDPOINT = "https://hcraj.nic.in/cishcraj-jdp/DownloadJudgement.aspx"
DOWNLOAD_BTN_XPATH = ".//button[contains(@onclick,\"DownloadOrdJud(this,'D')\")]"

# NOTE(review): downloaded_ids (built from the CSV earlier in the script) is not
# consulted here, so every run re-downloads and re-logs all rows. Confirm
# whether Sr.No. is stable across searches before using it to skip rows.
# NOTE(review): the request is made without the browser session's cookies --
# verify the endpoint serves PDFs to a fresh client.
for row in rows[1:]:  # rows[0] is skipped as the header row
    cols = row.find_elements(By.TAG_NAME, "td")
    if len(cols) < 4:
        continue

    sr_no = cols[0].text.strip()
    case_details = cols[1].text.strip()
    order_date = cols[2].text.strip()

    # Locate the 'D' button relative to this row (looked up once; the earlier
    # duplicate lookup after the try block has been removed).
    try:
        download_btn = row.find_element(By.XPATH, DOWNLOAD_BTN_XPATH)
    except NoSuchElementException:
        print(f"PDF download button not found for {sr_no}")
        continue

    pdf_filename = f"{sr_no}.pdf"
    pdf_path = os.path.join(DOWNLOAD_DIR, pdf_filename)
    # Stream into a temporary file first so a failed or partial download never
    # replaces an existing PDF of the same name.
    tmp_path = pdf_path + ".part"

    # Passing the values via `params` lets requests URL-encode them.
    query = {
        "caseno": download_btn.get_attribute("data-caseno"),
        "orderno": download_btn.get_attribute("data-orderno"),
        "cyear": download_btn.get_attribute("data-cyear"),
        "ftype": "PDF",
    }

    try:
        # The timeout prevents a stalled server from hanging the run, and the
        # context manager releases the streamed connection.
        with requests.get(PDF_ENDPOINT, params=query, stream=True, timeout=60) as r:
            # Reject HTTP error responses instead of saving them as ".pdf".
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(tmp_path, pdf_path)
    except (requests.RequestException, OSError) as exc:
        print(f"Download failed for {sr_no}: {exc}")
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        continue  # failed rows are not recorded as downloaded

    print(f"Downloaded: {pdf_filename}")

    data_list.append([sr_no, case_details, order_date, pdf_filename])


# ------------------- SAVE TO CSV -------------------
# Turn this run's records into a frame, append them to the earlier log when
# that log has rows, write the result back to disk and close the browser.
columns = ["Sr.No.", "Case Details", "Order/Judgement Date", "PDF_File"]
df_new = pd.DataFrame(data_list, columns=columns)

df_final = (
    df_new
    if df_prev.empty
    else pd.concat([df_prev, df_new], ignore_index=True)
)

df_final.to_csv(CSV_FILE, index=False)
print(f"Data saved to {CSV_FILE}. PDFs are in {DOWNLOAD_DIR}")

driver.quit()
