In [None]:
%pip install selenium beautifulsoup4 pandas lxml

import time, subprocess
import pandas as pd
from bs4 import BeautifulSoup
from urllib.parse import urljoin

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC



LIST_URL        = "https://www.cse.lk/pages/announcements/announcements.component.html"

XPATH_CONTAINER = "/html/body/app-root/div/app-announcements/div[3]/div/div/div/div/div[6]/div[2]"
WAIT_SECS       = 25
PAGE_PAUSE      = 1.0
CSV_PATH        = "cse_disclosures.csv"


def start_safari():
    """Create a Safari WebDriver, resize its window to 1366x900, and return it."""
    browser = webdriver.Safari()
    width, height = 1366, 900
    browser.set_window_size(width, height)
    return browser

def wait_xpath(drv, xpath, secs=WAIT_SECS):
    """Wait up to ``secs`` seconds for an element matching ``xpath`` to be present.

    Returns whatever ``WebDriverWait.until`` yields for the
    ``presence_of_element_located`` condition (the located element).
    """
    locator = (By.XPATH, xpath)
    condition = EC.presence_of_element_located(locator)
    waiter = WebDriverWait(drv, secs)
    return waiter.until(condition)

def click_next_within_container(container_el) -> bool:
    """Click the DataTables 'Next' button inside the container.

    Args:
        container_el: WebElement that encloses the paginated table; the
            button is looked up with ``.dataTables_paginate .next``.

    Returns:
        True if the button was clicked (followed by a ``PAGE_PAUSE`` sleep),
        False if the button carries the ``disabled`` class, cannot be found,
        or the click fails with a WebDriver error.
    """
    # Local import keeps this block self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import WebDriverException

    try:
        nxt = container_el.find_element(By.CSS_SELECTOR, ".dataTables_paginate .next")
        # Compare whole class tokens so a class that merely contains the
        # substring "disabled" is not mistaken for the disabled state.
        class_tokens = (nxt.get_attribute("class") or "").split()
        if "disabled" in class_tokens:
            return False
        nxt.click()
    except WebDriverException:
        # Only browser/driver failures mean "cannot advance"; programming
        # errors propagate instead of silently truncating the scrape.
        return False
    # Fixed pause after the click before the caller reads the page again.
    time.sleep(PAGE_PAUSE)
    return True

def parse_rows_from_container_html(container_html: str, base_url: str) -> list[dict]:
    """Turn one page of container HTML into a list of row dictionaries.

    The first ``table.dataTable`` in the markup is scanned; every ``tbody``
    row with at least one ``<td>`` yields a dict with the keys ``Date``,
    ``Company``, ``Title``, ``Category`` and ``Detail_URL``. Cells that are
    absent produce empty strings. An empty list is returned when no such
    table exists.
    """

    def text_at(cells, idx):
        # Stripped text of the idx-th cell, or "" when the row is too short.
        return cells[idx].get_text(strip=True) if len(cells) > idx else ""

    grid = BeautifulSoup(container_html, "lxml").select_one("table.dataTable")
    records = []
    if not grid:
        return records

    for row in grid.select("tbody tr"):
        cells = row.find_all("td")
        if not cells:
            continue

        # The third cell supplies both the "Date" text and the detail link.
        third = cells[2] if len(cells) > 2 else None
        link = third.find("a", href=True) if third else None

        if link:
            date_text = link.get_text(strip=True)
            detail_url = urljoin(base_url, link["href"])
        else:
            date_text = third.get_text(strip=True) if third else ""
            detail_url = ""

        records.append({
            "Date": date_text,
            "Company": text_at(cells, 0),
            "Title": text_at(cells, 1),
            "Category": text_at(cells, 3),
            "Detail_URL": detail_url,
        })
    return records

# Walk every page of the announcements table and collect the parsed rows.
driver = start_safari()
try:
    driver.get(LIST_URL)

    # Block until the table container is present in the DOM.
    container = wait_xpath(driver, XPATH_CONTAINER)
    print("Container found via XPath.")

    rows, page = [], 1
    while True:
        # Look the container up again on every pass rather than reusing the
        # earlier handle (presumably the page re-renders after paging).
        container = driver.find_element(By.XPATH, XPATH_CONTAINER)
        html = container.get_attribute("innerHTML")

        page_rows = parse_rows_from_container_html(html, LIST_URL)
        rows.extend(page_rows)
        print(f"✓ Page {page} parsed, total rows so far: {len(rows)}")
        page += 1

        # Stop once 'Next' is disabled, missing, or cannot be clicked.
        if not click_next_within_container(container):
            break
finally:
    # Always close the browser session, even if a wait times out or a
    # lookup fails part-way through.
    driver.quit()


# Build the export frame from the scraped rows.
df = pd.DataFrame(rows).fillna("")

# With zero scraped rows the frame has no columns; add the export columns so
# the selection below cannot raise KeyError.
for col in ["Date", "Company", "Title"]:
    if col not in df.columns:
        df[col] = ""
df_export = df[["Date", "Company", "Title"]].drop_duplicates().reset_index(drop=True)

print("Total rows (export):", len(df_export))
display(df_export.head(12))


# utf-8-sig writes a BOM at the start of the file.
df_export.to_csv(CSV_PATH, index=False, encoding="utf-8-sig")
print(f"Exported to {CSV_PATH}")

# Auto-open CSV in Excel (macOS)
try:
    # check=True turns a non-zero exit from `open` (e.g. Excel not installed)
    # into CalledProcessError; without it that failure would go unreported.
    subprocess.run(["open", "-a", "Microsoft Excel", CSV_PATH], check=True)
except (OSError, subprocess.CalledProcessError) as e:
    # OSError covers the `open` executable being unavailable (non-macOS hosts).
    print("Could not auto-open in Excel:", e)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
✅ Container found via XPath.
✓ Page 1 parsed, total rows so far: 25
✓ Page 2 parsed, total rows so far: 31
Total rows (export): 31


Unnamed: 0,Date,Company,Title
0,25-09-2025,ABANS ELECTRICALS PLC,ABANS ELECTRICALS PLC – 43RD ANNUAL GENERAL ME...
1,25-09-2025,AGSTAR PLC,ANNUAL GENERAL MEETING 2025 - APPROVED
2,25-09-2025,AMBEON HOLDINGS PLC,CORPORATE DISCLOSURE
3,26-09-2025,AMW CAPITAL LEASING AND FINANCE PLC,APPOINTMENT OF DIRECTORS
4,26-09-2025,ASIA CAPITAL PLC,DISPOSAL OF SHARES IN ACAP STOCK BROKERS (PVT)...
5,25-09-2025,ASIRI SURGICAL HOSPITAL PLC,Appointment of Mr. M.I. Furkan as a Senior Ind...
6,25-09-2025,ASIRI HOSPITAL HOLDINGS PLC,Appointment of Mr. M.I Furkan as a Senior Inde...
7,26-09-2025,ASSOCIATED MOTOR FINANCE COMPANY PLC,Corporate Disclosure - Acquisition of the Issu...
8,25-09-2025,C M HOLDINGS PLC,Extraordinary General Meeting - Subdivision of...
9,25-09-2025,C M HOLDINGS PLC,Extraordinary General Meeting- Subdivision of ...


💾 Exported to cse_disclosures.csv
