In [1]:
import os
import re
import time
import requests
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from urllib.parse import urlparse
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.common.action_chains import ActionChains


In [3]:
# ------------------------- CONFIG -------------------------
TEST_MODE = True
EXCEL_PATH = "Higher Ed cusips_test.xlsx" if TEST_MODE else "Higher Ed cusips.xlsx"
EXCEL_OUTPUT = "disclosure_document_list_test.csv" if TEST_MODE else "disclosure_document_list.csv"
ROOT_DIR = Path("university_pdfs_test" if TEST_MODE else "university_pdfs")
FAILED_LOG_PATH = "failed_downloads_test.csv" if TEST_MODE else "failed_downloads.csv"
TMP_DIR = Path("__tmp_downloads")
WAIT_TIME = 10
TIMEOUT = 20
SLEEP = 0.3
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    )
}
YEAR = 2024
# ----------------------------------------------------------

# Load CUSIP data
holdings = pd.read_excel(EXCEL_PATH, sheet_name='Holdings')
index = pd.read_excel(EXCEL_PATH, sheet_name='Index')
df_cusips = pd.concat([holdings[['Cusip 8', 'CREDIT']], index[['Cusip 8', 'CREDIT']]]).reset_index(drop=True)
df_cusips = df_cusips.groupby('CREDIT')['Cusip 8'].first().reset_index()
list_cusip = df_cusips['Cusip 8'].to_list()

def slugify(text):
    return re.sub(r"[^\w\-. ]", "_", text).strip().replace(" ", "_")

def setup_browser(download_dir):
    chrome_opts = Options()
    chrome_opts.add_argument("--no-sandbox")
    chrome_opts.add_argument("--disable-gpu")
    chrome_opts.add_argument("--disable-dev-shm-usage")
    chrome_opts.add_experimental_option("prefs", {
        "download.default_directory": str(download_dir.resolve()),
        "download.prompt_for_download": False,
        "plugins.always_open_pdf_externally": True,
    })
    return webdriver.Chrome(options=chrome_opts)

def handle_cookie_consent(driver):
    try:
        accept_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.ID, "ctl00_mainContentArea_disclaimerContent_yesButton"))
        )
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", accept_button)
        time.sleep(0.3)
        accept_button.click()
        print("Clicked 'Accept' button")
        time.sleep(0.5)
        body = driver.find_element(By.TAG_NAME, "body")
        ActionChains(driver).move_to_element_with_offset(body, 0, 0).click().perform()
        print("Performed dummy click")
    except TimeoutException:
        print("No cookie banner found")
    except Exception as e:
        print(f"Cookie error: {e}")

def click_disclosure_tab_with_retry(driver, retries=3):
    for attempt in range(retries):
        try:
            disclosure_tab = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.XPATH, '//a[@href="#tabDisclosureDocuments"]'))
            )
            driver.execute_script('arguments[0].scrollIntoView({block: "center"});', disclosure_tab)
            time.sleep(0.4)
            driver.execute_script("arguments[0].click();", disclosure_tab)
            return
        except StaleElementReferenceException:
            print(f"Attempt {attempt + 1}: Stale tab. Retrying...")
            time.sleep(1)
    raise Exception("Could not click Disclosure tab")

def extract_tooltip_pdfs(driver):
    soup = BeautifulSoup(driver.page_source, "html.parser")
    results = []
    for tooltip in soup.select("a.ihpQtipHelp.rtTip[help]"):
        help_html = tooltip.get("help")
        section_name = tooltip.get_text(strip=True)
        if not help_html:
            continue
        inner_soup = BeautifulSoup(help_html, "html.parser")
        for a in inner_soup.find_all("a"):
            href = a.get("href")
            doc_text = a.text.strip()
            if href and href.endswith(".pdf"):
                full_url = f"https://emma.msrb.org{href}" if not href.startswith("http") else href
                combined_name = f"{section_name} - {doc_text}"
                results.append({
                    "document_name": combined_name,
                    "pdf_url": full_url
                })
    return results

def download_via_requests(url, dest_path):
    try:
        r = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
        if r.status_code == 403:
            return False
        r.raise_for_status()
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        with open(dest_path, "wb") as f:
            f.write(r.content)
        return True
    except Exception as e:
        print(f"[requests fail] {url} → {e}")
        return False

def download_via_chrome(driver, url, dest_path):
    TMP_DIR.mkdir(exist_ok=True)
    for f in TMP_DIR.glob("*"):
        f.unlink()
    try:
        driver.get(url)
        time.sleep(WAIT_TIME)
        pdf_files = list(TMP_DIR.glob("*.pdf"))
        if not pdf_files:
            print(f"[chrome fail] No PDF for {url}")
            return False
        pdf_file = max(pdf_files, key=os.path.getctime)
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        pdf_file.rename(dest_path)
        return True
    except Exception as e:
        print(f"[chrome error] {url} → {e}")
        return False

def download_pdfs(final_df):
    failed_downloads = []
    driver = setup_browser(TMP_DIR)
    try:
        for credit, group in tqdm(final_df.groupby("CREDIT"), desc="Universities"):
            folder = ROOT_DIR / slugify(credit)
            for _, row in group.iterrows():
                url = row["pdf_url"]
                name = slugify(row["document_name"])
                ext = Path(urlparse(url).path).suffix or ".pdf"
                target = folder / f"{name}{ext}"
                if target.exists():
                    continue
                success = download_via_requests(url, target)
                if not success:
                    print(f"[Chrome fallback] {url}")
                    success = download_via_chrome(driver, url, target)
                if not success:
                    failed_downloads.append({
                        "CREDIT": credit,
                        "document_name": row["document_name"],
                        "pdf_url": url
                    })
                time.sleep(SLEEP)
    finally:
        driver.quit()
        for f in TMP_DIR.glob("*"):
            f.unlink()
        TMP_DIR.rmdir()

    if failed_downloads:
        fail_df = pd.DataFrame(failed_downloads)
        fail_df.to_csv(FAILED_LOG_PATH, index=False)
        print(f"\nLogged {len(failed_downloads)} failed downloads to {FAILED_LOG_PATH}")

def filter_documents(df):
    keywords = [
        "annual disclosure",
        "financial statement",
        "financial disclosure",
        "audited financials",
        "continuing disclosure"
    ]
    return df[df['document_name'].str.lower().apply(
        lambda x: any(keyword in x for keyword in keywords)
    )]

def filter_documents(df):
    """
    Cleans and filters the EMMA disclosure document list:
      1) Extracts the first 4-digit year from `document_name` and keeps only rows matching `year`.
      2) Scores each name for keyword matches.
      3) For duplicate pdf_url, keeps the row with the highest score (ties → shortest name).
    """
    df = df.copy()
    
    # 1) parse year
    df['year_found'] = (
        df['document_name']
          .str.extract(r'(\d{4})', expand=False)
          .astype(float)
    )
    # df = df[df['year_found'] == year]
    return df
    
    # # 2) define and apply a simple keyword‐match score
    # keywords = [
    #     "annual disclosure",
    #     "financial statement",
    #     "financial disclosure",
    #     "audited financials",
    #     "continuing disclosure",
    #     "enroll"
    # ]
    # def name_score(name: str) -> int:
    #     nl = name.lower()
    #     return sum(int(k in nl) for k in keywords)
    # df['score'] = df['document_name'].apply(name_score)
    
    # # 3) dedupe by pdf_url
    # keepers = []
    # for url, group in df.groupby('pdf_url', sort=False):
    #     grp = group.copy()
    #     max_score = grp['score'].max()
    #     best = grp[grp['score'] == max_score].copy()
        
    #     # if multiple share the same top score, pick the shortest name
    #     best['name_len'] = best['document_name'].str.len()
    #     best = best[best['name_len'] == best['name_len'].min()]
        
    #     # take the first of the best
    #     keepers.append(best.iloc[0])
    
    # result = pd.DataFrame(keepers)
    
    # # drop helper columns
    # return result.drop(columns=['year_found','score','name_len'])


In [5]:
# # ------------------ MAIN SCRAPING SECTION ------------------
# cookie_handled = False
# final_df = pd.DataFrame()
driver = webdriver.Chrome()
driver.get("https://emma.msrb.org/")

cookie_handled = False
all_records = []

for c in tqdm(list_cusip, desc="CUSIPs"):
    print(f"\nProcessing {c}")
    try:
        # 1) Search
        box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "quickSearchText"))
        )
        box.clear(); box.send_keys(c); box.send_keys(Keys.RETURN)

        # 2) Cookies/Terms
        if not cookie_handled:
            handle_cookie_consent(driver)
            cookie_handled = True

        # 3) Click Disclosure tab
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.XPATH, '//ul[contains(@class,"ui-tabs-nav")]'))
        )
        click_disclosure_tab_with_retry(driver)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.ID, "tabDisclosureDocuments"))
        )

        # 4) Select “All” and click Search to load historic docs
        try:
            all_radio = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((
                    By.CSS_SELECTOR,
                    'input[name="Filter.SelectedPredefinedDateRange"][value="All"]'
                ))
            )
            driver.execute_script("arguments[0].scrollIntoView(true);", all_radio)
            time.sleep(0.2)
            if not all_radio.is_selected():
                all_radio.click()

            search_link = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.LINK_TEXT, "Search"))
            )
            search_link.click()

            # wait for oldest year (e.g. 2016) to appear
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((
                    By.XPATH,
                    "//div[@id='tabDisclosureDocuments']//td[text()='06/30/2016']"
                ))
            )
        except Exception:
            pass

        # 5) Grab _all_ PDF links in this panel
        pdf_links = driver.find_elements(
            By.XPATH,
            "//div[@id='tabDisclosureDocuments']//a[contains(@href,'.pdf')]"
        )

        for link in pdf_links:
            try:
                name = link.text.strip()
                href = link.get_attribute("href")
                url  = href if href.startswith("http") else f"https://emma.msrb.org{href}"

                # find its row
                row = link.find_element(By.XPATH, "./ancestor::tr[1]")
                cols = row.find_elements(By.TAG_NAME, "td")

                # period / posted
                if len(cols) == 2:
                    # Official Statements table
                    period = ""
                    posted = cols[1].text.strip()
                else:
                    period = cols[1].text.strip() if len(cols)>1 else ""
                    posted = cols[2].text.strip() if len(cols)>2 else ""

                # subgroup header (groupRow) if it exists
                subgroup = ""
                try:
                    subgroup = row.find_element(
                        By.XPATH,
                        "preceding-sibling::tr[contains(@class,'groupRow')][1]/th"
                    ).text.strip()
                except:
                    pass

                all_records.append({
                    "CUSIP":          c,
                    "subgroup":       subgroup,
                    "document_name":  name,
                    "pdf_url":        url,
                    "period_date":    period,
                    "posted_date":    posted
                })
            except StaleElementReferenceException:
                continue

        # 6) hidden tooltip PDFs
        for pdf in extract_tooltip_pdfs(driver):
            all_records.append({
                "CUSIP":          c,
                "subgroup":       "",
                "document_name":  pdf["document_name"],
                "pdf_url":        pdf["pdf_url"],
                "period_date":    "",
                "posted_date":    ""
            })

    except Exception as e:
        print("Error for", c, "→", e)

driver.quit()

# 7) Build DataFrame & save
df = pd.DataFrame(all_records)
df = pd.merge(df, df_cusips, how='left', left_on='CUSIP', right_on='Cusip 8').drop(columns=['Cusip 8'])
df = pd.read_csv('/mnt/data/disclosure_document_list.csv')

# 7a) Sort so that rows with subgroup & dates bubble to the top
df_sorted = df.sort_values(
    by=[
        df['subgroup'].notna(),    # True > False
        df['period_date'].notna(),
        df['posted_date'].notna()
    ],
    ascending=False
)

# 7b) Drop duplicates, keeping that “best” row per URL
df_unique = df_sorted.drop_duplicates(subset='pdf_url', keep='first')

print("Total docs:", len(df))
df.to_csv("disclosure_document_list.csv", index=False)


CUSIPs:   0%|                                            | 0/24 [00:00<?, ?it/s]


Processing 04048RXF
Clicked 'Accept' button
Performed dummy click


CUSIPs:   4%|█▌                                  | 1/24 [00:04<01:50,  4.81s/it]

Error for 04048RXF → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:   8%|███                                 | 2/24 [00:09<01:40,  4.58s/it]

Error for 45204FNN → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  12%|████▌                               | 3/24 [00:14<01:38,  4.67s/it]

Error for 57584YPC → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  17%|██████                              | 4/24 [00:19<01:39,  4.95s/it]

Error for 13077C3T → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  21%|███████▌                            | 5/24 [00:23<01:31,  4.80s/it]

Error for 25483VSK → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  25%|█████████                           | 6/24 [00:28<01:25,  4.72s/it]

Error for 65000BW3 → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  29%|██████████▌                         | 7/24 [00:32<01:18,  4.62s/it]

Error for 267045PR → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  33%|████████████                        | 8/24 [00:37<01:12,  4.56s/it]

Error for 295435CR → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  38%|█████████████▌                      | 9/24 [00:41<01:08,  4.58s/it]

Error for 77615NAP → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  42%|██████████████▌                    | 10/24 [00:46<01:04,  4.60s/it]

Error for 594712TS → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  46%|████████████████                   | 11/24 [00:51<00:59,  4.60s/it]

Error for 594746PE → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  50%|█████████████████▌                 | 12/24 [00:55<00:53,  4.50s/it]

Error for 594746QT → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  54%|██████████████████▉                | 13/24 [00:59<00:48,  4.44s/it]

Error for 424682HY → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  58%|████████████████████▍              | 14/24 [01:04<00:43,  4.39s/it]

Error for 355697CD → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  62%|█████████████████████▉             | 15/24 [01:08<00:41,  4.56s/it]

Error for 64990C7S → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  67%|███████████████████████▎           | 16/24 [01:14<00:37,  4.72s/it]

Error for 64983XWC → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  71%|████████████████████████▊          | 17/24 [01:19<00:34,  4.87s/it]

Error for 677632N7 → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  75%|██████████████████████████▎        | 18/24 [01:23<00:28,  4.74s/it]

Error for 57585BAK → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  79%|███████████████████████████▋       | 19/24 [01:28<00:23,  4.64s/it]

Error for 60636AMM → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  83%|█████████████████████████████▏     | 20/24 [01:32<00:18,  4.58s/it]

Error for 6460666J → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  88%|██████████████████████████████▋    | 21/24 [01:37<00:13,  4.56s/it]

Error for 57421CCJ → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  92%|████████████████████████████████   | 22/24 [01:41<00:09,  4.58s/it]

Error for 8821357G → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs:  96%|█████████████████████████████████▌ | 23/24 [01:46<00:04,  4.52s/it]

Error for 91417NKS → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129

CUSIPs: 100%|███████████████████████████████████| 24/24 [01:51<00:00,  4.63s/it]

Error for 914460QJ → Message: no such element: Unable to locate element: {"method":"xpath","selector":"./ancestor::tr[1]"}
  (Session info: chrome=136.0.7103.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001012cc3e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x00000001012c46a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000100e01fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000100e491bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000100e3e9a0 cxxbridge1$string$len + 338432
5   chromedriver                        0x0000000100e8a044 cxxbridge1$string$len + 647332
6   chromedriver                        0x0000000100e3d3f8 cxxbridge1$string$len + 332888
7   chromedriver                        0x000000010129




FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/disclosure_document_list.csv'

In [9]:
df.to_csv("disclosure_document_list.csv", index=False)