In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

def slow_scroll_down(driver, pause=0.5):
    height = driver.execute_script("return document.body.scrollHeight")
    for pos in range(0, height, 300):
        driver.execute_script(f"window.scrollTo(0, {pos});")
        time.sleep(pause)

def scroll_after_next(driver):
    # Quickly scroll up to bring table into view
    driver.execute_script("window.scrollTo(0, 0);")
    time.sleep(1)
    # Then slowly scroll down to load all data
    slow_scroll_down(driver, pause=0.6)

def wait_for_table(driver, timeout=20):
    for _ in range(timeout * 2):
        rows = driver.find_elements(By.CSS_SELECTOR, "tr")
        if len(rows) > 0 and any(row.text.strip() for row in rows):
            return
        time.sleep(0.5)
    raise Exception("❌ Table rows did not load in time.")

def scrape_all_pages():
    driver = webdriver.Chrome()
    driver.maximize_window()
    driver.get("https://sarmaaya.pk/mutual-funds/")
    wait = WebDriverWait(driver, 30)

    # Step 1: Select 100 rows per page
    print("⌛ Waiting for dropdown...")
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "select[name*='length']")))
    Select(driver.find_element(By.CSS_SELECTOR, "select[name*='length']")).select_by_value("100")
    print("✅ Selected 100 rows")
    time.sleep(5)

    all_data = []
    page = 1

    while True:
        print(f"📄 Scraping page {page}...")
        if page == 1:
            slow_scroll_down(driver)
        else:
            scroll_after_next(driver)

        try:
            wait_for_table(driver)
            rows = driver.find_elements(By.CSS_SELECTOR, "tr")
            for row in rows:
                cols = row.find_elements(By.TAG_NAME, "td")
                all_data.append([col.text.strip() for col in cols])
            print(f"✅ Scraped {len(rows)} rows on page {page}")
        except Exception as e:
            print(f"⚠️ Failed to get rows: {e}")
            break
        try:
            # Wait for Next button to appear
            next_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".paginate_button.page-item.next")))

            # Check if it's disabled
            if "disabled" in next_btn.get_attribute("class"):
                print("🛑 No more pages.")
                break

            # Scroll to Next button to make sure it is clickable
            driver.execute_script("arguments[0].scrollIntoView(true);", next_btn)
            time.sleep(1)

            driver.execute_script("arguments[0].click();", next_btn)
            print("➡️ Clicked Next")

            # Scroll upward fast then down slowly
            scroll_after_next(driver)

            page += 1
            time.sleep(3)
        except Exception as e:
            print(f"❌ Failed to click Next: {e}")
            break

    driver.quit()

    # Save to CSV
    df = pd.DataFrame(all_data)
    df.to_csv("sarmaaya_mutual_funds_all.csv", index=False)
    print(f"✅ Saved {len(all_data)} rows to sarmaaya_mutual_funds_all.csv")

if __name__ == "__main__":
    scrape_all_pages()


⌛ Waiting for dropdown...
✅ Selected 100 rows
📄 Scraping page 1...
✅ Scraped 121 rows on page 1
➡️ Clicked Next
📄 Scraping page 2...
✅ Scraped 121 rows on page 2
➡️ Clicked Next
📄 Scraping page 3...
✅ Scraped 121 rows on page 3
➡️ Clicked Next
📄 Scraping page 4...
✅ Scraped 121 rows on page 4
➡️ Clicked Next
📄 Scraping page 5...
✅ Scraped 121 rows on page 5
➡️ Clicked Next
📄 Scraping page 6...
✅ Scraped 25 rows on page 6
🛑 No more pages.
✅ Saved 630 rows to sarmaaya_mutual_funds_all.csv
