In [3]:
import os
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

# --- Load the Stock Universe ---
# One row per company; the extraction loop reads the "Company Name" and
# "Symbol" columns. The path is relative to the current working directory,
# so update it if the CSV lives elsewhere.
df_stocks = pd.read_csv(
    filepath_or_buffer="../data/top_250_stocks.csv",
)

# --- Set Up Progress Tracking ---
# The progress file stores one company name per line. Names loaded here are
# skipped by the extraction loop, which lets an interrupted run resume.
PROGRESS_FILE = "progress_fundamentals.txt"
completed_companies = set()
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE, "r") as progress_handle:
        # Strip the trailing newline (and any stray whitespace) from each entry.
        completed_companies.update(entry.strip() for entry in progress_handle)

# --- Setup Selenium ---
# Chrome is configured through command-line switches, added in this order.
options = webdriver.ChromeOptions()
for chrome_flag in (
    "--headless",  # Run in the background
    "--disable-blink-features=AutomationControlled",
    "--start-maximized",
):
    options.add_argument(chrome_flag)

# The value returned by ChromeDriverManager().install() is handed to Service,
# which the Chrome driver is then started with.
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(service=Service(chromedriver_path), options=options)

# --- Financial Sections ---
# Ordered (tab label, section id fragment) pairs. The label is matched against
# the tab link text and also names the output CSV; the id fragment is matched
# against the id of the <section> element that holds the table.
_SECTION_TABS = (
    ("Quarters", "quarters"),
    ("Profit & Loss", "profit-loss"),
    ("Balance Sheet", "balance-sheet"),
    ("Cash Flow", "cash-flow"),
    ("Ratios", "ratios"),
)
sections = dict(_SECTION_TABS)

# --- Create Base Folder ---
# Root output directory; every company gets its own sub-folder beneath it.
# exist_ok=True makes re-running the script safe when the folder is present.
BASE_FOLDER = "fundamental_data"
os.makedirs(name=BASE_FOLDER, mode=0o777, exist_ok=True)

# --- Start Extracting Data ---
def _read_table_rows(table):
    """Return the non-empty rows of *table* as lists of stripped cell texts.

    Only ``<td>`` cells are read, so a row that contains no ``<td>`` element
    produces no values and is dropped.
    """
    # NOTE(review): if the column labels live in <th> header cells they are not
    # captured here. This is left unchanged so that new CSVs keep the same
    # layout as those already written for companies in the progress file --
    # confirm whether headers should be added (and old data re-scraped).
    extracted = []
    for table_row in table.find_elements(By.TAG_NAME, "tr"):
        cells = [cell.text.strip() for cell in table_row.find_elements(By.TAG_NAME, "td")]
        if cells:
            extracted.append(cells)
    return extracted


def _scrape_section(driver, section_name, section_id):
    """Open one section tab on the current page and return its table data.

    Args:
        driver: The Selenium WebDriver showing the company page.
        section_name: Link text of the tab to click.
        section_id: Fragment matched against the ``id`` of the ``<section>``
            element that contains the table.

    Returns:
        A ``pandas.DataFrame`` with one row per table row, or ``None`` (after
        printing the reason) when the tab could not be clicked, the table did
        not appear within the wait timeout, or the table could not be read.
    """
    # --- Click on Section Tab ---
    try:
        tab = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, f"//a[contains(text(), '{section_name}')]"))
        )
        tab.click()
        time.sleep(random.uniform(2, 4))  # Allow time for switch
    except Exception as e:
        print(f"⚠️ Could not click {section_name} Tab: {e}")
        return None

    # --- Scroll Down to Trigger JavaScript Loading ---
    # Best effort: a failed scroll must not abort the run; the table wait
    # below decides whether the section is usable.
    try:
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
    except Exception as e:
        print(f"⚠️ Could not scroll page for {section_name}: {e}")
    time.sleep(random.uniform(2, 4))

    # --- Wait for the Table to Load ---
    try:
        table = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, f"//section[contains(@id, '{section_id}')]//table"))
        )
    except Exception as e:
        print(f"❌ Table did not load for {section_name}: {e}")
        return None

    # --- Extract Data from Table ---
    # Reading cells talks to the browser again, so it can fail too (for
    # example if the page re-renders while the rows are being read).
    try:
        return pd.DataFrame(_read_table_rows(table))
    except Exception as e:
        print(f"❌ Could not read table for {section_name}: {e}")
        return None


# Companies for which at least one section could not be saved during this run.
failed_companies = []

try:
    for _, stock in df_stocks.iterrows():
        company_name = stock["Company Name"]
        symbol = stock["Symbol"]

        if company_name in completed_companies:
            print(f"✅ Skipping {company_name}, already processed.")
            continue

        print(f"🚀 Processing {company_name} ({symbol})...")

        # --- Create Folder for the Company ---
        company_folder = os.path.join(BASE_FOLDER, company_name.replace(" ", "_"))
        os.makedirs(company_folder, exist_ok=True)

        # --- Open Company Page on Screener ---
        url = f"https://www.screener.in/company/{symbol}/"
        driver.get(url)
        time.sleep(random.uniform(3, 6))  # Random delay to avoid detection

        missing_sections = []
        for section_name, section_id in sections.items():
            print(f"🔍 Fetching {section_name} data...")

            df = _scrape_section(driver, section_name, section_id)
            if df is None:
                missing_sections.append(section_name)
                continue

            # --- Save Data as CSV ---
            file_path = os.path.join(company_folder, f"{section_name}.csv")
            df.to_csv(file_path, index=False)
            print(f"📁 {section_name} data saved for {company_name}")

        if missing_sections:
            # Do not record the company as done: leaving it out of the
            # progress file means the next run will try it again.
            failed_companies.append(company_name)
            print(
                f"⚠️ {company_name} incomplete (missing: {', '.join(missing_sections)}); "
                "it will be retried on the next run.\n"
            )
        else:
            # --- Mark Company as Processed ---
            with open(PROGRESS_FILE, "a") as f:
                f.write(company_name + "\n")
            # Keep the in-memory set in sync so a duplicate row in the input
            # is skipped within this same run.
            completed_companies.add(company_name)
            print(f"✅ {company_name} data extraction complete.\n")

        # --- Introduce Random Delay to Avoid Blocking ---
        time.sleep(random.uniform(10, 20))
finally:
    # --- Close Browser ---
    # Runs on errors and on KeyboardInterrupt as well, so the headless Chrome
    # process is not left behind when the run is stopped early.
    driver.quit()

if failed_companies:
    print(f"\n⚠️ Finished with {len(failed_companies)} incomplete companies: {', '.join(map(str, failed_companies))}")
else:
    print("\n🎯 All companies processed successfully!")


✅ Skipping 360 ONE WAM Ltd., already processed.
✅ Skipping AU Small Finance Bank Ltd., already processed.
✅ Skipping Aadhar Housing Finance Ltd., already processed.
✅ Skipping Aavas Financiers Ltd., already processed.
✅ Skipping Aditya Birla Capital Ltd., already processed.
✅ Skipping Aditya Birla Sun Life AMC Ltd., already processed.
✅ Skipping Anand Rathi Wealth Ltd., already processed.
✅ Skipping Angel One Ltd., already processed.
✅ Skipping Aptus Value Housing Finance India Ltd., already processed.
✅ Skipping Axis Bank Ltd., already processed.
✅ Skipping BSE Ltd., already processed.
✅ Skipping Bajaj Finance Ltd., already processed.
✅ Skipping Bajaj Finserv Ltd., already processed.
✅ Skipping Bajaj Holdings & Investment Ltd., already processed.
✅ Skipping Bandhan Bank Ltd., already processed.
✅ Skipping Bank of Baroda, already processed.
✅ Skipping Bank of India, already processed.
✅ Skipping Bank of Maharashtra, already processed.
✅ Skipping CRISIL Ltd., already processed.
✅ Skippi

KeyboardInterrupt: 