In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import re
import numpy as np
import pandas as pd

In [2]:
# Start a Chrome WebDriver session; `w` is the driver handle handed to main() later in this file.
w=webdriver.Chrome()
# Open the TradingView "all US stocks" market-movers page that the scraping functions read from.
w.get("https://www.tradingview.com/markets/stocks-usa/market-movers-all-stocks/")
# pd.read_csv()

In [3]:
import pandas as pd
from selenium.webdriver.common.by import By

def extract_table_data(driver):
    """
    Return the visible text of the first 'table-Ngq2xrcG' table, one entry per line.

    Args:
        driver: Selenium WebDriver instance used to look up the table

    Returns:
        list: The table's text split on newlines, or an empty list when no
            table with that exact class attribute is found
    """
    matches = driver.find_elements(By.XPATH, "//table[@class='table-Ngq2xrcG']")
    # Only the first match is read; any additional matching tables are ignored.
    return matches[0].text.split('\n') if matches else []

def clean_initial_data(data_list):
    """
    Drop every 'D' entry, then discard the first 15 remaining entries.

    Args:
        data_list (list): Raw lines taken from the table

    Returns:
        list: The entries left after the first 15 non-'D' items (empty when
            15 or fewer remain)
    """
    without_d = list(filter(lambda entry: entry != 'D', data_list))
    # Slicing past the end already yields [], so no explicit length check is needed.
    return without_d[15:]

def remove_duplicate_a_entries(data_list):
    """
    Drop 'A' entries after the first position and add 'nan' after entries ending in '—'.

    The first element is always kept unchanged (even if it is 'A') and is
    never followed by an inserted 'nan'.

    Args:
        data_list (list): List to process

    Returns:
        list: New list without the later 'A' entries, with a 'nan' string
            inserted directly after every later entry whose last character
            is '—'
    """
    if not data_list:
        return []

    head, tail = data_list[0], data_list[1:]
    kept = [head]
    for entry in tail:
        if entry == 'A':
            continue
        kept.append(entry)
        try:
            needs_placeholder = entry[-1] == '—'
        except (IndexError, TypeError):
            # Empty strings and non-indexable entries have no last character to inspect.
            continue
        if needs_placeholder:
            kept.append('nan')
    return kept

def group_data_into_rows(data_list):
    """
    Split a flat list into consecutive chunks of four items.

    Args:
        data_list (list): Flat list of data

    Returns:
        list: List of sublists of 4 elements each; the last sublist is
            shorter when the input length is not a multiple of 4
    """
    chunks = []
    start = 0
    total = len(data_list)
    while start < total:
        chunks.append(data_list[start:start + 4])
        start += 4
    return chunks

def process_data_string(data_str):
    """
    Split one row's metrics string into exactly ten fields.

    Args:
        data_str (str): Space-separated metrics followed by the sector name

    Returns:
        list: Ten strings: the first nine space-separated tokens, then all
            remaining tokens joined back together with spaces (the sector).
            Shorter inputs are padded with empty strings.
    """
    # Strip the 'USD' markers and collapse the double spaces they leave behind.
    tokens = data_str.replace('USD', '').replace('  ', ' ').strip().split(' ')

    # Positions 2 and 4 (volume, market cap) keep only the part before the
    # narrow no-break space, i.e. the unit suffix is dropped.
    for position in (2, 4):
        if position < len(tokens):
            tokens[position] = tokens[position].split('\u202f')[0]

    metrics, sector_words = tokens[:9], tokens[9:]
    if sector_words:
        return metrics + [' '.join(sector_words)]
    # Nine or fewer tokens: pad out to ten fields.
    return tokens + [''] * (10 - len(tokens))

def process_rows(grouped_data):
    """
    Turn each 4-item group into a flat output row.

    Groups that do not have exactly four items, or whose data string cannot
    be parsed, are skipped (best effort). Each skip is reported through the
    ``logging`` module so that dropped rows no longer disappear silently.

    Args:
        grouped_data (list): List of groups [ticker, company, data_str, rating]

    Returns:
        list: One row per successfully processed group, laid out as
            [ticker, company, *fields, rating], where fields are the values
            returned by process_data_string with '—' replaced by 'nan'
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)
    processed_rows = []
    for index, group in enumerate(grouped_data):
        if len(group) != 4:
            logger.warning(
                "Skipping group %d: expected 4 items, got %d", index, len(group)
            )
            continue
        ticker, company, data_str, rating = group
        try:
            # Only the parsing step is guarded; it is the part that depends on
            # the shape of the scraped text.
            fields = process_data_string(data_str)
        except Exception:
            logger.warning(
                "Skipping group %d (%r): could not parse data string",
                index, ticker, exc_info=True,
            )
            continue
        # '—' marks a missing value on the page; store it as the string 'nan'.
        fields = ['nan' if field == '—' else field for field in fields]
        processed_rows.append([ticker, company, *fields, rating])
    return processed_rows

def main(driver, output_file='jj.csv'):
    """
    Scrape the table, run it through the cleaning pipeline, and write it to CSV.

    The resulting DataFrame is written to ``output_file`` without the index
    column and then printed.

    Args:
        driver: Selenium WebDriver instance
        output_file (str): Path to output CSV file
    """
    # Column headers, in the order process_rows lays out each row.
    header = [
        'Ticker', 'Company Name', 'Price', 'Change %', 'Volume', 'Rel Volume',
        'Market Cap', 'P/E', 'EPS dil', 'EPS dil growth', 'Div yield %',
        'Sector', 'Analyst Rating'
    ]

    # Each stage consumes the previous stage's output.
    pipeline = (
        clean_initial_data,
        remove_duplicate_a_entries,
        group_data_into_rows,
        process_rows,
    )
    data = extract_table_data(driver)
    for stage in pipeline:
        data = stage(data)

    frame = pd.DataFrame(data, columns=header)
    frame.to_csv(output_file, index=False)
    print(f"Data saved to {output_file}")
    print(frame)

# Entry point: run the scrape using the module-level WebDriver `w` created earlier in this file.
if __name__ == "__main__":
    # Pass a different WebDriver instance here if `w` is not the one you want to use.
    main(w)

Data saved to jj.csv
   Ticker                           Company Name   Price Change %  Volume  \
0       A             Agilent Technologies, Inc.  121.03   −0.71%    2.11   
1      AA                      Alcoa Corporation   31.96   +3.16%    7.07   
2    AACT        Ares Acquisition Corporation II   11.16   +0.54%    4.75   
3    AADI                  Aadi Bioscience, Inc.    1.93    0.00%  662.31   
4     AAL          American Airlines Group, Inc.   11.46   −8.32%  109.91   
..    ...                                    ...     ...      ...     ...   
95    AEO        American Eagle Outfitters, Inc.   11.49   −5.04%     6.8   
96   AEON                   AEON Biopharma, Inc.  0.7000  −23.91%    3.48   
97    AEP  American Electric Power Company, Inc.  104.43   −2.69%    4.28   
98    AER                   AerCap Holdings N.V.   97.40   +2.25%     2.4   
99   AERT                Aeries Technology, Inc.  0.5740   +1.72%   31.41   

   Rel Volume Market Cap    P/E EPS dil EPS dil growth