In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv

options = webdriver.ChromeOptions()

options.add_argument('--disable-gpu')
options.add_argument('--no-sandbox')

driver = webdriver.Chrome(options=options)
driver.get("https://www.tradingview.com/markets/stocks-usa/market-movers-all-stocks/")
wait = WebDriverWait(driver, 20)

click_count = 0

while True:
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        load_more_btn = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "button-SFwfC2e0")))
        driver.execute_script("arguments[0].click();", load_more_btn)
        click_count += 1

        rows_now = driver.find_elements(By.CLASS_NAME, "row-RdUXZpkv")
        print(f" Clicked 'Load More' {click_count} times — Rows loaded: {len(rows_now)}")
        time.sleep(2)
    except:
        print(" No more 'Load More' button. All rows loaded.")
        break

rows = driver.find_elements(By.CLASS_NAME, "row-RdUXZpkv")
print(f"\n Final total rows found: {len(rows)}\n")

data = []
for index, row in enumerate(rows, 1):
    try:
        columns = row.find_elements(By.TAG_NAME, 'td')
        if len(columns) >= 12:
            symbol_elem = columns[0].find_element(By.CLASS_NAME, "tickerNameBox-GrtoTeat")
            security_elem = columns[0].find_element(By.CLASS_NAME, "tickerDescription-GrtoTeat")

            data.append([
                symbol_elem.text,
                security_elem.text,
                columns[1].text,
                columns[2].text,
                columns[3].text,
                columns[4].text,
                columns[5].text,
                columns[6].text,
                columns[7].text,
                columns[8].text,
                columns[9].text,
                columns[10].text,
                columns[11].text,
            ])

        if index % 50 == 0:
            print(f" Scraped {index} rows so far...")
    except Exception as e:
        print(f" Error reading row {index}:", e)

csv_file = 'tradingview_all_stocks.csv'
with open(csv_file, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow([
        'Symbol', 'Security Name', 'Price', 'Change %','Volume',
        'Rel Volume', 'Market Cap', 'P/E Ratio', 'EPS(TTM)', 'EPS Growth',
        'Div Yield', 'Sector', 'Analyst Rating'
    ])
    writer.writerows(data)

print(f"\n Total 'Load More' Clicks: {click_count}")
print(f" Total Rows Saved: {len(data)}")
print(f" Data saved to '{csv_file}'")

driver.quit()

🚀 Opening TradingView...
📡 Starting data loading loop...
📊 Rows loaded: 100
🔄 Clicked 'Load More'
📊 Rows loaded: 200
🔄 Clicked 'Load More'
📊 Rows loaded: 300
🔄 Clicked 'Load More'
📊 Rows loaded: 400
🔄 Clicked 'Load More'
📊 Rows loaded: 500
🔄 Clicked 'Load More'
📊 Rows loaded: 600
🔄 Clicked 'Load More'
📊 Rows loaded: 700
🔄 Clicked 'Load More'
📊 Rows loaded: 800
🔄 Clicked 'Load More'
📊 Rows loaded: 900
🔄 Clicked 'Load More'
📊 Rows loaded: 1000
🔄 Clicked 'Load More'
📊 Rows loaded: 1100
🔄 Clicked 'Load More'
📊 Rows loaded: 1200
🔄 Clicked 'Load More'
📊 Rows loaded: 1300
🔄 Clicked 'Load More'
📊 Rows loaded: 1400
🔄 Clicked 'Load More'
📊 Rows loaded: 1500
🔄 Clicked 'Load More'
📊 Rows loaded: 1600
🔄 Clicked 'Load More'
📊 Rows loaded: 1700
🔄 Clicked 'Load More'
📊 Rows loaded: 1800
🔄 Clicked 'Load More'
📊 Rows loaded: 1900
🔄 Clicked 'Load More'
📊 Rows loaded: 2000
🔄 Clicked 'Load More'
📊 Rows loaded: 2100
🔄 Clicked 'Load More'
📊 Rows loaded: 2200
🔄 Clicked 'Load More'
📊 Rows loaded: 2300
🔄 Clicke

In [24]:
import pandas as pd
import re

# Load existing CSV
df = pd.read_csv("us_stocks_all.csv")

# Fix merged 'Symbol+Name' column
def split_symbol_name(text):
    match = re.match(r"([A-Z]{1,6})(.+)", str(text).strip())
    if match:
        return pd.Series([match.group(1), match.group(2).strip()])
    return pd.Series(["", text.strip()])

df[['Symbol', 'Name']] = df['Symbol'].apply(split_symbol_name)

# Reorder columns properly
columns = [
    "Symbol", "Name", "Price", "Change %", "Volume", "Relative Volume",
    "Market Cap", "P/E", "EPS (Diluted) TTM", "EPS Diluted Growth TTM YoY",
    "Div. Yield % TTM", "Sector", "Analyst Rating"
]
df = df[columns]

# Clean encoding issues
df = df.replace({r"â€”": "-", r"â€“": "-", r"â€¯": " ", r"âˆ’": "-", r"USD": " USD"}, regex=True)
df = df.map(lambda x: str(x).replace('\n', ' ') if isinstance(x, str) else x)

# Save cleaned version
df.to_csv("us_stocks_all_cleaned.csv", index=False)
print("✅ Cleaned CSV saved as: us_stocks_all_cleaned.csv")


✅ Cleaned CSV saved as: us_stocks_all_cleaned.csv


In [27]:
import pandas as pd
import re

with open("us_stocks_all_cleaned.csv", "r", encoding="utf-8") as f:
    lines = [line.strip() for line in f if line.strip()]

fixed_rows = []
i = 0
while i < len(lines) - 2:
    # Line 1: symbol
    symbol = lines[i].replace("D", "").strip()
    
    # Line 2: name + price
    name_price_line = lines[i+1].replace("D", "").strip()
    
    # Extract price from end of line
    match = re.match(r"^(.*?)(\d{1,5}\.\d{2})\s*USD$", name_price_line)
    if match:
        name = match.group(1).strip()
        price = match.group(2).strip() + " USD"
    else:
        print(f"⚠️ Skipping due to bad price parse: {name_price_line}")
        i += 3
        continue

    # Line 3: values
    values = lines[i+2].split('\t')
    if len(values) != 11:
        print(f"⚠️ Skipping due to bad value count: {values}")
        i += 3
        continue

    full_row = [symbol, name, price] + values
    fixed_rows.append(full_row)
    i += 3

# Final DataFrame
columns = [
    "Symbol", "Name", "Price", "Change %", "Volume", "Relative Volume",
    "Market Cap", "P/E", "EPS (Diluted) TTM", "EPS Diluted Growth TTM YoY",
    "Div. Yield % TTM", "Sector", "Analyst Rating"
]

df = pd.DataFrame(fixed_rows, columns=columns)

# Clean bad characters
df.replace({
    r"â€¯": " ",
    r"â€”": "-", r"â€“": "-", r"âˆ’": "-", r"\+": "+",
    r"USD": " USD"
}, regex=True, inplace=True)

df.to_csv("us_stocks_all_fixed.csv", index=False)
print("✅ Done. Cleaned CSV with correct `Price` column generated.")


⚠️ Skipping due to bad price parse: AA,"gilent Technologies, Inc.",−1.05%,1.69 M,0.74,30.94 B US,24.94,4.35 US,+3.65%,0.89%,Health technology,Buy,
⚠️ Skipping due to bad price parse: AACTA,res Acquisition Corporation II,0.00%,349.37 K,0.49,698.4 M US,31.24,0.36 US,−3.08%,0.00%,Finance,—,
⚠️ Skipping due to bad price parse: AAMEA,tlantic American Corporation,+1.18%,7.11 K,0.78,35.08 M US,—,−0.09 US,−40.34%,1.16%,Finance,—,
⚠️ Skipping due to bad price parse: AAONAA,"ON, Inc.",−0.84%,291.99 K,0.62,8.06 B US,52.06,1.90 US,−11.55%,0.34%,Producer manufacturing,Buy,
⚠️ Skipping due to bad price parse: AARA,"ardvark Therapeutics, Inc.",+4.90%,22.05 K,0.36,227.81 M US,—,−3.16 US,—,0.00%,Health technology,Strong buy,
⚠️ Skipping due to bad price parse: ABBVA,bbVie Inc.,+0.39%,4.03 M,0.51,323.71 B US,78.14,2.35 US,−30.17%,3.48%,Health technology,Buy,
⚠️ Skipping due to bad price parse: ABEOA,beona Therapeutics Inc.,−2.43%,957.63 K,0.53,328.43 M US,—,−1.34 US,+58.30%,0.00%,Health technology,Stron