In [1]:
# Import required libraries
import pandas as pd
import yfinance as yf
import time
import re
from datetime import datetime

def get_nyse_breadth_data(start_date, end_date, csv_file_path='NYSE.csv', export_path='nyse_breadth_data.csv'):
    """
    Retrieves NYSE ticker data, calculates daily advances/declines/neutral,
    and exports the data to a CSV file.
    
    Args:
        start_date (str): Start date in 'YYYY-MM-DD' format
        end_date (str): End date in 'YYYY-MM-DD' format
        csv_file_path (str): Path to the NYSE tickers CSV file
        export_path (str): Path for the output CSV file
    
    Returns:
        pd.DataFrame: DataFrame with daily advances, declines, and neutral counts
    """
    print(f"Running NYSE breadth analysis from {start_date} to {end_date}...")
    
    # Step 1: Retrieve NYSE tickers
    try:
        nyse_data = pd.read_csv(csv_file_path)
        tickers = nyse_data['Symbol'].tolist()
        
        # Clean tickers for yfinance
        clean_tickers = []
        for ticker in tickers:
            ticker_str = str(ticker)
            if not re.search(r'[\^/\.\-]', ticker_str):
                clean_tickers.append(ticker_str)
        
        print(f"Retrieved {len(clean_tickers)} clean NYSE ticker symbols")
    except Exception as e:
        print(f"Error retrieving NYSE tickers: {e}")
        return None
    
    # Step 2: Get daily price data in batches
    batch_size = 200
    all_data = pd.DataFrame()
    
    for i in range(0, len(clean_tickers), batch_size):
        batch_tickers = clean_tickers[i:i+batch_size]
        print(f"Downloading data for tickers {i+1} to {min(i+batch_size, len(clean_tickers))}...")
        
        try:
            batch_data = yf.download(batch_tickers, start=start_date, end=end_date, progress=False)
            
            if len(batch_tickers) > 1:
                batch_close = batch_data['Close']
            else:
                batch_close = batch_data['Close'].to_frame(name=batch_tickers[0])
            
            if all_data.empty:
                all_data = batch_close
            else:
                all_data = all_data.join(batch_close, how='outer')
            
            time.sleep(1)  # Avoid hitting API limits
            
        except Exception as e:
            print(f"Error downloading data for batch starting at index {i}: {e}")
    
    print(f"Downloaded daily price data with shape {all_data.shape}")
    
    # Step 3: Calculate daily advances, declines, and neutral
    daily_changes = all_data.pct_change()
    
    advances = (daily_changes > 0).sum(axis=1)
    declines = (daily_changes < 0).sum(axis=1)
    neutral = (daily_changes == 0).sum(axis=1)
    
    # Create DataFrame with results
    breadth_data = pd.DataFrame({
        'Advancers': advances,
        'Decliners': declines,
        'Neutral': neutral
    })
    
    # Drop the first row which has all zeros due to no percentage change calculation
    breadth_data = breadth_data.iloc[1:]
    # Step 4: Export to CSV
    try:
        breadth_data.to_csv(export_path)
        print(f"Successfully exported breadth data to {export_path}")
    except Exception as e:
        print(f"Error exporting data to CSV: {e}")
    
    return breadth_data

# Example usage:
breadth_data = get_nyse_breadth_data('1995-01-01', '2025-04-23', 'NYSE.csv', 'nyse_breadth_2023.csv')


Running NYSE breadth analysis from 1995-01-01 to 2025-04-23...
Retrieved 2356 clean NYSE ticker symbols
Downloading data for tickers 1 to 200...
YF.download() has changed argument auto_adjust default to True
Downloading data for tickers 201 to 400...
Downloading data for tickers 401 to 600...
Downloading data for tickers 601 to 800...



2 Failed downloads:
['ECC           ', 'ETX           ']: YFPricesMissingError('possibly delisted; no price data found  (1d 1995-01-01 -> 2025-04-23) (Yahoo error = "No data found, symbol may be delisted")')


Downloading data for tickers 801 to 1000...
Downloading data for tickers 1001 to 1200...
Downloading data for tickers 1201 to 1400...
Downloading data for tickers 1401 to 1600...
Downloading data for tickers 1601 to 1800...
Downloading data for tickers 1801 to 2000...



2 Failed downloads:
['SFB']: YFPricesMissingError('possibly delisted; no price data found  (1d 1995-01-01 -> 2025-04-23)')
['SAND          ']: YFPricesMissingError('possibly delisted; no price data found  (1d 1995-01-01 -> 2025-04-23) (Yahoo error = "No data found, symbol may be delisted")')


Downloading data for tickers 2001 to 2200...
Downloading data for tickers 2201 to 2356...
Downloaded daily price data with shape (7627, 2356)


  daily_changes = all_data.pct_change()


Successfully exported breadth data to nyse_breadth_2023.csv
