In [1]:
import yfinance as yf
import pandas as pd
import time

In [2]:
def leer_lista_txt(ruta_archivo):
    try:
        with open(ruta_archivo, "r", encoding="utf-8") as f:
            contenido = f.read().strip()
        # Quitar comillas y separar por comas
        lista = [x.strip('"') for x in contenido.split(",")]
        return lista
    except FileNotFoundError:
        print(f"❌ Error: El archivo '{ruta_archivo}' no se encontró.")
        return []
    except Exception as e:
        print(f"❌ Error al leer el archivo: {e}")
        return []


In [3]:
russell_3000 = leer_lista_txt(r'C:\Users\SergioBeamonteGonzal\Documentos Locales\MASTER\Machine Learning\Russell3000.txt')
sp_500 = leer_lista_txt(r'C:\Users\SergioBeamonteGonzal\Documentos Locales\MASTER\Machine Learning\S&P500.txt')
nasdaq_100 = leer_lista_txt(r'C:\Users\SergioBeamonteGonzal\Documentos Locales\MASTER\Machine Learning\NASDAQ100.txt')

print(russell_3000[:50])
print(sp_500[:50])
print(nasdaq_100[:50])

['NVDA', 'MSFT', 'AAPL', 'AMZN', 'AVGO', 'META', 'GOOGL', 'TSLA', 'GOOG', 'BRKB', 'JPM', 'LLY', 'V', 'NFLX', 'XOM', 'ORCL', 'WMT', 'JNJ', 'MA', 'COST', 'ABBV', 'HD', 'PLTR', 'AMD', 'PG', 'BAC', 'UNH', 'GE', 'CVX', 'CSCO', 'WFC', 'KO', 'IBM', 'PM', 'CAT', 'CRM', 'MU', 'GS', 'ABT', 'MCD', 'MRK', 'LIN', 'RTX', 'PEP', 'TMO', 'DIS', 'AXP', 'NOW', 'UBER', 'T']
['NVDA', 'MSFT', 'AAPL', 'AMZN', 'AVGO', 'META', 'GOOGL', 'TSLA', 'GOOG', 'BRKB', 'JPM', 'LLY', 'V', 'NFLX', 'ORCL', 'XOM', 'WMT', 'JNJ', 'MA', 'COST', 'ABBV', 'PLTR', 'HD', 'AMD', 'PG', 'BAC', 'UNH', 'GE', 'CVX', 'CSCO', 'WFC', 'KO', 'IBM', 'CAT', 'PM', 'CRM', 'GS', 'MU', 'ABT', 'MCD', 'MRK', 'LIN', 'RTX', 'PEP', 'TMO', 'DIS', 'MS', 'UBER', 'T', 'AXP']
['NVDA', 'MSFT', 'AAPL', 'AVGO', 'AMZN', 'TSLA', 'META', 'GOOGL', 'GOOG', 'NFLX', 'COST', 'PLTR', 'AMD', 'CSCO', 'TMUS', 'MU', 'PEP', 'LIN', 'SHOP', 'APP', 'AMAT', 'LRCX', 'INTU', 'QCOM', 'BKNG', 'INTC', 'TXN', 'AMGN', 'ISRG', 'GILD', 'KLAC', 'ADBE', 'PANW', 'HON', 'CEG', 'CRWD', 'ADI',

In [4]:
from tqdm import tqdm

def descargar_datos_financieros(russell3000, sp500, nasdaq100, delay=0.05):
    """
    Descarga info, balance sheet y cashflow de los tickers de los índices indicados.
    Crea una tabla combinada con indicadores de presencia en S&P 500 y NASDAQ.

    Parámetros:
        russell3000 (list): Lista de tickers del Russell 3000
        sp500 (list): Lista de tickers del S&P 500
        nasdaq100 (list): Lista de tickers del NASDAQ 100
        delay (float): Tiempo de espera entre consultas (segundos)

    Retorna:
        pd.DataFrame: DataFrame combinado con info, balance_sheet y cashflow
    """

    # Combinar todas las listas y eliminar duplicados
    all_tickers = list(set(russell3000 + sp500 + nasdaq100))
    print(f"Total tickers únicos: {len(all_tickers)}")

    # Listas para almacenar los datos
    rows = []

    # Usamos tqdm para la barra de progreso
    for ticker in tqdm(all_tickers, desc="Descargando datos"):
        try:
            t = yf.Ticker(ticker)

            # INFO
            info = t.info.copy()
            info['Ticker'] = ticker

            # Añadir columnas de presencia en índices
            info['In_SP500'] = ticker in sp500
            info['In_NASDAQ'] = ticker in nasdaq100

            # BALANCE SHEET y CASHFLOW (últimos 5 años)
            bs = t.balance_sheet.iloc[:, :5] if not t.balance_sheet.empty else pd.DataFrame()
            cf = t.cashflow.iloc[:, :5] if not t.cashflow.empty else pd.DataFrame()

            # Convertir balance y cashflow a dicts con sufijos para evitar conflictos de nombres
            bs_dict = {f"BS_{col}": bs[col].to_dict() for col in bs.columns} if not bs.empty else {}
            cf_dict = {f"CF_{col}": cf[col].to_dict() for col in cf.columns} if not cf.empty else {}

            # Combinar todos los datos en un solo diccionario
            combined = {**info, **bs_dict, **cf_dict}
            rows.append(combined)

            # Espera entre consultas para no saturar yfinance
            # time.sleep(delay)

        except Exception as e:
            print(f"Error con {ticker}: {e}")
            continue

    # Crear DataFrame final
    df = pd.DataFrame(rows)
    return df

In [5]:
df_finanzas = descargar_datos_financieros(russell_3000, sp_500, nasdaq_100)

display(df_finanzas.head())

Total tickers únicos: 2598


Descargando datos:   3%|▎         | 82/2598 [01:21<42:11,  1.01s/it]HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: MLIFT"}}}
Descargando datos:   4%|▍         | 116/2598 [02:03<1:47:13,  2.59s/it]HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: LENB"}}}
Descargando datos:  10%|▉         | 247/2598 [04:48<38:41,  1.01it/s]  HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: MSFUT"}}}
Descargando datos:  25%|██▌       | 654/2598 [12:08<39:17,  1.21s/it]  HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: RTYZ5"}}}
Descargando datos:  27%|██▋       | 714/2598 [13:05<29:01,  1.08it/s]HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: P5N994"}}}
Descargand

Unnamed: 0,address1,address2,city,state,zip,country,phone,website,industry,industryKey,...,beta3Year,fundFamily,fundInceptionDate,legalType,threeYearAverageReturn,fiveYearAverageReturn,trailingThreeMonthReturns,trailingThreeMonthNavReturns,netAssets,netExpenseRatio
0,601 Brickell Key Drive,Suite 1080,Miami,FL,33131,United States,510 906 4600,https://www.pulsebiosciences.com,Medical Instruments & Supplies,medical-instruments-supplies,...,,,,,,,,,,
1,9320 Lakeside Boulevard,Suite 300,The Woodlands,TX,77381,United States,281 362 5397,https://www.targethospitality.com,Specialty Business Services,specialty-business-services,...,,,,,,,,,,
2,6920 220th Street SW,,Mountlake Terrace,WA,98043,United States,425 771 5299,https://www.fsbwa.com,Banks - Regional,banks-regional,...,,,,,,,,,,
3,55 Water Street,,New York,NY,10041-0001,United States,212-438-1000,https://www.spglobal.com,Financial Data & Stock Exchanges,financial-data-stock-exchanges,...,,,,,,,,,,
4,12300 Liberty Boulevard,,Englewood,CO,80112,United States,720 875 5400,https://www.libertymedia.com/tracking-stocks/f...,Entertainment,entertainment,...,,,,,,,,,,


In [6]:
# Guardar a CSV
df_finanzas.to_csv("companies_financial_metrics.csv", index=False)
print("✅ Datos combinados guardados en 'companies_financial_metrics.csv'.")

✅ Datos combinados guardados en 'companies_financial_metrics.csv'.
