# In [2]:  (Jupyter notebook cell prompt)
import pandas as pd
import yfinance as yf
from pathlib import Path
import time
import warnings
from datetime import datetime

# TICKER AND YEAR SETTINGS
# First calendar year (inclusive) of the statement columns kept by
# fetch_income_for_ticker_all_years.
MIN_YEAR = 2020
# Last calendar year (inclusive); evaluated once, when this module is loaded.
MAX_YEAR = datetime.now().year
# Statement frequency handed to the fetcher: "quarterly" selects the quarterly
# statement, any other value selects the annual one.
FREQ = "annual"
# Seconds main() sleeps after processing each ticker.
PAUSE_S = 0.5
# CSV file listing the tickers to process (read by _read_tickers).
CSV_IN = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets.csv"
# Destination CSV written by main().
OUTPUT_CSV = "/Users/marcomendieta/Documents/TFM/Data_stock/EstadoResultado/EstadoResultado_prueba.csv"

# VARIABLES
# Names of the income-statement items to extract. Each line of the definitions
# file that contains a double quote and does not start with "#" contributes the
# text that follows its first double quote, up to the next one (if any).
with open("/Users/marcomendieta/Documents/TFM/Data_stock/EstadoResultado/VariablesER_explicadas.txt", "r", encoding="utf-8") as f:
    ES_LIST = [
        entry.split('"', 2)[1]
        for entry in f
        if not (entry.strip().startswith("#") or '"' not in entry)
    ]


def _read_tickers(csv_path: str) -> list[str]:
    """Return the unique, non-empty ticker symbols listed in a CSV file.

    The ticker column is chosen in this order:

    1. the first exact match among a fixed list of well-known header names;
    2. otherwise the first header that equals one of those names once
       surrounding whitespace is stripped and case is ignored;
    3. otherwise the first column of the file.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Ticker strings in order of first appearance, stripped of surrounding
        whitespace, without blanks, "nan"/"None" placeholders or duplicates.
    """
    # Read every cell as text: letting pandas infer numeric dtypes would turn a
    # symbol such as "0700" into 700 (or "700.0" when the column has gaps).
    df = pd.read_csv(csv_path, dtype=str)
    candidates = ["ticker", "TICKER", "Ticker", "symbol", "SYMBOL", "Symbol", "ticker_market"]
    col = next((c for c in candidates if c in df.columns), None)
    if col is None:
        # Tolerate headers like "TICKER_MARKET" or " Symbol " before giving up
        # and falling back to the first column.
        wanted = {c.lower() for c in candidates}
        col = next(
            (c for c in df.columns if str(c).strip().lower() in wanted),
            df.columns[0],
        )
    tickers = (
        df[col]
        .astype(str)
        .str.strip()
        # Missing cells surface as "nan" after astype(str); drop them together
        # with empty strings and literal "None".
        .replace({"": None, "nan": None, "None": None})
        .dropna()
        .unique()
        .tolist()
    )
    return tickers

def _ensure_items_as_index(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` oriented so that the statement items label the rows.

    Counts how many names from ``ES_LIST`` appear in the index and how many
    appear in the columns; the frame is transposed only when the columns hold
    strictly more of them. On a tie the frame is returned unchanged.
    """
    hits_in_rows = 0
    hits_in_cols = 0
    for item in ES_LIST:
        if item in df.index:
            hits_in_rows += 1
        if item in df.columns:
            hits_in_cols += 1
    if hits_in_cols > hits_in_rows:
        return df.T
    return df

def _cols_to_datetime(cols):
    """Convert column labels to datetimes.

    Args:
        cols: A ``pd.DatetimeIndex``, a ``pd.PeriodIndex``, or any list-like
            of labels accepted by ``pd.to_datetime``.

    Returns:
        For index or list-like input, a ``pd.DatetimeIndex`` with one entry
        per label; labels that cannot be parsed become ``NaT``.
    """
    if isinstance(cols, pd.PeriodIndex):
        # pd.DatetimeIndex() rejects period data with a TypeError;
        # to_timestamp() is the supported conversion (start of each period).
        return cols.to_timestamp()
    if isinstance(cols, pd.DatetimeIndex):
        return pd.DatetimeIndex(cols)
    with warnings.catch_warnings():
        # Silence any warnings pandas emits while parsing the labels.
        warnings.simplefilter("ignore")
        return pd.to_datetime(cols, errors="coerce")

def _row_for(df_idx_as_items: pd.DataFrame, ticker: str, col_label, year: int) -> dict:
    """Build the output record for one ticker and one statement column.

    Args:
        df_idx_as_items: Statement frame whose index holds the item names.
        ticker: Value stored under the "Ticker" key.
        col_label: Label of the column to read the values from.
        year: Value stored under the "YEAR" key.

    Returns:
        A dict with "Ticker", "YEAR" and one entry per name in ``ES_LIST``.
        An entry is ``None`` when the item row or the column is absent;
        otherwise it is the cell passed through ``pd.to_numeric`` with
        non-numeric content coerced to NaN.
    """
    record = {"Ticker": ticker, "YEAR": year}
    # The column check does not depend on the item, so evaluate it once.
    has_column = col_label in df_idx_as_items.columns
    for item in ES_LIST:
        if not (has_column and item in df_idx_as_items.index):
            record[item] = None
            continue
        raw = df_idx_as_items.loc[item, col_label]
        record[item] = None if raw is None else pd.to_numeric(raw, errors="coerce")
    return record

def fetch_income_for_ticker_all_years(ticker: str, freq: str = "annual") -> list[dict]:
    """Build one record per dated income-statement column of a ticker.

    Reads ``quarterly_income_stmt`` from ``yf.Ticker(ticker)`` when ``freq``
    is "quarterly" and ``income_stmt`` for any other value. Only columns
    whose label parses to a date with a year between ``MIN_YEAR`` and
    ``MAX_YEAR`` (both inclusive) are kept.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        freq: "quarterly" for the quarterly statement; anything else is
            treated as annual.

    Returns:
        Records produced by ``_row_for``, ordered by ascending column date,
        with "YEAR" set to the calendar year of that date. An empty list is
        returned when the statement is missing, empty, or has no column in
        the configured year range.
    """
    handle = yf.Ticker(ticker)
    if freq == "quarterly":
        statement = handle.quarterly_income_stmt
    else:
        statement = handle.income_stmt
    if statement is None or statement.empty:
        return []

    # Work on a copy, oriented so the item names label the rows.
    statement = _ensure_items_as_index(statement.copy())
    if statement.empty:
        return []

    parsed = _cols_to_datetime(statement.columns)
    parseable = parsed.notna()
    in_window = (parsed.year >= MIN_YEAR) & (parsed.year <= MAX_YEAR)
    kept = statement.columns[parseable & in_window]
    if len(kept) == 0:
        return []

    # Pair every kept label with its date, then order by the date alone so
    # that labels with equal dates keep their original relative order.
    dated = [(_cols_to_datetime([label])[0], label) for label in kept]
    dated.sort(key=lambda pair: pair[0])
    return [_row_for(statement, ticker, label, int(when.year)) for when, label in dated]

def main():
    """Fetch the income statement of every ticker and write one combined CSV.

    Tickers come from ``CSV_IN``. Each ticker contributes the records returned
    by ``fetch_income_for_ticker_all_years``; a ticker with no records, or one
    whose fetch raised, contributes a single placeholder record with empty
    values (plus the exception text under "_error" in the latter case). The
    function sleeps ``PAUSE_S`` seconds after every ticker.

    Returns:
        The DataFrame written to ``OUTPUT_CSV``: columns "Ticker", "YEAR",
        the names in ``ES_LIST`` and, if any fetch failed, "_error"; rows
        sorted by ticker and year with missing years last.
    """
    tickers = _read_tickers(CSV_IN)
    rows = []
    for tk in tickers:
        # Placeholder that keeps the ticker visible in the output when
        # nothing usable came back for it.
        blank = {"Ticker": tk, "YEAR": None, **{k: None for k in ES_LIST}}
        try:
            rows_ext = fetch_income_for_ticker_all_years(tk, FREQ)
        except Exception as e:
            # Best effort: one failing ticker must not abort the whole run;
            # its error message is recorded instead.
            rows.append({**blank, "_error": str(e)})
        else:
            rows.extend(rows_ext or [blank])
        time.sleep(PAUSE_S)

    df_out = pd.DataFrame(rows)
    meta_cols = ["Ticker", "YEAR"]
    cols = meta_cols + ES_LIST + (["_error"] if "_error" in df_out.columns else [])
    df_out = df_out.reindex(columns=[c for c in cols if c in df_out.columns])
    if "YEAR" in df_out.columns:
        # Placeholder rows carry YEAR=None, which makes pandas store the whole
        # column as float and the CSV show "2021.0". The nullable integer
        # dtype keeps years as integers and still allows missing values.
        df_out["YEAR"] = pd.to_numeric(df_out["YEAR"], errors="coerce").astype("Int64")
        df_out = df_out.sort_values(["Ticker", "YEAR"], na_position="last")
    Path(OUTPUT_CSV).parent.mkdir(parents=True, exist_ok=True)
    df_out.to_csv(OUTPUT_CSV, index=False)
    return df_out

# Run the full extraction only when this module is executed directly, keeping
# the resulting DataFrame in df_res.
if __name__ == "__main__":
    df_res = main()
# NOTE(review): this print sits outside the guard above, so it also runs when
# the module is merely imported; confirm that is intended.
print("ok")


# Output captured with the notebook cell: KeyboardInterrupt