In [2]:
import pandas as pd
import yfinance as yf
from pathlib import Path
import time
import warnings

# Run configuration, read as module-level globals by `main`.
# NOTE(review): both paths are absolute and user-specific; consider deriving
# them from a configurable data directory so the notebook runs elsewhere.

# Input CSV holding the ticker symbols to process (read by `_read_tickers`).
CSV_IN = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets.csv"
# Destination CSV written by `main`; its parent directory is created if missing.
OUTPUT_CSV = "/Users/marcomendieta/Documents/TFM/Data_stock/EstadoResultado/estadodecuenta.csv"
# Statement frequency: "quarterly" selects the quarterly statement, any other
# value selects the annual one (see `fetch_income_for_ticker`).
FREQ = "annual"
# Seconds to sleep after each ticker is processed in `main`.
PAUSE_S = 0.5

# Income-statement line items to extract. The order here is the column order
# of the output CSV (after "Ticker" and "period_end").
# Each label is looked up by exact match against the statement's row labels
# (see `_value_from_index`); a label that is not present produces None.
# NOTE(review): several labels (e.g. "Ebit", "Research Development",
# "Income Before Tax") look like older Yahoo naming - verify them against
# `yf.Ticker(...).income_stmt.index`, otherwise those columns stay empty.
ES_LIST = [
    # Top line to operating result
    "Total Revenue",
    "Cost Of Revenue",
    "Gross Profit",
    "Research Development",
    "Selling General Administrative",
    "Operating Expense",
    "Other Operating Expenses",
    "Operating Income",
    "Total Other Income Expense Net",
    "Ebit",

    # Interest, tax, minority, net income
    "Interest Expense",
    "Income Before Tax",
    "Tax Provision",
    "Minority Interest",
    "Net Income From Continuing Ops",
    "Discontinued Operations",
    "Extraordinary Items",
    "Effect Of Accounting Charges",
    "Other Items",
    "Net Income",
    "Net Income Applicable To Common Shares",

    # Per-share and share counts
    "Basic EPS",
    "Diluted EPS",
    "Basic Average Shares",
    "Diluted Average Shares",

    # Common standardized extras
    "Normalized Income",
    "Depreciation",
    "Depreciation & Amortization",
    "Depreciation Depletion & Amortization",
    "Preferred Dividends",

    # Banking/insurance lines that Yahoo shows for those sectors
    "Net Interest Income",
    "Provision for Loan Losses",
    "Total Premiums Earned",
]

def _read_tickers(csv_path: str) -> list:
    df = pd.read_csv(csv_path)
    candidates = ["ticker","TICKER","Ticker","symbol","SYMBOL","Symbol"]
    col = next((c for c in candidates if c in df.columns), df.columns[0])
    tickers = (
        df[col].astype(str).str.strip().replace({"": None, "nan": None, "None": None}).dropna().unique().tolist()
    )
    return tickers

def _ensure_items_as_index(df: pd.DataFrame) -> pd.DataFrame:
    """Orient the statement so that line-item labels sit on the row index.

    Counts how many `ES_LIST` labels appear in the index and how many appear
    in the columns. The frame is transposed only when the columns hold
    strictly more of them; on a tie the frame is returned unchanged.
    """
    hits_in_rows = 0
    hits_in_cols = 0
    for label in ES_LIST:
        if label in df.index:
            hits_in_rows += 1
        if label in df.columns:
            hits_in_cols += 1
    return df.T if hits_in_cols > hits_in_rows else df

def _pick_latest_column(df: pd.DataFrame):
    cols = df.columns
    if isinstance(cols, (pd.DatetimeIndex, pd.PeriodIndex)):
        return cols.max()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cols_dt = pd.to_datetime(cols, errors="coerce")
    if getattr(cols_dt, "notna", lambda: pd.Series([False]*len(cols)))().any():
        return cols[cols_dt.argmax()]
    return cols[0]

def _value_from_index(df: pd.DataFrame, key: str, col):
    if key in df.index:
        return df.loc[key, col]
    return None

def fetch_income_for_ticker(Ticker: str, freq: str = "annual") -> dict:
    """Build one output row with the latest income-statement values of a ticker.

    Args:
        Ticker: Symbol passed to ``yf.Ticker``.
        freq: "quarterly" reads the quarterly statement; any other value
            reads the annual statement.

    Returns:
        Dict with "Ticker", "period_end" (string form of the chosen column
        label) and one entry per `ES_LIST` label. When the statement is
        missing or empty, "period_end" and every line item are None. A line
        item that is absent from the statement is None; otherwise it is the
        cell value passed through ``pd.to_numeric(..., errors="coerce")``.
    """
    blank_row = {"Ticker": Ticker, "period_end": None, **{k: None for k in ES_LIST}}

    handle = yf.Ticker(Ticker)
    if freq == "quarterly":
        statement = handle.quarterly_income_stmt
    else:
        statement = handle.income_stmt
    if statement is None or statement.empty:
        return blank_row

    # Work on a copy, oriented with line items on the index.
    statement = _ensure_items_as_index(statement.copy())
    if statement.empty:
        return blank_row

    latest_col = _pick_latest_column(statement)
    out = {"Ticker": Ticker, "period_end": str(latest_col)}
    for key in ES_LIST:
        raw = _value_from_index(statement, key, latest_col)
        if raw is None:
            out[key] = None
        else:
            out[key] = pd.to_numeric(raw, errors="coerce")
    return out

def main():
    """Fetch the latest income statement for every ticker and write the CSV.

    Reads the tickers from `CSV_IN`, builds one row per ticker with
    `fetch_income_for_ticker` (sleeping `PAUSE_S` seconds after each one),
    and writes the table to `OUTPUT_CSV`, creating its parent directory if
    needed. A ticker whose fetch raises still gets a row: all values are
    None and the exception text is stored under "_error".

    Returns:
        The DataFrame that was written, with columns "Ticker", "period_end",
        the `ES_LIST` labels and, only if some ticker failed, "_error".
    """
    records = []
    for symbol in _read_tickers(CSV_IN):
        try:
            record = fetch_income_for_ticker(symbol, FREQ)
        except Exception as exc:
            # Best effort: keep going and record why this ticker failed.
            record = {"Ticker": symbol, "period_end": None, **{k: None for k in ES_LIST}}
            record["_error"] = str(exc)
        records.append(record)
        time.sleep(PAUSE_S)

    result = pd.DataFrame(records)
    ordered = ["Ticker", "period_end"] + ES_LIST
    if "_error" in result.columns:
        ordered.append("_error")
    result = result.reindex(columns=ordered)

    target = Path(OUTPUT_CSV)
    target.parent.mkdir(parents=True, exist_ok=True)
    result.to_csv(OUTPUT_CSV, index=False)
    return result

# Run the export only when this module is the entry point
# (`__name__ == "__main__"`); the returned frame is kept in `df_res`.
if __name__ == "__main__":
    df_res = main()


In [3]:
import pandas as pd
import yfinance as yf
from pathlib import Path
import time
import warnings

# Run configuration for this cell, read as module-level globals by `main`.
# NOTE(review): this cell repeats the definitions of the previous cell and
# differs only in these two paths; since `main` reads `CSV_IN`/`OUTPUT_CSV`
# when it is called, reassigning them and calling `main()` again would
# presumably be enough - confirm before removing the duplicated code.
# NOTE(review): both paths are absolute and user-specific; consider deriving
# them from a configurable data directory so the notebook runs elsewhere.

# Input CSV holding the ticker symbols to process (read by `_read_tickers`).
CSV_IN = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets_componentes_indices.csv"
# Destination CSV written by `main`; its parent directory is created if missing.
OUTPUT_CSV = "/Users/marcomendieta/Documents/TFM/Data_stock/EstadoResultado/estadodecuenta_prueba.csv"
# Statement frequency: "quarterly" selects the quarterly statement, any other
# value selects the annual one (see `fetch_income_for_ticker`).
FREQ = "annual"
# Seconds to sleep after each ticker is processed in `main`.
PAUSE_S = 0.5

# Income-statement line items to extract. The order here is the column order
# of the output CSV (after "Ticker" and "period_end").
# Each label is looked up by exact match against the statement's row labels
# (see `_value_from_index`); a label that is not present produces None.
# NOTE(review): several labels (e.g. "Ebit", "Research Development",
# "Income Before Tax") look like older Yahoo naming - verify them against
# `yf.Ticker(...).income_stmt.index`, otherwise those columns stay empty.
ES_LIST = [
    # Top line to operating result
    "Total Revenue",
    "Cost Of Revenue",
    "Gross Profit",
    "Research Development",
    "Selling General Administrative",
    "Operating Expense",
    "Other Operating Expenses",
    "Operating Income",
    "Total Other Income Expense Net",
    "Ebit",

    # Interest, tax, minority, net income
    "Interest Expense",
    "Income Before Tax",
    "Tax Provision",
    "Minority Interest",
    "Net Income From Continuing Ops",
    "Discontinued Operations",
    "Extraordinary Items",
    "Effect Of Accounting Charges",
    "Other Items",
    "Net Income",
    "Net Income Applicable To Common Shares",

    # Per-share and share counts
    "Basic EPS",
    "Diluted EPS",
    "Basic Average Shares",
    "Diluted Average Shares",

    # Common standardized extras
    "Normalized Income",
    "Depreciation",
    "Depreciation & Amortization",
    "Depreciation Depletion & Amortization",
    "Preferred Dividends",

    # Banking/insurance lines that Yahoo shows for those sectors
    "Net Interest Income",
    "Provision for Loan Losses",
    "Total Premiums Earned",
]

def _read_tickers(csv_path: str) -> list:
    df = pd.read_csv(csv_path)
    candidates = ["ticker","TICKER","Ticker","symbol","SYMBOL","Symbol"]
    col = next((c for c in candidates if c in df.columns), df.columns[0])
    tickers = (
        df[col].astype(str).str.strip().replace({"": None, "nan": None, "None": None}).dropna().unique().tolist()
    )
    return tickers

def _ensure_items_as_index(df: pd.DataFrame) -> pd.DataFrame:
    """Orient the statement so that line-item labels sit on the row index.

    Counts how many `ES_LIST` labels appear in the index and how many appear
    in the columns. The frame is transposed only when the columns hold
    strictly more of them; on a tie the frame is returned unchanged.
    """
    hits_in_rows = 0
    hits_in_cols = 0
    for label in ES_LIST:
        if label in df.index:
            hits_in_rows += 1
        if label in df.columns:
            hits_in_cols += 1
    return df.T if hits_in_cols > hits_in_rows else df

def _pick_latest_column(df: pd.DataFrame):
    cols = df.columns
    if isinstance(cols, (pd.DatetimeIndex, pd.PeriodIndex)):
        return cols.max()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cols_dt = pd.to_datetime(cols, errors="coerce")
    if getattr(cols_dt, "notna", lambda: pd.Series([False]*len(cols)))().any():
        return cols[cols_dt.argmax()]
    return cols[0]

def _value_from_index(df: pd.DataFrame, key: str, col):
    if key in df.index:
        return df.loc[key, col]
    return None

def fetch_income_for_ticker(Ticker: str, freq: str = "annual") -> dict:
    """Build one output row with the latest income-statement values of a ticker.

    Args:
        Ticker: Symbol passed to ``yf.Ticker``.
        freq: "quarterly" reads the quarterly statement; any other value
            reads the annual statement.

    Returns:
        Dict with "Ticker", "period_end" (string form of the chosen column
        label) and one entry per `ES_LIST` label. When the statement is
        missing or empty, "period_end" and every line item are None. A line
        item that is absent from the statement is None; otherwise it is the
        cell value passed through ``pd.to_numeric(..., errors="coerce")``.
    """
    blank_row = {"Ticker": Ticker, "period_end": None, **{k: None for k in ES_LIST}}

    handle = yf.Ticker(Ticker)
    if freq == "quarterly":
        statement = handle.quarterly_income_stmt
    else:
        statement = handle.income_stmt
    if statement is None or statement.empty:
        return blank_row

    # Work on a copy, oriented with line items on the index.
    statement = _ensure_items_as_index(statement.copy())
    if statement.empty:
        return blank_row

    latest_col = _pick_latest_column(statement)
    out = {"Ticker": Ticker, "period_end": str(latest_col)}
    for key in ES_LIST:
        raw = _value_from_index(statement, key, latest_col)
        if raw is None:
            out[key] = None
        else:
            out[key] = pd.to_numeric(raw, errors="coerce")
    return out

def main():
    """Fetch the latest income statement for every ticker and write the CSV.

    Reads the tickers from `CSV_IN`, builds one row per ticker with
    `fetch_income_for_ticker` (sleeping `PAUSE_S` seconds after each one),
    and writes the table to `OUTPUT_CSV`, creating its parent directory if
    needed. A ticker whose fetch raises still gets a row: all values are
    None and the exception text is stored under "_error".

    Returns:
        The DataFrame that was written, with columns "Ticker", "period_end",
        the `ES_LIST` labels and, only if some ticker failed, "_error".
    """
    records = []
    for symbol in _read_tickers(CSV_IN):
        try:
            record = fetch_income_for_ticker(symbol, FREQ)
        except Exception as exc:
            # Best effort: keep going and record why this ticker failed.
            record = {"Ticker": symbol, "period_end": None, **{k: None for k in ES_LIST}}
            record["_error"] = str(exc)
        records.append(record)
        time.sleep(PAUSE_S)

    result = pd.DataFrame(records)
    ordered = ["Ticker", "period_end"] + ES_LIST
    if "_error" in result.columns:
        ordered.append("_error")
    result = result.reindex(columns=ordered)

    target = Path(OUTPUT_CSV)
    target.parent.mkdir(parents=True, exist_ok=True)
    result.to_csv(OUTPUT_CSV, index=False)
    return result

# Run the export only when this module is the entry point
# (`__name__ == "__main__"`); the returned frame is kept in `df_res`.
if __name__ == "__main__":
    df_res = main()
