### CashFlow

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import time
import sys
from pathlib import Path

# Input CSV holding the ticker symbols, and destination CSV for the export.
path_csv = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets_componentes_indices.csv"
out_csv  = "/Users/marcomendieta/Documents/TFM/Data_stock/CashFlow/cashflow.csv"
sleep_s  = 0.5  # pause, in seconds, applied after each ticker is processed

df = pd.read_csv(path_csv)

# Prefer the expected column name; otherwise fall back to the first column
# whose label contains "ticker" (case-insensitive).
if "ticker_market" in df.columns:
    col = "ticker_market"
else:
    # str(c) keeps the search from failing on non-string column labels.
    cands = [c for c in df.columns if "ticker" in str(c).lower()]
    if not cands:
        raise ValueError("No encuentro columna de tickers (esperaba 'ticker_market').")
    col = cands[0]

# Drop missing cells BEFORE casting to str: astype(str) turns NaN into the
# literal text "nan", which would otherwise survive as a bogus ticker.
symbols = df[col].dropna().astype(str).str.strip()
# Discard blank entries and keep the first occurrence of each symbol.
tickers = symbols[symbols != ""].unique().tolist()
if not tickers:
    raise ValueError("La lista de tickers está vacía.")

# Cash-flow statement row labels to extract. The per-section lists below are
# concatenated in order, so `cashflow_keys` lists operating items first, then
# investing, financing, the cash bridge and finally the derived figure.
_operating_rows = [
    "DepreciationAndAmortization",
    "OtherNonCashItems",
    "ChangeInWorkingCapital",
    "ChangeInOtherWorkingCapital",
    "ChangesInAccountReceivables",
    "ChangeInAccountPayable",
    "ChangeInOtherCurrentAssets",
    "ChangeInOtherCurrentLiabilities",
    "OperatingCashFlow",
    "CashFlowFromContinuingOperatingActivities",
]

_investing_rows = [
    "CapitalExpenditure",
    "PurchaseOfPropertyPlantAndEquipment",
    "SaleOfPropertyPlantAndEquipment",
    "PurchaseOfIntangibleAssets",
    "SaleOfIntangibleAssets",
    "PurchaseOfInvestment",
    "SaleOfInvestment",
    "PurchaseOfBusiness",
    "SaleOfBusiness",
    "NetIntangiblesPurchaseAndSale",
    "NetInvestmentPurchaseAndSale",
    "NetPPEPurchaseAndSale",
    "NetBusinessPurchaseAndSale",
    "NetOtherInvestingChanges",
    "InvestingCashFlow",
    "CashFlowFromContinuingInvestingActivities",
]

_financing_rows = [
    "IssuanceOfCapitalStock",
    "CommonStockIssuance",
    "RepurchaseOfCapitalStock",
    "CashDividendsPaid",
    "IssuanceOfDebt",
    "RepaymentOfDebt",
    "IssuanceOfLongTermDebt",
    "RepaymentOfLongTermDebt",
    "NetBorrowings",
    "NetOtherFinancingCharges",
    "FinancingCashFlow",
    "CashFlowFromContinuingFinancingActivities",
]

# Exchange-rate effect and the opening/closing cash positions.
_cash_bridge_rows = [
    "EffectOfExchangeRateChanges",
    "ChangeInCashSupplementalAsReported",
    "BeginningCashPosition",
    "EndCashPosition",
]

# Optional / derived figure.
_derived_rows = ["FreeCashFlow"]

cashflow_keys = [
    *_operating_rows,
    *_investing_rows,
    *_financing_rows,
    *_cash_bridge_rows,
    *_derived_rows,
]

def _to_millions(x):
    """Scale a raw amount to millions, rounded to two decimals.

    Args:
        x: Value convertible with ``float()`` (number, NumPy scalar, numeric
            string), or ``None``.

    Returns:
        ``round(float(x) / 1_000_000, 2)``, or ``None`` when ``x`` is
        ``None``, cannot be converted to a float, or converts to NaN.
    """
    if x is None:
        return None
    try:
        amount = float(x)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric input (text, pd.NA, multi-element containers, ...).
        return None
    # Test for NaN after the conversion so that NaNs of any origin
    # (np.float32, the string "nan", ...) map to None, not only `float` ones.
    if np.isnan(amount):
        return None
    return round(amount / 1_000_000, 2)

def _extract_date(col):
    """Parse a column label into a timestamp.

    A scalar label is handed to ``pd.to_datetime(..., errors="coerce")`` and
    that result is returned as-is. For a tuple or list label, the elements
    are parsed in order and the first one that yields a non-null timestamp is
    returned; ``pd.NaT`` is returned when no element parses.
    """
    if not isinstance(col, (tuple, list)):
        return pd.to_datetime(col, errors="coerce")

    # Lazy generator: parsing stops at the first element that is a valid date.
    parsed = (pd.to_datetime(part, errors="coerce") for part in col)
    return next((stamp for stamp in parsed if pd.notna(stamp)), pd.NaT)

def _most_recent_col(cols):
    """Return the label in ``cols`` whose parsed date is the latest.

    Every label is parsed with ``_extract_date``. If none of them yields a
    date, the first label is returned unchanged. Otherwise labels without a
    date are ignored and the one with the greatest timestamp wins.
    """
    stamps = [_extract_date(label) for label in cols]
    has_date = [pd.notna(stamp) for stamp in stamps]
    if not any(has_date):
        return cols[0]

    # Compare on the integer `.value` of each timestamp; undated labels get
    # NaN so that nanargmax skips them.
    ordinals = [
        stamp.value if dated else np.nan
        for stamp, dated in zip(stamps, has_date)
    ]
    return cols[int(np.nanargmax(ordinals))]

def fetch_cf(tkr: yf.Ticker) -> pd.DataFrame:
    """Return the first non-empty cash-flow table obtainable from ``tkr``.

    Sources are tried in this order, and the first usable result wins:

    1. ``tkr.get_cashflow(freq="yearly")``, then ``freq="quarterly"``, then
       ``freq="yearly", trailing=True`` (only if ``get_cashflow`` exists);
    2. the ``cashflow`` and ``quarterly_cashflow`` attributes;
    3. the ``ttm_cashflow`` and ``trailing_cashflow`` attributes.

    Any exception raised by a source is swallowed and the next source is
    tried. An empty ``DataFrame`` is returned when every source fails.

    NOTE(review): whether ``get_cashflow`` accepts ``trailing=True`` and
    whether the ``ttm_``/``trailing_`` attributes exist presumably depends on
    the installed yfinance version -- confirm against its API reference.
    """

    def _usable(frame):
        # A usable result is a non-null, non-empty table with >= 1 column.
        return frame is not None and not frame.empty and len(frame.columns) > 0

    loaders = []
    if hasattr(tkr, "get_cashflow"):
        loaders += [
            lambda: tkr.get_cashflow(freq="yearly"),
            lambda: tkr.get_cashflow(freq="quarterly"),
            lambda: tkr.get_cashflow(freq="yearly", trailing=True),
        ]
    loaders += [
        lambda name=name: getattr(tkr, name)
        for name in (
            "cashflow",
            "quarterly_cashflow",
            "ttm_cashflow",
            "trailing_cashflow",
        )
    ]

    for load in loaders:
        try:
            frame = load()
            if _usable(frame):
                return frame
        except Exception:
            # Best effort: fall through to the next source.
            continue
    return pd.DataFrame()

# Build one row per ticker: the most recent cash-flow column, in millions.
cashflow_dict = {}
for i, ti in enumerate(tickers, 1):
    try:
        cf = fetch_cf(yf.Ticker(ti))
        if cf is None or cf.empty or len(cf.columns) == 0:
            # No data: all-NaN row, exported as empty cells.
            cashflow_dict[ti] = {k: np.nan for k in cashflow_keys}
        else:
            last_col = _most_recent_col(cf.columns)
            # Membership is tested on the same index that .loc reads from,
            # so a label that is absent simply yields None.
            data_cf = {
                key: _to_millions(cf.loc[key, last_col]) if key in cf.index else None
                for key in cashflow_keys
            }
            if data_cf.get("FreeCashFlow") is None:
                ocf = data_cf.get("OperatingCashFlow")
                capex = data_cf.get("CapitalExpenditure")
                if ocf is not None and capex is not None:
                    # Yahoo reports CapitalExpenditure as a negative outflow,
                    # so subtract its magnitude: `ocf - capex` would add the
                    # spending back and overstate free cash flow.
                    data_cf["FreeCashFlow"] = round(ocf - abs(capex), 2)
            cashflow_dict[ti] = data_cf
    except Exception as exc:
        # Keep the run going, but report the failure instead of hiding it;
        # the ticker still gets an all-NaN row so it appears in the CSV.
        print(
            f"WARN [{i}/{len(tickers)}] {ti}: {type(exc).__name__}: {exc}",
            file=sys.stderr,
        )
        cashflow_dict[ti] = {k: np.nan for k in cashflow_keys}
    time.sleep(sleep_s)

df_cashflow = pd.DataFrame.from_dict(cashflow_dict, orient="index")
df_cashflow.index.name = "Ticker"
df_cashflow = df_cashflow.reindex(columns=cashflow_keys)

# Make sure None becomes NaN so it is exported as a blank cell.
df_cashflow = df_cashflow.replace({None: np.nan})

# Create the destination folder if needed so the export cannot fail after
# all tickers have been fetched.
Path(out_csv).parent.mkdir(parents=True, exist_ok=True)

# Export with empty cells (neither 'NaN' nor 'None').
df_cashflow.to_csv(out_csv, index=True, na_rep="")

print(f"OK -> {Path(out_csv).resolve()}", flush=True)


OK -> /Users/marcomendieta/Documents/TFM/Data_stock/CashFlow/cashflow.csv


In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import time
import sys
from pathlib import Path

# Input CSV holding the ticker symbols, and destination CSV for the export.
path_csv = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets.csv"
out_csv  = "/Users/marcomendieta/Documents/TFM/Data_stock/CashFlow/cashflow.csv"
sleep_s  = 0.5  # pause, in seconds, applied after each ticker is processed

df = pd.read_csv(path_csv)

# Prefer the expected column name; otherwise fall back to the first column
# whose label contains "ticker" (case-insensitive).
if "ticker_market" in df.columns:
    col = "ticker_market"
else:
    # str(c) keeps the search from failing on non-string column labels.
    cands = [c for c in df.columns if "ticker" in str(c).lower()]
    if not cands:
        raise ValueError("No encuentro columna de tickers (esperaba 'ticker_market').")
    col = cands[0]

# Drop missing cells BEFORE casting to str: astype(str) turns NaN into the
# literal text "nan", which would otherwise survive as a bogus ticker.
symbols = df[col].dropna().astype(str).str.strip()
# Discard blank entries and keep the first occurrence of each symbol.
tickers = symbols[symbols != ""].unique().tolist()
if not tickers:
    raise ValueError("La lista de tickers está vacía.")

# Cash-flow statement row labels to extract. The per-section lists below are
# concatenated in order, so `cashflow_keys` lists operating items first, then
# investing, financing, the cash bridge and finally the derived figure.
_operating_rows = [
    "DepreciationAndAmortization",
    "OtherNonCashItems",
    "ChangeInWorkingCapital",
    "ChangeInOtherWorkingCapital",
    "ChangesInAccountReceivables",
    "ChangeInAccountPayable",
    "ChangeInOtherCurrentAssets",
    "ChangeInOtherCurrentLiabilities",
    "OperatingCashFlow",
    "CashFlowFromContinuingOperatingActivities",
]

_investing_rows = [
    "CapitalExpenditure",
    "PurchaseOfPropertyPlantAndEquipment",
    "SaleOfPropertyPlantAndEquipment",
    "PurchaseOfIntangibleAssets",
    "SaleOfIntangibleAssets",
    "PurchaseOfInvestment",
    "SaleOfInvestment",
    "PurchaseOfBusiness",
    "SaleOfBusiness",
    "NetIntangiblesPurchaseAndSale",
    "NetInvestmentPurchaseAndSale",
    "NetPPEPurchaseAndSale",
    "NetBusinessPurchaseAndSale",
    "NetOtherInvestingChanges",
    "InvestingCashFlow",
    "CashFlowFromContinuingInvestingActivities",
]

_financing_rows = [
    "IssuanceOfCapitalStock",
    "CommonStockIssuance",
    "RepurchaseOfCapitalStock",
    "CashDividendsPaid",
    "IssuanceOfDebt",
    "RepaymentOfDebt",
    "IssuanceOfLongTermDebt",
    "RepaymentOfLongTermDebt",
    "NetBorrowings",
    "NetOtherFinancingCharges",
    "FinancingCashFlow",
    "CashFlowFromContinuingFinancingActivities",
]

# Exchange-rate effect and the opening/closing cash positions.
_cash_bridge_rows = [
    "EffectOfExchangeRateChanges",
    "ChangeInCashSupplementalAsReported",
    "BeginningCashPosition",
    "EndCashPosition",
]

# Optional / derived figure.
_derived_rows = ["FreeCashFlow"]

cashflow_keys = [
    *_operating_rows,
    *_investing_rows,
    *_financing_rows,
    *_cash_bridge_rows,
    *_derived_rows,
]

def _to_millions(x):
    """Scale a raw amount to millions, rounded to two decimals.

    Args:
        x: Value convertible with ``float()`` (number, NumPy scalar, numeric
            string), or ``None``.

    Returns:
        ``round(float(x) / 1_000_000, 2)``, or ``None`` when ``x`` is
        ``None``, cannot be converted to a float, or converts to NaN.
    """
    if x is None:
        return None
    try:
        amount = float(x)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric input (text, pd.NA, multi-element containers, ...).
        return None
    # Test for NaN after the conversion so that NaNs of any origin
    # (np.float32, the string "nan", ...) map to None, not only `float` ones.
    if np.isnan(amount):
        return None
    return round(amount / 1_000_000, 2)

def _extract_date(col):
    """Parse a column label into a timestamp.

    A scalar label is handed to ``pd.to_datetime(..., errors="coerce")`` and
    that result is returned as-is. For a tuple or list label, the elements
    are parsed in order and the first one that yields a non-null timestamp is
    returned; ``pd.NaT`` is returned when no element parses.
    """
    if not isinstance(col, (tuple, list)):
        return pd.to_datetime(col, errors="coerce")

    # Lazy generator: parsing stops at the first element that is a valid date.
    parsed = (pd.to_datetime(part, errors="coerce") for part in col)
    return next((stamp for stamp in parsed if pd.notna(stamp)), pd.NaT)

def _most_recent_col(cols):
    """Return the label in ``cols`` whose parsed date is the latest.

    Every label is parsed with ``_extract_date``. If none of them yields a
    date, the first label is returned unchanged. Otherwise labels without a
    date are ignored and the one with the greatest timestamp wins.
    """
    stamps = [_extract_date(label) for label in cols]
    has_date = [pd.notna(stamp) for stamp in stamps]
    if not any(has_date):
        return cols[0]

    # Compare on the integer `.value` of each timestamp; undated labels get
    # NaN so that nanargmax skips them.
    ordinals = [
        stamp.value if dated else np.nan
        for stamp, dated in zip(stamps, has_date)
    ]
    return cols[int(np.nanargmax(ordinals))]

def fetch_cf(tkr: yf.Ticker) -> pd.DataFrame:
    """Return the first non-empty cash-flow table obtainable from ``tkr``.

    Sources are tried in this order, and the first usable result wins:

    1. ``tkr.get_cashflow(freq="yearly")``, then ``freq="quarterly"``, then
       ``freq="yearly", trailing=True`` (only if ``get_cashflow`` exists);
    2. the ``cashflow`` and ``quarterly_cashflow`` attributes;
    3. the ``ttm_cashflow`` and ``trailing_cashflow`` attributes.

    Any exception raised by a source is swallowed and the next source is
    tried. An empty ``DataFrame`` is returned when every source fails.

    NOTE(review): whether ``get_cashflow`` accepts ``trailing=True`` and
    whether the ``ttm_``/``trailing_`` attributes exist presumably depends on
    the installed yfinance version -- confirm against its API reference.
    """

    def _usable(frame):
        # A usable result is a non-null, non-empty table with >= 1 column.
        return frame is not None and not frame.empty and len(frame.columns) > 0

    loaders = []
    if hasattr(tkr, "get_cashflow"):
        loaders += [
            lambda: tkr.get_cashflow(freq="yearly"),
            lambda: tkr.get_cashflow(freq="quarterly"),
            lambda: tkr.get_cashflow(freq="yearly", trailing=True),
        ]
    loaders += [
        lambda name=name: getattr(tkr, name)
        for name in (
            "cashflow",
            "quarterly_cashflow",
            "ttm_cashflow",
            "trailing_cashflow",
        )
    ]

    for load in loaders:
        try:
            frame = load()
            if _usable(frame):
                return frame
        except Exception:
            # Best effort: fall through to the next source.
            continue
    return pd.DataFrame()

# Build one row per ticker: the most recent cash-flow column, in millions.
cashflow_dict = {}
for i, ti in enumerate(tickers, 1):
    try:
        cf = fetch_cf(yf.Ticker(ti))
        if cf is None or cf.empty or len(cf.columns) == 0:
            # No data: all-NaN row, exported as empty cells.
            cashflow_dict[ti] = {k: np.nan for k in cashflow_keys}
        else:
            last_col = _most_recent_col(cf.columns)
            # Membership is tested on the same index that .loc reads from,
            # so a label that is absent simply yields None.
            data_cf = {
                key: _to_millions(cf.loc[key, last_col]) if key in cf.index else None
                for key in cashflow_keys
            }
            if data_cf.get("FreeCashFlow") is None:
                ocf = data_cf.get("OperatingCashFlow")
                capex = data_cf.get("CapitalExpenditure")
                if ocf is not None and capex is not None:
                    # Yahoo reports CapitalExpenditure as a negative outflow,
                    # so subtract its magnitude: `ocf - capex` would add the
                    # spending back and overstate free cash flow.
                    data_cf["FreeCashFlow"] = round(ocf - abs(capex), 2)
            cashflow_dict[ti] = data_cf
    except Exception as exc:
        # Keep the run going, but report the failure instead of hiding it;
        # the ticker still gets an all-NaN row so it appears in the CSV.
        print(
            f"WARN [{i}/{len(tickers)}] {ti}: {type(exc).__name__}: {exc}",
            file=sys.stderr,
        )
        cashflow_dict[ti] = {k: np.nan for k in cashflow_keys}
    time.sleep(sleep_s)

df_cashflow = pd.DataFrame.from_dict(cashflow_dict, orient="index")
df_cashflow.index.name = "Ticker"
df_cashflow = df_cashflow.reindex(columns=cashflow_keys)

# Make sure None becomes NaN so it is exported as a blank cell.
df_cashflow = df_cashflow.replace({None: np.nan})

# Create the destination folder if needed so the export cannot fail after
# all tickers have been fetched.
Path(out_csv).parent.mkdir(parents=True, exist_ok=True)

# Export with empty cells (neither 'NaN' nor 'None').
df_cashflow.to_csv(out_csv, index=True, na_rep="")

print(f"OK -> {Path(out_csv).resolve()}", flush=True)
