### CashFlow

# CashFlow INDICES EUROPA

In [2]:
import pandas as pd
import numpy as np
import yfinance as yf
import time
import sys
from pathlib import Path
import re
from datetime import datetime
from difflib import get_close_matches

# Requested cash-flow variables: the first double-quoted token of every
# non-comment line in the variables file.
with open("/Users/marcomendieta/Documents/TFM/Data_stock/CashFlow/VariablesCF.txt", "r", encoding="utf-8") as f:
    quoted_lines = (
        ln for ln in f
        if not ln.strip().startswith("#") and '"' in ln
    )
    cashflow_keys = [ln.split('"')[1] for ln in quoted_lines]

# Year window: from MIN_YEAR up to and including the current calendar year.
MIN_YEAR = 2020
MAX_YEAR = datetime.now().year
years = [*range(MIN_YEAR, MAX_YEAR + 1)]

# Input ticker list, output CSV, and pause (seconds) between tickers.
path_csv = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets_componentes_indices.csv"
out_csv = "/Users/marcomendieta/Documents/TFM/Data_stock/CashFlow/cashflow_prueba.csv"
sleep_s = 0.5

def norm(s: str) -> str:
    """Return *s* lower-cased with everything except ASCII a-z / 0-9 removed.

    The argument is passed through ``str()`` first, so non-string labels
    are accepted.
    """
    keep = "abcdefghijklmnopqrstuvwxyz0123456789"
    lowered = str(s).lower()
    return "".join(ch for ch in lowered if ch in keep)

def best_match(target_norm: str, candidates_norm: dict) -> str | None:
    if target_norm in candidates_norm:
        return target_norm
    m = get_close_matches(target_norm, list(candidates_norm.keys()), n=1, cutoff=0.84)
    if m:
        return m[0]
    for c in candidates_norm:
        if target_norm in c or c in target_norm:
            return c
    return None

def _to_millions(x):
    try:
        if x is None or (isinstance(x, float) and np.isnan(x)):
            return None
        return round(float(x) / 1_000_000, 2)
    except Exception:
        return None

def _extract_date(col):
    if isinstance(col, (tuple, list)):
        for item in col:
            d = pd.to_datetime(item, errors="coerce")
            if pd.notna(d):
                return d
        return pd.NaT
    return pd.to_datetime(col, errors="coerce")

def _col_year(col):
    """Return the year a column label refers to, or ``None``.

    The label is first parsed as a date; if that fails, the first run of
    four digits that follows any leading non-digit characters in
    ``str(col)`` is used.
    """
    parsed = _extract_date(col)
    if not pd.isna(parsed):
        return int(parsed.year)

    found = re.match(r'^\D*(\d{4})', str(col))
    if found is None:
        return None
    return int(found.group(1))

def _pick_col_for_year(df, year: int):
    if df is None or df.empty:
        return None
    cands = [c for c in df.columns if _col_year(c) == year]
    if not cands:
        return None
    try:
        cands_sorted = sorted(cands, key=lambda c: pd.to_datetime(c, errors="coerce"), reverse=True)
        return cands_sorted[0]
    except Exception:
        return cands[0]

def _fetch_both_cf(tkr: yf.Ticker):
    """Fetch the annual and quarterly cash-flow frames of *tkr*.

    The ``cashflow`` / ``quarterly_cashflow`` attributes are read first;
    any exception while reading one leaves that frame as ``None``. When
    the annual frame is missing or empty and the ticker exposes
    ``get_cashflow``, both frequencies are requested again through it and
    every non-empty result replaces the corresponding frame.

    Returns:
        ``(annual, quarterly)``; either element may be ``None`` or empty.
    """
    def _is_blank(frame):
        return frame is None or frame.empty

    try:
        ann = tkr.cashflow
    except Exception:
        ann = None

    try:
        qtr = tkr.quarterly_cashflow
    except Exception:
        qtr = None

    # The fallback is only attempted when the annual frame is unusable.
    if not _is_blank(ann) or not hasattr(tkr, "get_cashflow"):
        return ann, qtr

    for freq in ("yearly", "quarterly"):
        try:
            cf = tkr.get_cashflow(freq=freq)
            usable = cf is not None and not cf.empty
        except Exception:
            continue
        if not usable:
            continue
        if freq == "yearly":
            ann = cf
        else:
            qtr = cf
    return ann, qtr

# Load the ticker universe from the input CSV.
df = pd.read_csv(path_csv)

# Prefer the "ticker_market" column; otherwise take the first column whose
# name contains "ticker" (case-insensitive).
if "ticker_market" in df.columns:
    col = "ticker_market"
else:
    col = next((c for c in df.columns if "ticker" in str(c).lower()), None)
    if col is None:
        raise ValueError("No encuentro columna de tickers (esperaba 'ticker_market').")

# Drop missing cells BEFORE casting to str: casting first turns NaN into the
# literal string "nan", which would then survive dropna() and be queried as
# if it were a real ticker.
ticker_series = df[col].dropna().astype(str).str.strip()
tickers = ticker_series[ticker_series != ""].unique().tolist()
if not tickers:
    raise ValueError("La lista de tickers está vacía.")

rows = []


def _blank_rows(ticker):
    """Return one all-NaN row per requested year for *ticker*."""
    return [
        {"Ticker": ticker, "Year": yr, **dict.fromkeys(cashflow_keys, np.nan)}
        for yr in years
    ]


def _resolve_labels(frame):
    """Map every requested key to its matching row label in *frame*.

    Keys without a match are left out. Returns an empty dict when *frame*
    is ``None`` or empty.
    """
    if frame is None or frame.empty:
        return {}
    by_norm = {norm(idx): idx for idx in frame.index}
    resolved = {}
    for key in cashflow_keys:
        mk = best_match(norm(key), by_norm)
        if mk is not None:
            resolved[key] = by_norm[mk]
    return resolved


# One row per (ticker, year); values are expressed in millions.
for ti in tickers:
    try:
        tkr = yf.Ticker(ti)
        ann, qtr = _fetch_both_cf(tkr)
        frames = {"ann": ann, "qtr": qtr}
        has_data = any(f is not None and not f.empty for f in frames.values())

        if not has_data:
            ticker_rows = _blank_rows(ti)
        else:
            # Label matching does not depend on the year: resolve it once
            # per ticker instead of once per (year, key).
            labels = {src: _resolve_labels(f) for src, f in frames.items()}

            ticker_rows = []
            for yr in years:
                # Annual column first; fall back to a quarterly column.
                # NOTE(review): a quarterly fallback stores a single
                # quarter's figure under the year - confirm this is intended.
                src, col_use = "ann", _pick_col_for_year(ann, yr)
                if col_use is None:
                    src, col_use = "qtr", _pick_col_for_year(qtr, yr)

                row = {"Ticker": ti, "Year": yr}
                for key in cashflow_keys:
                    v = None
                    lbl = labels[src].get(key) if col_use is not None else None
                    if lbl is not None:
                        try:
                            v = frames[src].loc[lbl, col_use]
                        except Exception:
                            # Best effort: a failed lookup is a missing value.
                            v = None
                    row[key] = _to_millions(v)

                if row.get("FreeCashFlow") is None:
                    ocf = row.get("OperatingCashFlow")
                    capex = row.get("CapitalExpenditure")
                    if ocf is not None and capex is not None:
                        # Capital expenditure is a cash outflow and Yahoo
                        # normally reports it as a negative number, so
                        # "ocf - capex" would ADD it. Subtracting the
                        # magnitude is correct under either sign convention.
                        row["FreeCashFlow"] = round(ocf - abs(capex), 2)

                ticker_rows.append(row)
    except Exception as exc:
        # Rows are collected per ticker and only published below, so a
        # failure half-way through cannot leave partial rows next to the
        # blank ones (which would duplicate (Ticker, Year) pairs).
        print(f"[WARN] {ti}: {exc!r}", file=sys.stderr)
        ticker_rows = _blank_rows(ti)
    rows.extend(ticker_rows)
    time.sleep(sleep_s)

# Fixed column order; requested keys that never appeared become empty columns.
cols = ["Ticker", "Year"] + cashflow_keys
df_cashflow = pd.DataFrame(rows).reindex(columns=cols)
df_cashflow = df_cashflow.sort_values(["Ticker", "Year"]).replace({None: np.nan})
# Missing values are written as empty cells.
df_cashflow.to_csv(out_csv, index=False, na_rep="")
print("OK")


OK


# CashFlow TOTAL EUROPA

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import time
import sys
from pathlib import Path

# Input ticker list, output CSV, and pause (seconds) between tickers.
path_csv = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets.csv"
out_csv = "/Users/marcomendieta/Documents/TFM/Data_stock/CashFlow/cashflow.csv"
sleep_s = 0.5

df = pd.read_csv(path_csv)

# Prefer the "ticker_market" column; otherwise take the first column whose
# name contains "ticker" (case-insensitive).
if "ticker_market" in df.columns:
    col = "ticker_market"
else:
    col = next((c for c in df.columns if "ticker" in str(c).lower()), None)
    if col is None:
        raise ValueError("No encuentro columna de tickers (esperaba 'ticker_market').")

# Drop missing cells BEFORE casting to str: casting first turns NaN into the
# literal string "nan", which would then survive dropna() and be queried as
# if it were a real ticker.
ticker_series = df[col].dropna().astype(str).str.strip()
tickers = ticker_series[ticker_series != ""].unique().tolist()
if not tickers:
    raise ValueError("La lista de tickers está vacía.")

# Row labels looked up in each ticker's cash-flow frame. The list order is
# also the column order of the exported CSV. "FreeCashFlow" is derived from
# OperatingCashFlow and CapitalExpenditure further down when the frame does
# not provide it.
cashflow_keys = [
    # OPERATING
    "DepreciationAndAmortization",
    "OtherNonCashItems",
    "ChangeInWorkingCapital",
    "ChangeInOtherWorkingCapital",
    "ChangesInAccountReceivables",
    "ChangeInAccountPayable",
    "ChangeInOtherCurrentAssets",
    "ChangeInOtherCurrentLiabilities",
    "OperatingCashFlow",
    "CashFlowFromContinuingOperatingActivities",

    # INVESTING
    "CapitalExpenditure",
    "PurchaseOfPropertyPlantAndEquipment",
    "SaleOfPropertyPlantAndEquipment",
    "PurchaseOfIntangibleAssets",
    "SaleOfIntangibleAssets",
    "PurchaseOfInvestment",
    "SaleOfInvestment",
    "PurchaseOfBusiness",
    "SaleOfBusiness",
    "NetIntangiblesPurchaseAndSale",
    "NetInvestmentPurchaseAndSale",
    "NetPPEPurchaseAndSale",
    "NetBusinessPurchaseAndSale",
    "NetOtherInvestingChanges",
    "InvestingCashFlow",
    "CashFlowFromContinuingInvestingActivities",

    # FINANCING
    "IssuanceOfCapitalStock",
    "CommonStockIssuance",
    "RepurchaseOfCapitalStock",
    "CashDividendsPaid",
    "IssuanceOfDebt",
    "RepaymentOfDebt",
    "IssuanceOfLongTermDebt",
    "RepaymentOfLongTermDebt",
    "NetBorrowings",
    "NetOtherFinancingCharges",
    "FinancingCashFlow",
    "CashFlowFromContinuingFinancingActivities",

    # FX & CASH BRIDGE
    "EffectOfExchangeRateChanges",
    "ChangeInCashSupplementalAsReported",
    "BeginningCashPosition",
    "EndCashPosition",

    # OPTIONAL / DERIVED
    "FreeCashFlow",
]

def _to_millions(x):
    try:
        if x is None or (isinstance(x, float) and np.isnan(x)):
            return None
        return round(float(x) / 1_000_000, 2)
    except Exception:
        return None

def _extract_date(col):
    if isinstance(col, (tuple, list)):
        for item in col:
            d = pd.to_datetime(item, errors="coerce")
            if pd.notna(d):
                return d
        return pd.NaT
    return pd.to_datetime(col, errors="coerce")

def _most_recent_col(cols):
    """Return the label in *cols* carrying the latest date.

    Labels are dated with ``_extract_date``. When no label has a
    parseable date the first label is returned.
    """
    stamps = [_extract_date(label) for label in cols]
    if not any(pd.notna(ts) for ts in stamps):
        return cols[0]

    # Compare on the integer nanosecond value; undated labels become NaN
    # so nanargmax skips them.
    ns_values = [ts.value if pd.notna(ts) else np.nan for ts in stamps]
    latest = int(np.nanargmax(ns_values))
    return cols[latest]

def fetch_cf(tkr: yf.Ticker) -> pd.DataFrame:
    """Return the first usable cash-flow frame available for *tkr*.

    Sources are tried in this order, skipping any that raise or return a
    missing/empty frame:

    1. ``get_cashflow(freq="yearly")``, ``get_cashflow(freq="quarterly")``
       and ``get_cashflow(freq="yearly", trailing=True)`` (only when the
       ticker has a ``get_cashflow`` attribute);
    2. the attributes ``cashflow``, ``quarterly_cashflow``,
       ``ttm_cashflow`` and ``trailing_cashflow``.

    Returns:
        The first non-empty frame, or an empty ``DataFrame`` when every
        source fails.
    """
    def _usable(frame):
        return frame is not None and not frame.empty and len(frame.columns) > 0

    if hasattr(tkr, "get_cashflow"):
        call_variants = (
            {"freq": "yearly"},
            {"freq": "quarterly"},
            {"freq": "yearly", "trailing": True},
        )
        for kwargs in call_variants:
            try:
                cf = tkr.get_cashflow(**kwargs)
                if _usable(cf):
                    return cf
            except Exception:
                pass

    attr_names = ("cashflow", "quarterly_cashflow", "ttm_cashflow", "trailing_cashflow")
    for attr in attr_names:
        try:
            cf = getattr(tkr, attr)
            if _usable(cf):
                return cf
        except Exception:
            pass

    return pd.DataFrame()

# One row per ticker, taken from the most recent column of its cash-flow
# frame; values are expressed in millions.
cashflow_dict = {}
for ti in tickers:
    try:
        cf = fetch_cf(yf.Ticker(ti))
        if cf is None or cf.empty or len(cf.columns) == 0:
            # No data: all-NaN row, exported as empty cells.
            data_cf = dict.fromkeys(cashflow_keys, np.nan)
        else:
            last_col = _most_recent_col(cf.columns)

            # Index labels keyed by their whitespace-free form. Frames that
            # fetch_cf obtains from the `cashflow`-style properties appear to
            # use spaced labels ("Operating Cash Flow") - TODO confirm against
            # the installed yfinance - which would never equal the CamelCase
            # keys; labels without whitespace map to themselves.
            label_by_key = {}
            for lbl in cf.index:
                label_by_key.setdefault("".join(str(lbl).split()), lbl)

            data_cf = {}
            for key in cashflow_keys:
                lbl = label_by_key.get(key)
                v = cf.loc[lbl, last_col] if lbl is not None else None
                data_cf[key] = _to_millions(v)

            if data_cf.get("FreeCashFlow") is None:
                ocf = data_cf.get("OperatingCashFlow")
                capex = data_cf.get("CapitalExpenditure")
                if ocf is not None and capex is not None:
                    # Capital expenditure is a cash outflow and Yahoo normally
                    # reports it as a negative number, so "ocf - capex" would
                    # ADD it. Subtracting the magnitude is correct under
                    # either sign convention.
                    data_cf["FreeCashFlow"] = round(ocf - abs(capex), 2)
    except Exception as exc:
        # Any failure: report it and export an empty row for this ticker.
        print(f"[WARN] {ti}: {exc!r}", file=sys.stderr)
        data_cf = dict.fromkeys(cashflow_keys, np.nan)
    cashflow_dict[ti] = data_cf
    time.sleep(sleep_s)

df_cashflow = pd.DataFrame.from_dict(cashflow_dict, orient="index")
df_cashflow.index.name = "Ticker"
df_cashflow = df_cashflow.reindex(columns=cashflow_keys)

# Make sure None becomes NaN so it is exported as an empty cell.
df_cashflow = df_cashflow.replace({None: np.nan})

# Export with empty cells (neither 'NaN' nor 'None').
df_cashflow.to_csv(out_csv, index=True, na_rep="")

print(f"OK -> {Path(out_csv).resolve()}")
sys.stdout.flush()
