### Balance general

In [None]:
import re
import time
import pandas as pd
import yfinance as yf
from difflib import get_close_matches

def norm(s: str) -> str:
    """Return ``s`` lower-cased and stripped of everything except ``a-z`` and ``0-9``.

    The argument is passed through ``str()`` first, so non-string labels are
    accepted and normalised via their string representation.
    """
    lowered = str(s).lower()
    # Keep only ASCII lowercase letters and digits; every other character is dropped.
    return ''.join(ch for ch in lowered if 'a' <= ch <= 'z' or '0' <= ch <= '9')

def best_match(target_norm: str, candidates_norm: dict) -> str | None:
    if target_norm in candidates_norm:
        return target_norm
    m = get_close_matches(target_norm, list(candidates_norm.keys()), n=1, cutoff=0.84)
    if m:
        return m[0]
    for c in candidates_norm:
        if target_norm in c or c in target_norm:
            return c
    return None

def to_millions(x):
    """Convert ``x`` to a float expressed in millions, rounded to 2 decimals.

    Returns ``None`` when ``x`` cannot be converted with ``float()``
    (``TypeError`` or ``ValueError``).
    """
    try:
        amount = float(x)
    except (TypeError, ValueError):
        return None
    return round(amount / 1_000_000, 2)

# Load the ticker universe. With sep=None and the python engine, pandas
# detects the delimiter itself.
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
df_tickers = pd.read_csv(
    "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets_componentes_indices.csv",
    sep=None,
    engine="python",
)

# Distinct ticker symbols: missing values dropped, cast to str, whitespace trimmed.
tickers = df_tickers["ticker_market"].dropna().astype(str).str.strip().unique().tolist()

# Balance-sheet line items requested for every ticker. Each name is normalised
# and matched against the labels of the downloaded balance sheet.
balance_general_keys = [
# ASSETS
# short term
    "CashAndCashEquivalents",
    "CashFinancial",
    "CashCashEquivalentsAndShortTermInvestments",
    "TradingSecurities",
    "OtherShortTermInvestments",
    "AccountsReceivable",
    "OtherReceivables",
    "LoansHeldForSale",
    "OtherCurrentAssets",
# long term
    "LongTermInvestments",
    "InvestmentsAndAdvances",
    "Properties",
    "MachineryFurnitureEquipment",
    "ConstructionInProgress",
    "OtherIntangibleAssets",
    "Goodwill",
    "GoodwillAndOtherIntangibleAssets",
    "OtherNonCurrentAssets",
    "DeferredTaxAssets",
    "NetTangibleAssets",
    "TotalAssets",

# LIABILITIES
# short term
    "AccountsPayable",
    "CurrentNotesPayable",
    "CurrentDebt",
    "CurrentDebtAndCapitalLeaseObligation",
    "CurrentAccruedExpenses",
    "PayablesAndAccruedExpenses",
    "IncomeTaxPayable",
    "TaxesPayable",
    "SecuritiesSoldUnderRepurchaseAgreements",
    "FederalFundsPurchasedAndSecuritiesSoldUnderRepurchaseAgreements",
    "LiabilitiesOfDiscontinuedOperations",
# long term
    "LongTermDebt",
    "LongTermDebtAndCapitalLeaseObligation",
    "NonCurrentDeferredLiabilities",
    "NonCurrentDeferredTaxesLiabilities",
# liability totals
    "TotalDebt",
    "NetDebt",
    "TotalLiabilitiesNetMinorityInterest",

    # EQUITY
    "StockholdersEquity",
    "ShareIssued",
    "CommonStock",
    "CapitalStock",
    "CommonStockEquity",
    "OrdinarySharesNumber",
    "TreasuryStock",
    "AdditionalPaidInCapital",
    "InvestedCapital",
    "TotalCapitalization",
    "RetainedEarnings",
    "UnrealizedGainLoss",
    "GainsLossesNotAffectingRetainedEarnings",
    "ForeignCurrencyTranslationAdjustments",
    "OtherEquityAdjustments",
    "TotalEquityGrossMinorityInterest",
    "RevaluationReserve",
    "TangibleBookValue",
]

# Download one balance sheet per ticker, extract the requested line items and
# export the result as a CSV with one row per ticker.
# Maps ticker -> {requested key: value in millions, or None when unmatched}.
balance_general_dict = {}

for ti in tickers:
    # Attempt budget per ticker; only rate-limit errors consume an attempt.
    retry = 3
    while retry > 0:
        try:
            t = yf.Ticker(ti)
            # Prefer the annual statement; fall back to quarterly if it is missing/empty.
            bs = t.balance_sheet
            if bs is None or bs.empty:
                bs = t.quarterly_balance_sheet

            # Neither statement available: report and skip (ticker is left out of the dict).
            if bs is None or bs.empty:
                print(f"{ti} no tiene balance general disponible (anual/quarterly).")
                break
            # First column is used as the reporting period.
            # NOTE(review): assumes yfinance orders columns newest-first — confirm.
            last_period = bs.columns[0]
            # Normalised row label -> original row label, for lookup after matching.
            norm_index_map = {norm(idx): idx for idx in bs.index}

            data_balance = {}
            for key in balance_general_keys:
                nk = norm(key)
                match_norm = best_match(nk, norm_index_map)
                # No corresponding row in this statement: record an explicit None.
                if match_norm is None:
                    data_balance[key] = None
                    continue
                yahoo_label = norm_index_map[match_norm]
                value = bs.loc[yahoo_label, last_period]
                data_balance[key] = to_millions(value)

            balance_general_dict[ti] = data_balance
            print(f"{ti} procesado correctamente.")
            break

        except Exception as e:
            # Rate limiting is detected from the exception text; wait and retry.
            msg = str(e).lower()
            if "too many requests" in msg or "rate limit" in msg:
                retry -= 1
                wait_time = 10
                print(f"Rate limit detectado para {ti}. Esperando {wait_time}s. Intentos restantes: {retry}")
                time.sleep(wait_time)
            else:
                # Any other error aborts this ticker without retrying.
                print(f"Error al procesar {ti}: {e}")
                break

    # Fixed pause between tickers (presumably to stay under the API rate limit).
    time.sleep(1)

# One row per successfully processed ticker, one column per requested key.
df_balance_general = pd.DataFrame.from_dict(balance_general_dict, orient='index')
df_balance_general.index.name = "Ticker"
# Written relative to the current working directory.
outfile = "balances_generales12.csv"
df_balance_general.to_csv(outfile)
print(f"Archivo guardado como {outfile}")


AIR.PA procesado correctamente.
VIV.PA procesado correctamente.
STM.PA no tiene balance general disponible (anual/quarterly).
VIE.PA procesado correctamente.
SAF.PA procesado correctamente.
BVI.PA procesado correctamente.
EDEN.PA procesado correctamente.
INGA.AS procesado correctamente.
ASMI.AS no tiene balance general disponible (anual/quarterly).
RAND.AS procesado correctamente.
AD.AS procesado correctamente.
MT.AS procesado correctamente.
KPN.AS procesado correctamente.
ASML.AS procesado correctamente.
BESI.AS procesado correctamente.
NN.AS procesado correctamente.
IMCD.AS procesado correctamente.
ASRNL.AS procesado correctamente.
UNA.AS procesado correctamente.
ADYEN.AS procesado correctamente.
PRX.AS procesado correctamente.
CFR.SW procesado correctamente.
ABBN.SW procesado correctamente.
LOGN.SW procesado correctamente.
ZURN.SW procesado correctamente.
SLHN.SW procesado correctamente.
NOVN.SW procesado correctamente.
NESN.SW procesado correctamente.
ROG.SW procesado correctamente

In [None]:
import re
import time
import pandas as pd
import yfinance as yf
from difflib import get_close_matches

def norm(s: str) -> str:
    """Normalise a label: lower-case it and keep only ``a-z`` / ``0-9`` characters.

    ``s`` is converted with ``str()`` first, so any object can be passed.
    """
    allowed = "abcdefghijklmnopqrstuvwxyz0123456789"
    # Characters outside the allowed set (spaces, punctuation, accents) are discarded.
    return ''.join(filter(allowed.__contains__, str(s).lower()))

def best_match(target_norm: str, candidates_norm: dict) -> str | None:
    if target_norm in candidates_norm:
        return target_norm
    m = get_close_matches(target_norm, list(candidates_norm.keys()), n=1, cutoff=0.84)
    if m:
        return m[0]
    for c in candidates_norm:
        if target_norm in c or c in target_norm:
            return c
    return None

def to_millions(x):
    """Express ``x`` in millions as a float rounded to two decimal places.

    Anything ``float()`` rejects with ``TypeError`` or ``ValueError`` yields ``None``.
    """
    try:
        number = float(x)
    except (TypeError, ValueError):
        scaled = None
    else:
        scaled = round(number / 1_000_000, 2)
    return scaled

# Load the ticker universe. With sep=None and the python engine, pandas
# detects the delimiter itself.
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
df_tickers = pd.read_csv(
    "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets.csv",
    sep=None,
    engine="python",
)

# Distinct ticker symbols: missing values dropped, cast to str, whitespace trimmed.
tickers = df_tickers["ticker_market"].dropna().astype(str).str.strip().unique().tolist()

# Balance-sheet line items requested for every ticker. Each name is normalised
# and matched against the labels of the downloaded balance sheet.
balance_general_keys = [
# ASSETS
# short term
    "CashAndCashEquivalents",
    "CashFinancial",
    "CashCashEquivalentsAndShortTermInvestments",
    "TradingSecurities",
    "OtherShortTermInvestments",
    "AccountsReceivable",
    "OtherReceivables",
    "LoansHeldForSale",
    "OtherCurrentAssets",
# long term
    "LongTermInvestments",
    "InvestmentsAndAdvances",
    "Properties",
    "MachineryFurnitureEquipment",
    "ConstructionInProgress",
    "OtherIntangibleAssets",
    "Goodwill",
    "GoodwillAndOtherIntangibleAssets",
    "OtherNonCurrentAssets",
    "DeferredTaxAssets",
    "NetTangibleAssets",
    "TotalAssets",

# LIABILITIES
# short term
    "AccountsPayable",
    "CurrentNotesPayable",
    "CurrentDebt",
    "CurrentDebtAndCapitalLeaseObligation",
    "CurrentAccruedExpenses",
    "PayablesAndAccruedExpenses",
    "IncomeTaxPayable",
    "TaxesPayable",
    "SecuritiesSoldUnderRepurchaseAgreements",
    "FederalFundsPurchasedAndSecuritiesSoldUnderRepurchaseAgreements",
    "LiabilitiesOfDiscontinuedOperations",
# long term
    "LongTermDebt",
    "LongTermDebtAndCapitalLeaseObligation",
    "NonCurrentDeferredLiabilities",
    "NonCurrentDeferredTaxesLiabilities",
# liability totals
    "TotalDebt",
    "NetDebt",
    "TotalLiabilitiesNetMinorityInterest",

    # EQUITY
    "StockholdersEquity",
    "ShareIssued",
    "CommonStock",
    "CapitalStock",
    "CommonStockEquity",
    "OrdinarySharesNumber",
    "TreasuryStock",
    "AdditionalPaidInCapital",
    "InvestedCapital",
    "TotalCapitalization",
    "RetainedEarnings",
    "UnrealizedGainLoss",
    "GainsLossesNotAffectingRetainedEarnings",
    "ForeignCurrencyTranslationAdjustments",
    "OtherEquityAdjustments",
    "TotalEquityGrossMinorityInterest",
    "RevaluationReserve",
    "TangibleBookValue",
]


# Download one balance sheet per ticker, extract the requested line items and
# export the result as a CSV with one row per ticker.
# Maps ticker -> {requested key: value in millions, or None when unmatched}.
balance_general_dict = {}

for ti in tickers:
    # Attempt budget per ticker; only rate-limit errors consume an attempt.
    retry = 3
    while retry > 0:
        try:
            t = yf.Ticker(ti)
            # Prefer the annual statement; fall back to quarterly if it is missing/empty.
            bs = t.balance_sheet
            if bs is None or bs.empty:
                bs = t.quarterly_balance_sheet

            # Neither statement available: report and skip (ticker is left out of the dict).
            if bs is None or bs.empty:
                print(f"{ti} no tiene balance general disponible (anual/quarterly).")
                break
            # First column is used as the reporting period.
            # NOTE(review): assumes yfinance orders columns newest-first — confirm.
            last_period = bs.columns[0]
            # Normalised row label -> original row label, for lookup after matching.
            norm_index_map = {norm(idx): idx for idx in bs.index}

            data_balance = {}
            for key in balance_general_keys:
                nk = norm(key)
                match_norm = best_match(nk, norm_index_map)
                # No corresponding row in this statement: record an explicit None.
                if match_norm is None:
                    data_balance[key] = None
                    continue
                yahoo_label = norm_index_map[match_norm]
                value = bs.loc[yahoo_label, last_period]
                data_balance[key] = to_millions(value)

            balance_general_dict[ti] = data_balance
            print(f"{ti} procesado correctamente.")
            break

        except Exception as e:
            # Rate limiting is detected from the exception text; wait and retry.
            msg = str(e).lower()
            if "too many requests" in msg or "rate limit" in msg:
                retry -= 1
                wait_time = 10
                print(f"Rate limit detectado para {ti}. Esperando {wait_time}s. Intentos restantes: {retry}")
                time.sleep(wait_time)
            else:
                # Any other error aborts this ticker without retrying.
                print(f"Error al procesar {ti}: {e}")
                break

    # Fixed pause between tickers (presumably to stay under the API rate limit).
    time.sleep(1)

# One row per successfully processed ticker, one column per requested key.
df_balance_general = pd.DataFrame.from_dict(balance_general_dict, orient='index')
df_balance_general.index.name = "Ticker"
# Written relative to the current working directory.
outfile = "balances_generales.csv"
df_balance_general.to_csv(outfile)
print(f"Archivo guardado como {outfile}")


BASFN.DE no tiene balance general disponible (anual/quarterly).
BAYGN.DE no tiene balance general disponible (anual/quarterly).
ALVG.DE no tiene balance general disponible (anual/quarterly).
ADSGN.DE no tiene balance general disponible (anual/quarterly).
LHAG.DE no tiene balance general disponible (anual/quarterly).
SIEGN.DE no tiene balance general disponible (anual/quarterly).
VOWG.DE no tiene balance general disponible (anual/quarterly).
DBKGN.DE no tiene balance general disponible (anual/quarterly).
CONG.DE no tiene balance general disponible (anual/quarterly).
DPWGN.DE no tiene balance general disponible (anual/quarterly).
DAIGN.DE no tiene balance general disponible (anual/quarterly).
PSHG_P.DE no tiene balance general disponible (anual/quarterly).
HNKG_P.DE no tiene balance general disponible (anual/quarterly).
CBKG.DE no tiene balance general disponible (anual/quarterly).
DB1GN.DE no tiene balance general disponible (anual/quarterly).
BMWG.DE no tiene balance general disponible