In [2]:
import yfinance as yf
import pandas as pd
import time
import random
from typing import Iterable, Tuple, List

In [None]:
class YahooClient:
    """Convenience wrapper around a single ``yfinance.Ticker``.

    Each accessor forwards to the corresponding ``get_*`` method of the
    wrapped ticker object; ``statement_extractor`` condenses one statement
    frame into a single-row feature frame.
    """

    def __init__(self, ticker: str):
        """Store the ticker symbol and create the underlying ``yf.Ticker``."""
        self.ticker = ticker
        self._yf = yf.Ticker(ticker)

    def identification(self):
        """Return ``{"ticker": ..., "isin": ...}`` using ``get_isin()`` for the ISIN."""
        isin = self._yf.get_isin()
        return {"ticker": self.ticker, "isin": isin}

    def dividends(self):
        """Return whatever ``get_dividends()`` yields for this ticker."""
        return self._yf.get_dividends()

    def shares_outstanding(self):
        """Return whatever ``get_shares_full()`` yields for this ticker."""
        return self._yf.get_shares_full()

    def income_statement(self):
        """Return whatever ``get_income_stmt()`` yields for this ticker."""
        return self._yf.get_income_stmt()

    def balance_sheet(self):
        """Return whatever ``get_balance_sheet()`` yields for this ticker."""
        return self._yf.get_balance_sheet()

    def cashflow(self):
        """Return whatever ``get_cashflow()`` yields for this ticker."""
        return self._yf.get_cashflow()

    @staticmethod
    def statement_extractor(df: pd.DataFrame, feature_aliases: dict, statement_type: str) -> pd.DataFrame:
        """Condense a statement frame into a one-row frame of named features.

        Declared as a static method so that it works both as
        ``YahooClient.statement_extractor(...)`` and on an instance; without
        the decorator an instance call would bind the instance to ``df``.

        Args:
            df: Statement frame whose index holds the line-item labels and
                whose columns are reporting periods.
            feature_aliases: Maps each output feature name to an ordered list
                of candidate index labels; the first label present wins.
            statement_type: Suffix for the date column (``Date_<statement_type>``).

        Returns:
            A single-row DataFrame with one column per feature plus the date
            column. A feature is ``None`` when none of its aliases is present;
            every value (including the date) is ``None`` when ``df`` has no
            columns.
        """
        has_periods = len(df.columns) > 0
        record = {}
        for feature, aliases in feature_aliases.items():
            # First alias found in the index wins; stays None when nothing
            # matches or when the alias list is empty.
            label = next((alias for alias in aliases if alias in df.index), None)
            if label is not None and has_periods:
                record[feature] = df.loc[label].iloc[0]
            else:
                record[feature] = None
        # The label of the first column is used as the statement date
        # (presumably the most recent period - confirm against yfinance).
        record[f"Date_{statement_type}"] = df.columns[0] if has_periods else None
        return pd.DataFrame([record])

    def extracted_data(self):
        """Call every accessor once and return the results keyed by name."""
        return {
            "identification": self.identification(),
            "dividends": self.dividends(),
            "shares_outstanding": self.shares_outstanding(),
            "income_statement": self.income_statement(),
            "balance_sheet": self.balance_sheet(),
            "cashflow": self.cashflow(),
        }




In [None]:
# Identification lookup for Tesla; the client is named after the ticker it wraps.
tsla = YahooClient("TSLA")
tsla.identification()


{'ticker': 'TSLA', 'isin': '-'}

In [5]:
# Client reused by the extraction calls below. The balance sheet is fetched
# where it is consumed, so no separate (discarded) fetch is issued here.
aapl = YahooClient("AAPL")

# Maps each output feature to an ordered list of candidate balance-sheet row
# labels; extract_balance_sheet takes the first label present in the frame.
BALANCE_SHEET_FEATURE_ALIASES = {
    # Company size / balance-sheet total
    "total_assets": ["TotalAssets"],
    # Equity (economic substance)
    "equity": [
        "StockholdersEquity",
        "CommonStockEquity",
        "TotalEquityGrossMinorityInterest",
    ],
    # Tangible substance (excluding goodwill)
    "tangible_equity": ["TangibleBookValue", "NetTangibleAssets"],
    # Indebtedness
    "total_debt": ["TotalDebt"],
    # Net debt is deliberately kept separate (not a synonym of total debt!)
    "net_debt": ["NetDebt"],
    # Liquidity / cash exposure
    "cash": [
        "CashAndCashEquivalents",
        "CashCashEquivalentsAndShortTermInvestments",
        "CashFinancial",
    ],
    # Capital employed / invested capital
    "invested_capital": ["InvestedCapital", "TotalCapitalization"],
    # Property, plant and equipment / capital intensity
    "ppe": ["NetPPE", "GrossPPE"],
    # Working capital (cyclical sensitivity)
    "working_capital": ["WorkingCapital"],
    # Share count (scaling / normalisation)
    "shares_outstanding": ["OrdinarySharesNumber", "ShareIssued"],
}


def extract_balance_sheet(df: pd.DataFrame, feature_aliases: dict) -> pd.DataFrame:
    """Condense a balance-sheet frame into a one-row frame of named features.

    Args:
        df: Balance-sheet frame whose index holds the line-item labels and
            whose columns are reporting periods.
        feature_aliases: Maps each output feature name to an ordered list of
            candidate index labels; the first label present in ``df`` wins.

    Returns:
        A single-row DataFrame with one column per feature plus ``Date_BS``.
        A feature is ``None`` when none of its aliases is present (or its
        alias list is empty). When ``df`` has no columns every value,
        including ``Date_BS``, is ``None`` instead of raising ``IndexError``.
    """
    has_periods = len(df.columns) > 0
    record = {}
    for feature, aliases in feature_aliases.items():
        # First alias found in the index wins; None when nothing matches.
        label = next((alias for alias in aliases if alias in df.index), None)
        if label is not None and has_periods:
            record[feature] = df.loc[label].iloc[0]
        else:
            record[feature] = None
    # The label of the first column is used as the statement date
    # (presumably the most recent period - confirm against yfinance).
    record["Date_BS"] = df.columns[0] if has_periods else None
    return pd.DataFrame([record])

# Fetch the raw statement first, then reduce it to the feature row shown below.
aapl_balance_sheet_raw = aapl.balance_sheet()
extract_balance_sheet(aapl_balance_sheet_raw, BALANCE_SHEET_FEATURE_ALIASES)

Unnamed: 0,total_assets,equity,tangible_equity,total_debt,net_debt,cash,invested_capital,ppe,working_capital,shares_outstanding,Date_BS
0,359241000000.0,73733000000.0,73733000000.0,98657000000.0,62723000000.0,35934000000.0,172390000000.0,49834000000.0,-17674000000.0,14773260000.0,2025-09-30


In [6]:
# Maps each output feature to an ordered list of candidate cash-flow row
# labels; extract_cashflow takes the first label present in the frame.
CASHFLOW_FEATURE_ALIASES = {
    # Earnings power (bottom line)
    "net_income": ["NetIncomeFromContinuingOperations"],
    # Operating cash flow (core earnings quality)
    "operating_cashflow": [
        "OperatingCashFlow",
        "CashFlowFromContinuingOperatingActivities",
    ],
    # Free cash flow (central diversification measure)
    "free_cashflow": ["FreeCashFlow"],
    # Investments / capital intensity
    "capital_expenditure": [
        "CapitalExpenditure",
        "CapitalExpenditureReported",
        "PurchaseOfPPE",
    ],
    # Net investment balance
    "net_ppe_investment": ["NetPPEPurchaseAndSale"],
    # Financing - debt
    "net_debt_issuance": ["NetIssuancePaymentsOfDebt", "NetLongTermDebtIssuance"],
    # Financing - equity
    "net_equity_issuance": [
        "NetCommonStockIssuance",
        "CommonStockIssuance",
        "IssuanceOfCapitalStock",
    ],
    # Share buybacks (capital-structure signal)
    "share_repurchases": ["RepurchaseOfCapitalStock", "CommonStockPayments"],
    # Dividends (capital returned to shareholders)
    "dividends_paid": ["CashDividendsPaid", "CommonStockDividendPaid"],
    # Depreciation and amortisation (non-cash, earnings quality)
    "depreciation_amortization": [
        "DepreciationAndAmortization",
        "DepreciationAmortizationDepletion",
        "Depreciation",
    ],
    # Stock-based compensation (dilution risk)
    "stock_based_compensation": ["StockBasedCompensation"],
}

def extract_cashflow(df: pd.DataFrame, feature_aliases: dict) -> pd.DataFrame:
    """Condense a cash-flow frame into a one-row frame of named features.

    Args:
        df: Cash-flow frame whose index holds the line-item labels and whose
            columns are reporting periods.
        feature_aliases: Maps each output feature name to an ordered list of
            candidate index labels; the first label present in ``df`` wins.

    Returns:
        A single-row DataFrame with one column per feature plus ``Date_CF``.
        A feature is ``None`` when none of its aliases is present (or its
        alias list is empty). When ``df`` has no columns every value,
        including ``Date_CF``, is ``None`` instead of raising ``IndexError``.
    """
    has_periods = len(df.columns) > 0
    record = {}
    for feature, aliases in feature_aliases.items():
        # First alias found in the index wins; None when nothing matches.
        label = next((alias for alias in aliases if alias in df.index), None)
        if label is not None and has_periods:
            record[feature] = df.loc[label].iloc[0]
        else:
            record[feature] = None
    # The label of the first column is used as the statement date
    # (presumably the most recent period - confirm against yfinance).
    record["Date_CF"] = df.columns[0] if has_periods else None
    return pd.DataFrame([record])


In [7]:
# Same extraction for a second ticker (ALV.DE).
alz = YahooClient("ALV.DE")
# NOTE(review): despite its name, `incs` holds the cash-flow statement.
incs = alz.cashflow()
extract_cashflow(df=incs, feature_aliases=CASHFLOW_FEATURE_ALIASES)

Unnamed: 0,net_income,operating_cashflow,free_cashflow,capital_expenditure,net_ppe_investment,net_debt_issuance,net_equity_issuance,share_repurchases,dividends_paid,depreciation_amortization,stock_based_compensation,Date_CF
0,10540000000.0,31903000000.0,29988000000.0,-1915000000.0,-1643000000.0,3119000000.0,-1500000000.0,,-5751000000.0,2154000000.0,,2024-12-31


In [None]:
# Maps each output feature to an ordered list of candidate income-statement
# row labels; extract_income_statement takes the first label present.
INCOME_STATEMENT_FEATURE_ALIASES = {
    # Revenue / company size
    "revenue": ["TotalRevenue", "OperatingRevenue"],
    # Gross profit (cost structure - very rough)
    "gross_profit": ["GrossProfit"],
    # Operating result
    "operating_income": ["OperatingIncome", "TotalOperatingIncomeAsReported"],
    # EBIT (independent of capital structure)
    "ebit": ["EBIT"],
    # EBITDA (near-cash earnings power)
    "ebitda": ["EBITDA", "NormalizedEBITDA"],
    # Pre-tax result
    "pretax_income": ["PretaxIncome"],
    # Net result (bottom line)
    "net_income": [
        "NetIncome",
        "NetIncomeCommonStockholders",
        "NetIncomeFromContinuingOperationNetMinorityInterest",
        "NetIncomeContinuousOperations",
    ],
    # Normalised result (excluding one-off effects)
    "normalized_income": ["NormalizedIncome"],
    # Depreciation & amortisation (non-cash)
    "depreciation_amortization": [
        "DepreciationAndAmortizationInIncomeStatement",
        "DepreciationAmortizationDepletionIncomeStatement",
        "DepreciationIncomeStatement",
        "Amortization",
    ],
    # Interest expense (capital-structure signal)
    "interest_expense": [
        "InterestExpense",
        "InterestExpenseNonOperating",
        "TotalOtherFinanceCost",
    ],
    # Tax expense (rough only)
    "tax_expense": ["TaxProvision"],
    # Research & development (innovation profile)
    "research_and_development": ["ResearchAndDevelopment"],
}
def extract_income_statement(df: pd.DataFrame, feature_aliases: dict) -> pd.DataFrame:
    """Condense an income-statement frame into a one-row frame of named features.

    Args:
        df: Income-statement frame whose index holds the line-item labels and
            whose columns are reporting periods.
        feature_aliases: Maps each output feature name to an ordered list of
            candidate index labels; the first label present in ``df`` wins.

    Returns:
        A single-row DataFrame with one column per feature plus ``Date_IS``.
        A feature is ``None`` when none of its aliases is present (or its
        alias list is empty). When ``df`` has no columns every value,
        including ``Date_IS``, is ``None`` instead of raising ``IndexError``.
    """
    has_periods = len(df.columns) > 0
    record = {}
    for feature, aliases in feature_aliases.items():
        # First alias found in the index wins; None when nothing matches.
        label = next((alias for alias in aliases if alias in df.index), None)
        if label is not None and has_periods:
            record[feature] = df.loc[label].iloc[0]
        else:
            record[feature] = None
    # The label of the first column is used as the statement date
    # (presumably the most recent period - confirm against yfinance).
    record["Date_IS"] = df.columns[0] if has_periods else None
    return pd.DataFrame([record])



In [None]:
# Fetch the raw statement first, then reduce it to the one-row feature frame.
aapl_income_statement_raw = aapl.income_statement()
extract_income_statement(aapl_income_statement_raw, INCOME_STATEMENT_FEATURE_ALIASES)

0    4.161610e+11
Name: revenue, dtype: float64