In [1]:
import pandas as pd
import yfinance as yf

In [2]:
class YahooClient:
    """Small convenience wrapper around a single `yf.Ticker` handle.

    Each public method forwards to one getter of the wrapped handle and
    returns that getter's result unchanged.
    """

    def __init__(self, ticker: str):
        """Store the symbol and create the underlying `yf.Ticker` for it.

        Args:
            ticker: Symbol passed verbatim to `yf.Ticker`.
        """
        self.ticker = ticker
        self._yf = yf.Ticker(ticker)

    def identification(self) -> dict:
        """Return ``{"ticker": ..., "isin": ...}`` for this client.

        The ISIN is whatever the wrapped handle's `get_isin()` returns.
        """
        isin_code = self._yf.get_isin()
        return {"ticker": self.ticker, "isin": isin_code}

    def dividends(self):
        """Return the result of the wrapped handle's `get_dividends()`."""
        handle = self._yf
        return handle.get_dividends()

    def shares_outstanding(self):
        """Return the result of the wrapped handle's `get_shares_full()`."""
        handle = self._yf
        return handle.get_shares_full()

    def income_statement(self):
        """Return the result of the wrapped handle's `get_income_stmt()`."""
        handle = self._yf
        return handle.get_income_stmt()

    def balance_sheet(self):
        """Return the result of the wrapped handle's `get_balance_sheet()`."""
        handle = self._yf
        return handle.get_balance_sheet()

    def cashflow(self):
        """Return the result of the wrapped handle's `get_cashflow()`."""
        handle = self._yf
        return handle.get_cashflow()

    def full_data(self) -> dict:
        """Collect every section exposed by this client into one dict.

        Sections are fetched in the listed order and keyed by section name.
        """
        fetchers = (
            ("identification", self.identification),
            ("dividends", self.dividends),
            ("shares_outstanding", self.shares_outstanding),
            ("income_statement", self.income_statement),
            ("balance_sheet", self.balance_sheet),
            ("cashflow", self.cashflow),
        )
        return {section: fetch() for section, fetch in fetchers}




In [14]:
# Survey which balance-sheet row labels appear across all tickers.
# The ticker symbols are read from the column labelled '0' of tickers.csv.
tickers = pd.read_csv("tickers.csv")['0'].tolist()
count = 0  # not used in this cell; kept so the notebook namespace is unchanged
feature_names = []
for ticker in tickers:
    client = YahooClient(ticker)
    bs = client.balance_sheet()
    # An empty frame has no row labels to contribute, so report it the same
    # way as a missing (None) statement instead of passing over it silently.
    if bs is None or bs.empty:
        print(f"Missing balance sheet for {ticker}")
        continue
    feature_names.extend(bs.index.tolist())

# Number of occurrences of each row label over all collected statements.
pd.DataFrame(feature_names).value_counts()

CommonStockEquity                      5744
TotalLiabilitiesNetMinorityInterest    5744
TotalAssets                            5744
TangibleBookValue                      5744
NetTangibleAssets                      5744
                                       ... 
UnrealizedGainLoss                       67
TotalPartnershipCapital                  59
LimitedPartnershipCapital                56
GeneralPartnershipCapital                30
RestrictedCommonStock                     9
Name: count, Length: 144, dtype: int64

In [22]:
# Write the row-label frequency table built from `feature_names` to disk,
# keeping the labels as the index column of the CSV.
(
    pd.DataFrame(feature_names)
    .value_counts()
    .to_csv("feature_names.csv", index=True)
)

In [16]:
# Print the complete frequency table of `feature_names`: the pandas display
# limits are set to None only for the duration of this block.
with pd.option_context(
    "display.max_rows", None, "display.max_columns", None,
    "display.width", None, "display.max_colwidth", None,
):
    print(pd.DataFrame(feature_names).value_counts())

CommonStockEquity                                               5744
TotalLiabilitiesNetMinorityInterest                             5744
TotalAssets                                                     5744
TangibleBookValue                                               5744
NetTangibleAssets                                               5744
StockholdersEquity                                              5744
TotalEquityGrossMinorityInterest                                5743
OrdinarySharesNumber                                            5734
ShareIssued                                                     5734
CashAndCashEquivalents                                          5719
CapitalStock                                                    5702
CommonStock                                                     5695
InvestedCapital                                                 5660
TotalCapitalization                                             5660
TotalDebt                         

In [17]:
# Survey which income-statement row labels appear across all tickers.
# The ticker symbols are read from the column labelled '0' of tickers.csv.
tickers = pd.read_csv("tickers.csv")['0'].tolist()
count = 0  # not used in this cell; kept so the notebook namespace is unchanged
feature_names_inc = []
for ticker in tickers:
    client = YahooClient(ticker)
    income_stmt = client.income_statement()
    # An empty frame has no row labels to contribute, so report it the same
    # way as a missing (None) statement instead of passing over it silently.
    if income_stmt is None or income_stmt.empty:
        print(f"Missing income statement for {ticker}")
        continue
    feature_names_inc.extend(income_stmt.index.tolist())

# Number of occurrences of each row label over all collected statements.
pd.DataFrame(feature_names_inc).value_counts()

NormalizedIncome                    5744
TaxEffectOfUnusualItems             5743
TaxRateForCalcs                     5743
NetIncomeCommonStockholders         5740
NetIncome                           5740
                                    ... 
PolicyholderBenefitsCeded             48
SecuritiesAmortization                24
ExciseTaxes                           16
NetIncomeFromTaxLossCarryforward       8
DepletionIncomeStatement               5
Name: count, Length: 84, dtype: int64

In [21]:
# Write the row-label frequency table built from `feature_names_inc` to disk,
# keeping the labels as the index column of the CSV.
(
    pd.DataFrame(feature_names_inc)
    .value_counts()
    .to_csv("feature_names_inc.csv", index=True)
)

In [19]:
# Survey which cash-flow row labels appear across all tickers.
# The ticker symbols are read from the column labelled '0' of tickers.csv.
tickers = pd.read_csv("tickers.csv")['0'].tolist()
count = 0  # not used in this cell; kept so the notebook namespace is unchanged
feature_names_cash = []
for ticker in tickers:
    client = YahooClient(ticker)
    cashflow_stmt = client.cashflow()
    # An empty frame has no row labels to contribute, so report it the same
    # way as a missing (None) statement instead of passing over it silently.
    if cashflow_stmt is None or cashflow_stmt.empty:
        print(f"Missing cashflow for {ticker}")
        continue
    feature_names_cash.extend(cashflow_stmt.index.tolist())

# Number of occurrences of each row label over all collected statements.
pd.DataFrame(feature_names_cash).value_counts()

FreeCashFlow                    5584
EndCashPosition                 5577
ChangesInCash                   5577
BeginningCashPosition           5574
FinancingCashFlow               5573
                                ... 
InterestPaidDirect                14
ChangeInDividendPayable            9
DividendsReceivedDirect            8
DividendsPaidDirect                2
ReceiptsfromGovernmentGrants       1
Name: count, Length: 120, dtype: int64

In [23]:
# Write the row-label frequency table built from `feature_names_cash` to disk,
# keeping the labels as the index column of the CSV.
(
    pd.DataFrame(feature_names_cash)
    .value_counts()
    .to_csv("feature_names_cash.csv", index=True)
)