### Financial data normalization

In [2]:
class FinDataNormalizer:
    """Rename the keys of a raw financial-data record to standard field names.

    Each standard field has a list of accepted aliases in ``self.mapping``.
    ``normalize()`` copies the value of the first alias found in the input
    to the standard field name and reports input keys that match no alias.
    """

    def __init__(self, data):
        """
        Initialize with raw data.

        Parameters:
        data (dict): Dictionary containing financial data from whatever source
        """
        self.data = data
        self.standardized_data = {}

        # Mapping dictionary to standardize various attribute names.
        # Aliases are tried in list order; the first one present in the input wins.
        # NOTE: some aliases are listed under more than one standard field
        # (e.g. "date_key" under "date" and "datekey", "price" under "close" and
        # "price", "liabilities" under "liabilities" and "debt"), so a single
        # input key can populate several standard fields.
        self.mapping = {
            "ticker": ["ticker", "symbol", "stock_symbol", "asset_code"],
            "date": ["date", "timestamp", "trade_date", "report_date", "datetime", "date_key"],
            "open": ["open", "open_price", "opening", "start_price"],
            "close": ["close", "closing_price", "end_price", "price_close", "price"],
            "high": ["high", "high_price", "day_high"],
            "low": ["low", "low_price", "day_low"],
            "volume": ["volume", "trade_volume", "shares_traded", "turnover", "volume_traded"],
            "dividends": ["dividends", "dividend_amount", "dividend_per_share", "cash_dividend"],
            "splits": ["splits", "split_ratio", "stock_split_ratio", "split_factor", "adjustment_factor"],
            "dimension": ["dimension", "reporting_period", "time_frame", "period_type", "frequency", "interval"],
            "datekey": ["datekey", "date_key", "financial_date", "statement_date"],
            "revenue": ["revenue", "total_revenue", "sales", "gross_revenue"],
            "netinc": ["netinc", "net_income", "net_profit", "earnings"],
            "eps": ["eps", "earnings_per_share", "eps_basic", "eps_diluted", "net_eps"],
            "assets": ["assets", "total_assets", "asset_value"],
            "liabilities": ["liabilities", "total_liabilities", "liability_amount"],
            "equity": ["equity", "shareholders_equity", "equity_value"],
            "cashneq": ["cashneq", "cash_and_equivalents", "cash_on_hand", "liquid_assets"],
            "debt": ["debt", "total_debt", "outstanding_debt", "liabilities"],
            "option_symbol": ["option_symbol", "contract_symbol", "option_id"],
            "underlying_symbol": ["underlying_symbol", "underlying_asset", "underlying_stock", "base_symbol"],
            "expiration_date": ["expiration_date", "expiry", "maturity_date", "exp_date"],
            "strike_price": ["strike_price", "strike", "strike_rate", "option_strike"],
            "option_type": ["option_type", "contract_type", "type"],
            "last_trade_price": ["last_trade_price", "last_price", "recent_trade_price"],
            "bid": ["bid", "current_bid", "bid_price"],
            "ask": ["ask", "current_ask", "ask_price"],
            "open_interest": ["open_interest", "oi", "open_contracts"],
            "implied_volatility": ["implied_volatility", "iv", "imp_volatility", "volatility_percentage"],
            "fiscal_period": ["fiscal_period", "fiscal_quarter", "reporting_period", "estimation_period"],
            "estimate_eps": ["estimate_eps", "estimated_eps", "expected_eps"],
            "actual_eps": ["actual_eps", "reported_eps", "actual_earnings"],
            "surprise": ["surprise", "earnings_surprise", "eps_difference"],
            "announcement_date": ["announcement_date", "release_date", "report_date", "earnings_date"],
            "country": ["country", "nation", "region"],
            "indicator": ["indicator", "indicator_name", "economic_indicator", "metric"],
            "value": ["value", "data_value", "amount", "indicator_value"],
            "units": ["units", "measurement_units", "unit_type", "unit"],
            "frequency": ["frequency", "interval", "periodicity", "data_frequency", "freq"],
            "commodity": ["commodity", "commodity_name", "asset", "product"],
            "price": ["price", "close_price", "commodity_price", "market_price"],
            "currency": ["currency", "currency_code", "quote_currency", "cur"]
        }

    def normalize(self):
        """
        Normalize the input data to a standardized format.

        For every standard field, the value of the first alias (in mapping
        order) that is present in the input is stored under the standard name.
        Input keys that are not listed as an alias of any standard field are
        reported on stdout. A known alias that loses to a higher-priority
        alias of the same field is ignored silently, not reported as an error.

        Returns:
        dict: The standardized record (also stored in self.standardized_data).
        """
        # Treat a missing record as empty instead of failing on None.keys().
        source = self.data if self.data is not None else {}

        # Build a fresh result so fields from an earlier call (made with
        # different self.data) do not leak into this one.
        standardized = {}
        for standard_key, aliases in self.mapping.items():
            for alias in aliases:
                if alias in source:
                    standardized[standard_key] = source[alias]
                    break  # First matching alias wins.
        self.standardized_data = standardized

        # A key is unrecognized only if it is not an alias of any field.
        known_aliases = {
            alias for aliases in self.mapping.values() for alias in aliases
        }
        unrecognized = [key for key in source.keys() if key not in known_aliases]

        # Sorted so the messages come out in a deterministic order.
        for key in sorted(unrecognized, key=str):
            print(f"Error: '{key}' is not a recognized field.")

        return self.standardized_data

In [3]:
# Usage: normalizer = FinDataNormalizer(raw_data), then normalized_data = normalizer.normalize()
# Here's an example
# Sample record that uses vendor-specific field names.
raw_data = dict(
    symbol="AAPL",
    trade_date="2024-11-01",
    open_price=175.0,
    closing_price=180.0,
    high_price=182.5,
    low_price=173.0,
    trade_volume=1000000,
    dividend_amount=0.82,
)

# Build the normalizer and convert the record to the standard field names.
normalizer = FinDataNormalizer(raw_data)
normalized_data = normalizer.normalize()

# Show the standardized record.
print(normalized_data)

{'ticker': 'AAPL', 'date': '2024-11-01', 'open': 175.0, 'close': 180.0, 'high': 182.5, 'low': 173.0, 'volume': 1000000, 'dividends': 0.82}
