# Tools
Shared imports, constants, and utility functions.


In [None]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

try:
    import yfinance as yf
except ImportError as error:
    raise ImportError("yfinance is required. Install with: pip install yfinance") from error

# Let pandas display up to 50 columns before truncating wide DataFrames.
pd.set_option("display.max_columns", 50)

# Column-name lists for the Compustat input. The names are lowercase snake_case,
# i.e. the form normalize_column_names() produces.
# NOTE(review): presumably checked with validate_required_columns() — confirm at the call site.
REQUIRED_COMPUSTAT_COLUMNS = ["tic", "fyear", "revenue", "net_income", "stockholders_equity"]
# Same list without "tic"; presumably the columns coerced to numeric — TODO confirm.
NUMERIC_COMPUSTAT_COLUMNS = ["fyear", "revenue", "net_income", "stockholders_equity"]
# Feature column names; presumably the model inputs — verify against the modelling code.
MODEL_FEATURE_COLUMNS = ["revenue_growth", "roe", "vol_60", "momentum_6_12"]
# Candidate threshold values (0.025 .. 0.10); their exact use is not visible in this section.
DECISION_THRESHOLDS = [0.025, 0.05, 0.075, 0.10]

# NOTE(review): looks like extra days of price history requested before the
# first date of interest — confirm against the download code.
LOOKBACK_BUFFER_DAYS = 400
# Presumably the number of tickers per download request (see split_into_chunks) — TODO confirm.
PRICE_DOWNLOAD_CHUNK_SIZE = 50
# Default field name looked up by extract_price_field().
REQUIRED_PRICE_FIELD = "Adj Close"


In [None]:
def normalize_column_names(dataframe):
    """Return a copy of ``dataframe`` with normalized column labels.

    String labels are stripped of surrounding whitespace, lower-cased, and
    have their spaces replaced by underscores (``" Net Income "`` becomes
    ``"net_income"``). Labels that are not strings (for example integer
    year columns) are kept unchanged instead of raising ``AttributeError``.

    Args:
        dataframe: The DataFrame whose column labels should be normalized.
            It is not modified.

    Returns:
        A copy of ``dataframe`` carrying the normalized column labels.
    """

    def _normalize_label(label):
        # Only strings support strip/lower/replace; pass anything else through.
        if not isinstance(label, str):
            return label
        return label.strip().lower().replace(" ", "_")

    standardized_dataframe = dataframe.copy()
    standardized_dataframe.columns = [
        _normalize_label(label) for label in standardized_dataframe.columns
    ]
    return standardized_dataframe


def validate_required_columns(dataframe, required_columns):
    """Raise if any of ``required_columns`` is absent from ``dataframe``.

    Args:
        dataframe: Object exposing a ``columns`` attribute to check against.
        required_columns: Iterable of column names that must be present.

    Raises:
        ValueError: If at least one required column is missing. The message
            lists the missing names in the order they were requested.
    """
    absent = []
    for name in required_columns:
        if name not in dataframe.columns:
            absent.append(name)

    if absent:
        raise ValueError(f"Missing required columns: {absent}")


def split_into_chunks(values, chunk_size):
    """Split a sliceable sequence into consecutive chunks.

    Args:
        values: A sequence supporting ``len()`` and slicing.
        chunk_size: Maximum number of items per chunk; must be positive.

    Returns:
        A list of slices of ``values``, each holding at most ``chunk_size``
        items. The last chunk may be shorter. Empty input gives ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently discard every
    # item, and a zero step fails with an unrelated range() message, so
    # reject both up front with a clear error.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    return [values[start:start + chunk_size] for start in range(0, len(values), chunk_size)]


def extract_price_field(price_dataframe, field_name=REQUIRED_PRICE_FIELD):
    """Pull a single price field out of a downloaded price table.

    When the columns form a ``pd.MultiIndex``, column level 0 is searched
    first and then level 1; the cross-section for ``field_name`` on the
    first matching level is returned. Otherwise, the column is looked up
    directly and returned as a one-column DataFrame.

    Args:
        price_dataframe: DataFrame of downloaded prices.
        field_name: Name of the field to extract.

    Returns:
        A copy of the selected data as a DataFrame.

    Raises:
        RuntimeError: If ``field_name`` cannot be found in the columns.
    """
    columns = price_dataframe.columns

    if isinstance(columns, pd.MultiIndex):
        # The field may sit on either column level, so probe both in order.
        for level in (0, 1):
            if field_name in columns.get_level_values(level):
                return price_dataframe.xs(field_name, axis=1, level=level).copy()

    if field_name not in columns:
        raise RuntimeError(f"Required field '{field_name}' not found in Yahoo output.")

    return price_dataframe[[field_name]].copy()
