In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# --- 1. Data Generation (Synthetic Data for Demonstration) ---
# In a real-world scenario, you would need to source actual historical data
# for your chosen style factors and asset returns.
# Factor data can be complex to construct from raw financial data (e.g., Quality, Low Volatility).
# For academic factors (like Fama-French, Carhart), Kenneth French's data library is a common source.
# For others, you might use commercial data providers or construct them yourself.

def generate_synthetic_data_multi_style(num_periods=360, seed=45):
    """
    Generates synthetic historical data for an asset and a broader set of style factors,
    representative of a multi-style model (e.g., AQR Style Premia-like).

    Factors included:
    - Mkt-RF: Market Risk Premium (Market Excess Return)
    - SMB: Small Minus Big (Size)
    - HML: High Minus Low (Value)
    - UMD: Up Minus Down (Momentum)
    - QMJ: Quality Minus Junk (Profitability/Quality)
    - BAB: Betting Against Beta (Low Volatility/Defensive)

    Args:
        num_periods (int): Number of periods (rows) to simulate. Returns are
            treated as monthly by the parameters chosen below.
        seed (int or None): Value handed to ``np.random.seed``. The default (45)
            reproduces the previously hard-coded seed, so existing callers get
            exactly the same numbers as before. Passing ``None`` asks NumPy to
            reseed from fresh entropy, giving a different sample on every call.

    Returns:
        pd.DataFrame: One row per period with columns 'Asset_Return', 'RF' and
        the six factor columns ('Mkt-RF', 'SMB', 'HML', 'UMD', 'QMJ', 'BAB'),
        in that order.

    Note:
        This function (re)seeds NumPy's *global* legacy random state as a side
        effect; any later ``np.random.*`` call in the same process is affected.
    """
    np.random.seed(seed)  # for reproducibility (global RNG state is reset here)

    # Constant monthly risk-free rate: 0.2% annual divided over 12 months.
    rf = 0.002 / 12

    # Illustrative (mean, standard deviation) of monthly returns for each factor.
    # The dict order is also the order in which the random draws are made, so it
    # must not be changed if the generated numbers are to stay reproducible.
    factor_params = {
        'Mkt-RF': (0.005, 0.02),    # Market
        'SMB': (0.002, 0.015),      # Size
        'HML': (0.001, 0.018),      # Value
        'UMD': (0.004, 0.025),      # Momentum
        'QMJ': (0.003, 0.012),      # Quality
        'BAB': (0.0035, 0.015),     # Low Volatility
    }
    factors_df = pd.DataFrame({
        name: np.random.normal(mean, std, num_periods)
        for name, (mean, std) in factor_params.items()
    })

    # Hypothetical "true" factor loadings used to build the asset's returns.
    # With real data these would be the quantities estimated by regression.
    asset_betas = {
        'Mkt-RF': 1.0,   # Market exposure
        'SMB': 0.2,      # Small-cap tilt
        'HML': 0.1,      # Value tilt
        'UMD': 0.15,     # Momentum exposure
        'QMJ': 0.25,     # Quality exposure
        'BAB': -0.05,    # Slight negative exposure to low-vol
    }
    alpha = 0.0002  # A small idiosyncratic alpha
    # Asset-specific risk; drawn after all factor series to keep the draw order stable.
    idiosyncratic_noise = np.random.normal(0, 0.007, num_periods)

    # Excess return = alpha + sum(beta_k * factor_k) + noise.
    # Terms are accumulated one at a time, in factor order, so the floating-point
    # result matches a left-to-right sum of the individual products.
    asset_excess_return = alpha
    for name, beta in asset_betas.items():
        asset_excess_return = asset_excess_return + factors_df[name] * beta
    asset_excess_return = asset_excess_return + idiosyncratic_noise

    # Total return adds the risk-free rate back on top of the excess return.
    asset_returns = rf + asset_excess_return

    # Asset return and (scalar, broadcast) RF first, followed by the factor columns.
    data_df = pd.DataFrame({
        'Asset_Return': asset_returns,
        'RF': rf,  # Add RF to the DataFrame for convenience
    })
    data_df = pd.concat([data_df, factors_df], axis=1)

    return data_df

# --- 2. Multi-Style Model Implementation (Generalized Regression) ---

def run_multi_style_regression(data_df, factor_cols, asset_col='Asset_Return', risk_free_col='RF'):
    """
    Fits an OLS regression of the asset's excess return on the chosen style factors.

    The excess return (asset return minus risk-free rate) is the dependent
    variable; the factor columns plus an added constant are the regressors, so
    the fitted 'const' coefficient plays the role of alpha.

    Args:
        data_df (pd.DataFrame): Frame holding the asset return, risk-free rate and factor columns.
            It is modified in place: an 'Asset_Excess_Return' column is added (or overwritten).
        factor_cols (list): Names of the factor columns to use as regressors.
        asset_col (str): Name of the asset return column.
        risk_free_col (str): Name of the risk-free rate column.

    Returns:
        statsmodels.regression.linear_model.RegressionResultsWrapper: The fitted regression results.
    """
    excess_col = 'Asset_Excess_Return'

    # Store the excess return on the caller's frame so it is available afterwards.
    data_df[excess_col] = data_df[asset_col] - data_df[risk_free_col]

    # Regressors: the selected factors with an intercept column prepended for alpha.
    regressors = sm.add_constant(data_df[factor_cols])

    # Ordinary least squares of excess return on [const, factors].
    return sm.OLS(data_df[excess_col], regressors).fit()

def calculate_expected_return_multi_style(regression_results, expected_factor_returns, risk_free_rate):
    """
    Combines fitted factor loadings with factor return forecasts into an expected asset return.

    The result is ``risk_free_rate + alpha + sum(beta_k * expected_return_k)``, where
    alpha is the fitted 'const' coefficient and each beta is the fitted coefficient
    for the corresponding factor.

    Args:
        regression_results (statsmodels.regression.linear_model.RegressionResultsWrapper):
            Fitted regression results; its ``params`` must contain a 'const' entry.
        expected_factor_returns (dict): Forecast return per factor name,
                                        e.g., {'Mkt-RF': 0.005, 'SMB': 0.002, ...}.
                                        Use the same periodicity as the data the model was fitted on.
                                        A factor with no fitted coefficient gets a beta of 0.0.
        risk_free_rate (float): The current risk-free rate, in the same periodicity as the forecasts.

    Returns:
        tuple: (expected_asset_return, alpha, betas_dict)
               - float: The calculated expected return of the asset.
               - float: The estimated alpha from the regression.
               - dict: Estimated beta for every factor in ``expected_factor_returns``.
    """
    fitted_params = regression_results.params
    alpha = fitted_params['const']

    # Single pass: record each factor's beta and accumulate beta * forecast.
    betas = {}
    factor_contribution = 0
    for factor, forecast in expected_factor_returns.items():
        loading = fitted_params.get(factor, 0.0)
        betas[factor] = loading
        factor_contribution = factor_contribution + loading * forecast

    expected_asset_return = risk_free_rate + alpha + factor_contribution

    return expected_asset_return, alpha, betas

# --- 3. Example Usage ---

# Demo driver: builds the synthetic data set, fits the six-factor regression,
# prints the fitted coefficients, and turns a set of monthly factor forecasts
# into an expected monthly and annual (compounded) asset return.
if __name__ == "__main__":
    print("--- Generating Synthetic Data for AQR-like Multi-Style Model ---")
    # 360 monthly observations, i.e. 30 years of data
    historical_data_multi_style = generate_synthetic_data_multi_style(num_periods=360)
    print(historical_data_multi_style.head())
    print("\nData Info:")
    # .info() writes its report directly to stdout, so it is not wrapped in print()
    historical_data_multi_style.info()

    # Factor columns used as regressors.
    # This list can be customized based on your desired factors
    style_factor_columns = ['Mkt-RF', 'SMB', 'HML', 'UMD', 'QMJ', 'BAB']

    print(f"\n--- Running Multi-Style Regression with factors: {', '.join(style_factor_columns)} ---")
    # NOTE: this call also adds an 'Asset_Excess_Return' column to historical_data_multi_style.
    regression_results_multi_style = run_multi_style_regression(
        historical_data_multi_style,
        factor_cols=style_factor_columns
    )

    print("\nRegression Results Summary (Multi-Style Model):")
    print(regression_results_multi_style.summary())

    # Pull out the headline numbers: intercept (alpha), per-factor betas, and R-squared.
    print("\nEstimated Alpha (Intercept):", regression_results_multi_style.params['const'])
    print("Estimated Betas:")
    for factor in style_factor_columns:
        # .get() falls back to 0.0 if the factor label is missing from the fitted parameters
        # (NOTE(review): unclear when that would happen for a factor that was passed in -- confirm).
        print(f"  {factor}: {regression_results_multi_style.params.get(factor, 0.0):.4f}")
    print(f"R-squared: {regression_results_multi_style.rsquared:.4f}")

    print("\n--- Calculating Expected Return (Multi-Style Model) ---")

    # Expected future factor returns (forecasts, not historical averages).
    # They are monthly figures, matching the monthly periodicity of the data above;
    # the "annual" figures in the comments are simply 12x the monthly value.
    expected_monthly_factor_returns_multi_style = {
        'Mkt-RF': 0.005,   # 0.5% monthly (~6% annual)
        'SMB': 0.002,      # 0.2% monthly (~2.4% annual)
        'HML': 0.001,      # 0.1% monthly (~1.2% annual)
        'UMD': 0.003,      # 0.3% monthly (~3.6% annual)
        'QMJ': 0.0025,     # 0.25% monthly (~3% annual)
        'BAB': 0.0015      # 0.15% monthly (~1.8% annual)
    }
    # Same monthly risk-free rate that the synthetic data generator uses for its RF column
    current_risk_free_rate_multi_style = 0.002 / 12 # Consistent with the synthetic data's RF

    # Returns (expected return, alpha, {factor: beta}) for the forecasts above
    expected_asset_return_multi_style, alpha_est_multi_style, betas_est_multi_style = \
        calculate_expected_return_multi_style(
            regression_results_multi_style,
            expected_monthly_factor_returns_multi_style,
            current_risk_free_rate_multi_style
        )

    # Echo every input and estimate that went into the expected-return figure
    print(f"\nExpected Monthly Risk-Free Rate: {current_risk_free_rate_multi_style:.4f}")
    print("Expected Monthly Factor Returns:")
    for factor, ret in expected_monthly_factor_returns_multi_style.items():
        print(f"  {factor}: {ret:.4f}")
    print(f"Estimated Alpha: {alpha_est_multi_style:.4f}")
    print("Estimated Betas:")
    for factor, beta_val in betas_est_multi_style.items():
        print(f"  {factor}: {beta_val:.4f}")

    print(f"\nCalculated Expected Monthly Asset Return: {expected_asset_return_multi_style:.4f}")
    # Annualize by compounding the monthly return over 12 months: (1 + r)^12 - 1
    print(f"Calculated Expected Annual Asset Return: {(1 + expected_asset_return_multi_style)**12 - 1:.4f}")


--- Generating Synthetic Data for AQR-like Multi-Style Model ---
   Asset_Return        RF    Mkt-RF       SMB       HML       UMD       QMJ  \
0      0.016261  0.000167  0.005527  0.010844 -0.009144  0.005427  0.008611   
1      0.002577  0.000167  0.010206 -0.009179  0.024661  0.008537  0.000802   
2     -0.011969  0.000167 -0.002903 -0.002174 -0.006795 -0.004963 -0.015897   
3      0.005483  0.000167  0.000914 -0.016009  0.022536 -0.015172 -0.003041   
4     -0.028499  0.000167 -0.020433 -0.013509  0.004806 -0.047985  0.009207   

        BAB  
0  0.028721  
1  0.002324  
2  0.011202  
3  0.005518  
4  0.003823  

Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Asset_Return  360 non-null    float64
 1   RF            360 non-null    float64
 2   Mkt-RF        360 non-null    float64
 3   SMB           360 non-null    float64
 4