In [17]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# --- 1. Data Generation (Synthetic Data for Demonstration) ---
# In a real-world application, you would load actual historical data
# for asset returns and factor returns.

def generate_synthetic_data(num_periods=120, seed=42):
    """
    Generates synthetic per-period data for one asset and three factors.

    The asset's excess return is built as a linear combination of the factor
    returns (fixed betas), a constant alpha and Gaussian idiosyncratic noise,
    so a regression on the output should recover those coefficients
    approximately.

    Args:
        num_periods (int): Number of periods (rows) to simulate.
        seed (int or None): Seed for the private random generator. The default
            (42) reproduces the same numbers on every call; pass None for a
            non-deterministic draw.

    Returns:
        pd.DataFrame: One row per period with columns
            'Asset_Return' (total return, i.e. excess return + RF),
            'RF' (constant risk-free rate),
            'Mkt-RF', 'SMB', 'HML' (simulated factor returns).
    """
    # Use a private generator instead of np.random.seed(): it yields the same
    # legacy random stream for a given seed but does not reset the global
    # NumPy RNG state that unrelated code may be relying on.
    rng = np.random.RandomState(seed)

    # Constant risk-free rate: 0.2% per year expressed per month.
    rf = 0.002 / 12

    # Factor returns. The draw order (Mkt-RF, SMB, HML, then noise) determines
    # the generated numbers, so it must not be rearranged.
    factors_df = pd.DataFrame({
        'Mkt-RF': rng.normal(0.006, 0.02, num_periods),   # Mean 0.6%, Std 2%
        'SMB': rng.normal(0.003, 0.015, num_periods),     # Mean 0.3%, Std 1.5%
        'HML': rng.normal(0.004, 0.018, num_periods),     # Mean 0.4%, Std 1.8%
    })

    # True coefficients of the hypothetical asset (negative HML loading,
    # e.g. a growth stock) and its per-period abnormal return.
    asset_betas = {
        'Mkt-RF': 1.2,
        'SMB': 0.5,
        'HML': -0.3
    }
    alpha = 0.001
    idiosyncratic_noise = rng.normal(0, 0.01, num_periods)  # Std 1%

    # Excess return = alpha + sum(beta_k * factor_k) + noise
    asset_excess_return = (
        alpha
        + factors_df['Mkt-RF'] * asset_betas['Mkt-RF']
        + factors_df['SMB'] * asset_betas['SMB']
        + factors_df['HML'] * asset_betas['HML']
        + idiosyncratic_noise
    )

    # Total return adds the risk-free rate back; the scalar RF is broadcast
    # to every row when the DataFrame is built.
    data_df = pd.DataFrame({
        'Asset_Return': rf + asset_excess_return,
        'RF': rf,
    })

    return pd.concat([data_df, factors_df], axis=1)

# --- 2. Multi-Factor Model Implementation ---

def run_multi_factor_regression(data_df, asset_col='Asset_Return', risk_free_col='RF', factor_cols=None):
    """
    Performs a multi-variate linear regression to estimate factor sensitivities (betas).

    The asset's excess return (asset return minus risk-free rate) is regressed
    on the factor columns plus an intercept using ordinary least squares.
    The input DataFrame is not modified.

    Args:
        data_df (pd.DataFrame): DataFrame containing asset returns, risk-free rate, and factor returns.
        asset_col (str): Name of the column with asset returns.
        risk_free_col (str): Name of the column with risk-free rate.
        factor_cols (list or str, optional): Column name(s) of the factors.
            Defaults to ['Mkt-RF', 'SMB', 'HML'] when omitted.

    Returns:
        statsmodels.regression.linear_model.RegressionResultsWrapper: The regression results object.
            The intercept (alpha) is stored under the 'const' parameter and each
            beta under its factor column name.

    Raises:
        KeyError: If any of the named columns is missing from data_df.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if factor_cols is None:
        factor_cols = ['Mkt-RF', 'SMB', 'HML']
    elif isinstance(factor_cols, str):
        factor_cols = [factor_cols]
    else:
        factor_cols = list(factor_cols)

    # Dependent variable: built as a standalone Series so the caller's
    # DataFrame does not silently gain a new column. The name is kept so the
    # regression summary still labels it 'Asset_Excess_Return'.
    excess_return = (data_df[asset_col] - data_df[risk_free_col]).rename('Asset_Excess_Return')

    # Independent variables: the factors plus a constant column for alpha.
    design_matrix = sm.add_constant(data_df[factor_cols])

    # Fit by OLS (Ordinary Least Squares).
    return sm.OLS(excess_return, design_matrix).fit()

def calculate_expected_return(regression_results, expected_factor_returns, risk_free_rate):
    """
    Calculates the expected return of an asset based on estimated betas and expected factor returns.

    The model is
        E[R_i] = R_f + alpha_i + sum_k(beta_k * E[factor_k])
    where only the factors listed in expected_factor_returns contribute to
    the sum.

    Args:
        regression_results: Fitted regression results exposing a `params`
            mapping with the intercept under 'const' and one entry per factor
            (e.g. the object returned by a statsmodels OLS fit).
        expected_factor_returns (dict): Expected return for each factor,
            e.g. {'Mkt-RF': 0.005, 'SMB': 0.002, 'HML': 0.003}. The values must
            use the same periodicity as the data the regression was fitted on
            and as risk_free_rate (e.g. all monthly).
        risk_free_rate (float): The current risk-free rate (same periodicity as the factor returns).

    Returns:
        tuple: (expected_asset_return, alpha, betas) where
            expected_asset_return is the expected total return for one period,
            alpha is the estimated intercept, and
            betas is a dict mapping each requested factor to its estimated beta.

    Raises:
        KeyError: If 'const' or any requested factor is absent from the
            regression parameters.
    """
    params = regression_results.params

    # Fail with a message naming the offending factors rather than a bare
    # KeyError on the first missing label.
    missing = [factor for factor in expected_factor_returns if factor not in params]
    if missing:
        raise KeyError(f"No estimated beta for factor(s): {missing}")

    # Extract alpha and the betas of the requested factors.
    alpha = params['const']
    betas = {factor: params[factor] for factor in expected_factor_returns}

    # Sum of (beta * expected factor return) over the requested factors.
    factor_contribution = sum(
        betas[factor] * expected
        for factor, expected in expected_factor_returns.items()
    )

    expected_asset_return = risk_free_rate + alpha + factor_contribution

    return expected_asset_return, alpha, betas

# --- 3. Example Usage ---

if __name__ == "__main__":
    # Demo driver: simulate history, fit the factor model, then turn the
    # fitted coefficients plus factor forecasts into an expected return.
    print("--- Generating Synthetic Data ---")
    # 240 monthly observations, i.e. 20 years of history.
    historical_data = generate_synthetic_data(num_periods=240)
    print(historical_data.head())
    print("\nData Info:")
    historical_data.info()

    print("\n--- Running Multi-Factor Regression ---")
    regression_results = run_multi_factor_regression(historical_data)

    print("\nRegression Results Summary:")
    print(regression_results.summary())

    # Headline numbers pulled out of the fitted model.
    fitted_params = regression_results.params
    print("\nEstimated Alpha (Intercept):", fitted_params['const'])
    print("Estimated Betas:")
    for factor in ('Mkt-RF', 'SMB', 'HML'):
        print(f"  {factor}: {fitted_params[factor]:.4f}")
    print(f"R-squared: {regression_results.rsquared:.4f}")

    print("\n--- Calculating Expected Return ---")

    # Forward-looking factor forecasts (not historical averages). They are
    # monthly figures so that they match the periodicity of the fitted data.
    expected_monthly_factor_returns = {
        'Mkt-RF': 0.005,  # 0.5% per month (~6% per year)
        'SMB': 0.002,     # 0.2% per month (~2.4% per year)
        'HML': 0.003,     # 0.3% per month (~3.6% per year)
    }
    # Same monthly risk-free rate the synthetic data was generated with.
    current_risk_free_rate = 0.002 / 12

    expected_asset_return, alpha_est, betas_est = calculate_expected_return(
        regression_results,
        expected_factor_returns=expected_monthly_factor_returns,
        risk_free_rate=current_risk_free_rate,
    )

    # Echo the inputs and the estimates that went into the forecast.
    print(f"\nExpected Monthly Risk-Free Rate: {current_risk_free_rate:.4f}")
    print("Expected Monthly Factor Returns:")
    for factor, ret in expected_monthly_factor_returns.items():
        print(f"  {factor}: {ret:.4f}")
    print(f"Estimated Alpha: {alpha_est:.4f}")
    print("Estimated Betas:")
    for factor, beta_val in betas_est.items():
        print(f"  {factor}: {beta_val:.4f}")

    print(f"\nCalculated Expected Monthly Asset Return: {expected_asset_return:.4f}")
    # Compound the monthly figure over 12 months to annualize it.
    print(f"Calculated Expected Annual Asset Return: {(1 + expected_asset_return) ** 12 - 1:.4f}")

--- Generating Synthetic Data ---
   Asset_Return        RF    Mkt-RF       SMB       HML
0      0.018413  0.000167  0.015934 -0.008888  0.001697
1     -0.007454  0.000167  0.003235  0.001279 -0.013200
2      0.023192  0.000167  0.018954  0.010575 -0.024916
3      0.058046  0.000167  0.036461  0.015986  0.007662
4     -0.000162  0.000167  0.001317 -0.015004 -0.009614

Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240 entries, 0 to 239
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Asset_Return  240 non-null    float64
 1   RF            240 non-null    float64
 2   Mkt-RF        240 non-null    float64
 3   SMB           240 non-null    float64
 4   HML           240 non-null    float64
dtypes: float64(5)
memory usage: 9.5 KB

--- Running Multi-Factor Regression ---

Regression Results Summary:
                             OLS Regression Results                            
Dep. Variable:     Asse

  factor_contribution = sum(betas[factor] * expected_factor_returns[factor] for factor in expected_factor_returns.keys())
