In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# --- 1. Data Generation (Synthetic Data for Demonstration) ---
# In a real-world application, you would load actual historical data
# for asset returns and the Fama-French 5-factor returns.
# Data for these factors (Mkt-RF, SMB, HML, RMW, CMA) can be found
# on Kenneth French's data library website.

def generate_synthetic_data_5_factor(num_periods=120, seed=43):
    """
    Generate a synthetic return history for one asset and the Fama-French 5 factors.

    Every factor series is drawn independently from a normal distribution, and the
    asset's excess return is built as
    ``alpha + sum(beta_f * factor_f) + idiosyncratic noise``. The risk-free rate is
    a constant that is added back to obtain the total asset return.

    Args:
        num_periods (int): Number of periods (rows) to simulate.
        seed (int | None): Seed for the function's private random generator. The
            default (43) reproduces the numbers this function has always produced.
            Pass None to draw from fresh, unpredictable entropy.

    Returns:
        pd.DataFrame: One row per period with the columns 'Asset_Return', 'RF',
        'Mkt-RF', 'SMB', 'HML', 'RMW' and 'CMA'.
    """
    # A private legacy generator yields the same stream as np.random.seed(seed)
    # followed by np.random.normal(...), but leaves the process-wide NumPy RNG
    # untouched, so calling this function no longer disturbs the caller's state.
    rng = np.random.RandomState(seed)

    # Constant risk-free rate: 0.2% per year expressed per month.
    rf = 0.002 / 12

    # (mean, standard deviation) of each simulated factor return.
    # The draw order below follows this dict's order; changing the order would
    # change the generated numbers for a given seed.
    factor_distributions = {
        'Mkt-RF': (0.006, 0.02),    # Market Risk Premium
        'SMB': (0.003, 0.015),      # Small Minus Big
        'HML': (0.004, 0.018),      # High Minus Low
        'RMW': (0.0025, 0.01),      # Robust Minus Weak (Profitability)
        'CMA': (0.002, 0.012),      # Conservative Minus Aggressive (Investment)
    }
    factors_df = pd.DataFrame({
        name: rng.normal(mean, std, num_periods)
        for name, (mean, std) in factor_distributions.items()
    })

    # Hypothetical "true" factor loadings of the simulated asset.
    asset_betas = {
        'Mkt-RF': 1.1,  # Moderately sensitive to market
        'SMB': 0.4,     # Slight small-cap tilt
        'HML': 0.2,     # Slight value tilt
        'RMW': 0.3,     # Positive exposure to profitability
        'CMA': 0.1,     # Slight exposure to conservative investment
    }
    alpha = 0.0005  # A small abnormal return not explained by factors

    # Asset-specific noise; drawn after the factors to keep the random stream stable.
    idiosyncratic_noise = rng.normal(0, 0.008, num_periods)

    # Excess return = alpha + factor contributions + noise, accumulated in the
    # same order as the factor columns.
    asset_excess_return = alpha
    for name, beta in asset_betas.items():
        asset_excess_return = asset_excess_return + factors_df[name] * beta
    asset_excess_return = asset_excess_return + idiosyncratic_noise

    # Total return adds the risk-free rate back on top of the excess return.
    asset_returns = rf + asset_excess_return

    # The scalar RF is broadcast to every row so the frame is self-contained.
    data_df = pd.DataFrame({
        'Asset_Return': asset_returns,
        'RF': rf,
    })
    return pd.concat([data_df, factors_df], axis=1)

# --- 2. Fama-French 5-Factor Model Implementation (Regression) ---

def run_ff_5_factor_regression(data_df, asset_col='Asset_Return', risk_free_col='RF'):
    """
    Fit the Fama-French 5-factor model to one asset by ordinary least squares.

    The asset's excess return (asset return minus risk-free rate) is regressed on
    the factor columns 'Mkt-RF', 'SMB', 'HML', 'RMW' and 'CMA' plus an intercept.
    The intercept is reported under the name 'const' and is the asset's alpha.

    The input DataFrame is not modified.

    Args:
        data_df (pd.DataFrame): DataFrame containing asset returns, risk-free rate, and factor returns.
        asset_col (str): Name of the column with asset returns.
        risk_free_col (str): Name of the column with risk-free rate.

    Returns:
        statsmodels.regression.linear_model.RegressionResultsWrapper: The regression results object.

    Raises:
        KeyError: If any of the required columns is missing from data_df.
    """
    # Define the 5 factors for the model
    ff_5_factor_cols = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']

    # Report every missing column at once instead of failing on the first lookup.
    required_cols = [asset_col, risk_free_col, *ff_5_factor_cols]
    missing_cols = [col for col in required_cols if col not in data_df.columns]
    if missing_cols:
        raise KeyError(f"data_df is missing required column(s): {missing_cols}")

    # Dependent variable: excess return, kept as a standalone Series so the
    # caller's DataFrame does not silently gain a new column. The name is kept
    # so the regression summary still labels the dependent variable.
    Y = (data_df[asset_col] - data_df[risk_free_col]).rename('Asset_Excess_Return')

    # Independent variables: the five factors plus a constant for the intercept (alpha).
    X = sm.add_constant(data_df[ff_5_factor_cols])

    # Perform the OLS (Ordinary Least Squares) regression
    return sm.OLS(Y, X).fit()

def calculate_expected_return_5_factor(regression_results, expected_factor_returns, risk_free_rate):
    """
    Turn fitted factor loadings and factor forecasts into an expected asset return.

    The result is ``risk_free_rate + alpha + sum(beta_f * expected_factor_returns[f])``,
    where alpha is the regression intercept ('const') and each beta_f is the fitted
    coefficient of factor f. Only the factors named in expected_factor_returns are used.

    Args:
        regression_results: Object whose ``params`` can be indexed by 'const' and by
            each factor name, e.g. the results object returned by the regression.
        expected_factor_returns (dict): Forecast return for each factor, keyed by
            factor name, e.g. {'Mkt-RF': 0.005, 'SMB': 0.002, 'HML': 0.001,
            'RMW': 0.0015, 'CMA': 0.001}. Use the same periodicity as risk_free_rate,
            because the values are added together unchanged.
        risk_free_rate (float): The current risk-free rate (should be consistent with factor return periodicity).

    Returns:
        tuple: (expected_asset_return, alpha, betas_dict)
               - float: The calculated expected return of the asset.
               - float: The estimated alpha from the regression.
               - dict: A dictionary of estimated betas.
    """
    # Intercept of the regression = alpha.
    intercept = regression_results.params['const']

    # Pick up the fitted loading of every factor that has a forecast.
    loadings = {}
    for factor_name in expected_factor_returns:
        loadings[factor_name] = regression_results.params[factor_name]

    # Accumulate beta * forecast over the factors.
    factor_premium = 0
    for factor_name, forecast in expected_factor_returns.items():
        factor_premium += loadings[factor_name] * forecast

    # E[R_i] = R_f + alpha_i + sum_f beta_f * E[factor_f]
    return risk_free_rate + intercept + factor_premium, intercept, loadings

# --- 3. Example Usage ---

# Demo script: simulate data, fit the 5-factor regression, then forecast the
# asset's expected return from the fitted coefficients.
if __name__ == "__main__":
    print("--- Generating Synthetic Data for Fama-French 5-Factor Model ---")
    # 360 periods; read as monthly observations this is 30 years of data
    historical_data_ff5 = generate_synthetic_data_5_factor(num_periods=360)
    print(historical_data_ff5.head())
    print("\nData Info:")
    # info() prints its report itself, so it is not wrapped in print()
    historical_data_ff5.info()

    print("\n--- Running Fama-French 5-Factor Regression ---")
    regression_results_ff5 = run_ff_5_factor_regression(historical_data_ff5)

    print("\nRegression Results Summary (Fama-French 5-Factor):")
    print(regression_results_ff5.summary())

    # Pull the headline numbers out of the fitted parameters:
    # 'const' is the intercept (alpha); each factor name maps to its beta.
    print("\nEstimated Alpha (Intercept):", regression_results_ff5.params['const'])
    print("Estimated Betas:")
    ff_5_factor_names = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
    for factor in ff_5_factor_names:
        print(f"  {factor}: {regression_results_ff5.params[factor]:.4f}")
    print(f"R-squared: {regression_results_ff5.rsquared:.4f}")

    print("\n--- Calculating Expected Return (Fama-French 5-Factor) ---")

    # Forecast (not historical) factor returns used for the expected-return calculation.
    # They are stated per month so they match the monthly risk-free rate below and
    # the monthly reading of the regression data.
    expected_monthly_factor_returns_ff5 = {
        'Mkt-RF': 0.005,   # 0.5% monthly market risk premium (~6% annual)
        'SMB': 0.002,      # 0.2% monthly SMB premium (~2.4% annual)
        'HML': 0.001,      # 0.1% monthly HML premium (often lower in 5-factor due to RMW/CMA)
        'RMW': 0.0015,     # 0.15% monthly RMW premium (~1.8% annual)
        'CMA': 0.001      # 0.1% monthly CMA premium (~1.2% annual)
    }
    current_risk_free_rate_ff5 = 0.002 / 12 # Same expression as the RF used in the synthetic data

    # Returns (expected return, alpha, {factor: beta}) for the forecasts above
    expected_asset_return_ff5, alpha_est_ff5, betas_est_ff5 = calculate_expected_return_5_factor(
        regression_results_ff5,
        expected_monthly_factor_returns_ff5,
        current_risk_free_rate_ff5
    )

    # Echo every input of the calculation before showing the result
    print(f"\nExpected Monthly Risk-Free Rate: {current_risk_free_rate_ff5:.4f}")
    print("Expected Monthly Factor Returns:")
    for factor, ret in expected_monthly_factor_returns_ff5.items():
        print(f"  {factor}: {ret:.4f}")
    print(f"Estimated Alpha: {alpha_est_ff5:.4f}")
    print("Estimated Betas:")
    for factor, beta_val in betas_est_ff5.items():
        print(f"  {factor}: {beta_val:.4f}")

    print(f"\nCalculated Expected Monthly Asset Return: {expected_asset_return_ff5:.4f}")
    # Annualize by compounding the monthly figure over 12 months
    print(f"Calculated Expected Annual Asset Return: {(1 + expected_asset_return_ff5)**12 - 1:.4f}")


--- Generating Synthetic Data for Fama-French 5-Factor Model ---
   Asset_Return        RF    Mkt-RF       SMB       HML       RMW       CMA
0      0.014957  0.000167  0.011148  0.008227 -0.016371  0.004403  0.029339
1     -0.021599  0.000167 -0.012170 -0.020275  0.012600  0.010709  0.024534
2      0.006732  0.000167 -0.001570  0.010189  0.035835  0.002235 -0.005911
3     -0.006520  0.000167 -0.004698 -0.005049 -0.010448  0.000680 -0.005242
4      0.021616  0.000167  0.023161 -0.026192 -0.000828 -0.006992  0.020420

Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Asset_Return  360 non-null    float64
 1   RF            360 non-null    float64
 2   Mkt-RF        360 non-null    float64
 3   SMB           360 non-null    float64
 4   HML           360 non-null    float64
 5   RMW           360 non-null    float64
 6   CMA           