# In [None]:  (Jupyter cell prompt left over from the notebook export)
import pandas as pd
import numpy as np
import statsmodels.api as sm

# --- 1. Data Generation (Synthetic Data for Demonstration) ---
# In a real-world application, you would load actual historical data
# for asset returns and the Carhart 4-factor returns (Mkt-RF, SMB, HML, UMD).
# Data for these factors can be found on Kenneth French's data library website.

def generate_synthetic_data_carhart_4_factor(num_periods=120, seed=44):
    """
    Generate a synthetic return history for one asset and the Carhart 4 factors.

    The asset's excess return is built as
        alpha + sum(beta_f * factor_f) + idiosyncratic noise
    with fixed, hard-coded betas and alpha, and the total return is the
    excess return plus a constant per-period risk-free rate.

    All random draws come from a private ``np.random.RandomState`` so calling
    this function does not reseed or advance NumPy's global random state.
    With the default ``seed=44`` the draws are the same ones the legacy
    ``np.random.seed(44)`` + ``np.random.normal(...)`` sequence produces.

    Args:
        num_periods (int): Number of periods (rows) to simulate.
        seed (int | None): Seed for the private generator. ``None`` lets
            NumPy pick a non-deterministic seed.

    Returns:
        pd.DataFrame: One row per period with columns
            'Asset_Return', 'RF', 'Mkt-RF', 'SMB', 'HML', 'UMD' (in that order).
    """
    # Private generator: reproducible without clobbering np.random's global state.
    rng = np.random.RandomState(seed)

    # Constant per-period risk-free rate (0.002 spread evenly over 12 periods).
    rf = 0.002 / 12

    # Factor draws. The order of these calls fixes which part of the random
    # stream each factor receives, so it must not be rearranged.
    mkt_rf = rng.normal(0.006, 0.02, num_periods)   # Market risk premium
    smb = rng.normal(0.003, 0.015, num_periods)     # Small Minus Big
    hml = rng.normal(0.004, 0.018, num_periods)     # High Minus Low
    umd = rng.normal(0.007, 0.025, num_periods)     # Up Minus Down (momentum)

    factors_df = pd.DataFrame({
        'Mkt-RF': mkt_rf,
        'SMB': smb,
        'HML': hml,
        'UMD': umd,
    })

    # Hypothetical "true" loadings used to construct the asset's returns.
    asset_betas = {
        'Mkt-RF': 1.05,
        'SMB': 0.3,
        'HML': 0.1,
        'UMD': 0.2,
    }
    alpha = 0.0003  # small return component not explained by the factors
    # Drawn after the four factors so the random stream order is preserved.
    idiosyncratic_noise = rng.normal(0, 0.009, num_periods)

    # Excess return = alpha + factor exposures + asset-specific noise.
    asset_excess_return = (
        alpha
        + factors_df['Mkt-RF'] * asset_betas['Mkt-RF']
        + factors_df['SMB'] * asset_betas['SMB']
        + factors_df['HML'] * asset_betas['HML']
        + factors_df['UMD'] * asset_betas['UMD']
        + idiosyncratic_noise
    )

    # Total return adds the risk-free rate back on top of the excess return.
    asset_returns = rf + asset_excess_return

    # 'RF' is a scalar and is broadcast to every row.
    data_df = pd.DataFrame({
        'Asset_Return': asset_returns,
        'RF': rf,
    })
    data_df = pd.concat([data_df, factors_df], axis=1)

    return data_df

# --- 2. Carhart 4-Factor Model Implementation (Regression) ---

def run_carhart_4_factor_regression(data_df, asset_col='Asset_Return', risk_free_col='RF'):
    """
    Fit the Carhart 4-factor model by ordinary least squares.

    Regresses the asset's excess return (``asset_col`` minus ``risk_free_col``)
    on an intercept plus the 'Mkt-RF', 'SMB', 'HML' and 'UMD' columns.
    The input DataFrame is only read; no column is added to it.

    Args:
        data_df (pd.DataFrame): DataFrame containing asset returns, the
            risk-free rate, and the four factor columns.
        asset_col (str): Name of the column with asset returns.
        risk_free_col (str): Name of the column with the risk-free rate.

    Returns:
        statsmodels.regression.linear_model.RegressionResultsWrapper:
            The fitted OLS results; the intercept is stored under 'const'.

    Raises:
        KeyError: If any required column is missing from ``data_df``.
    """
    carhart_4_factor_cols = ['Mkt-RF', 'SMB', 'HML', 'UMD']

    # Fail early with one message listing everything that is missing.
    required_cols = [asset_col, risk_free_col, *carhart_4_factor_cols]
    missing_cols = [col for col in required_cols if col not in data_df.columns]
    if missing_cols:
        raise KeyError(f"data_df is missing required column(s): {missing_cols}")

    # Build the dependent variable as a standalone Series instead of writing
    # it back into data_df, so the caller's frame is left untouched. The name
    # is kept so the regression summary still labels it 'Asset_Excess_Return'.
    Y = (data_df[asset_col] - data_df[risk_free_col]).rename('Asset_Excess_Return')

    # Regressors: the four factors plus a constant column for the intercept (alpha).
    X = sm.add_constant(data_df[carhart_4_factor_cols])

    return sm.OLS(Y, X).fit()

def calculate_expected_return_carhart_4_factor(regression_results, expected_factor_returns, risk_free_rate):
    """
    Combine fitted factor loadings with forecast factor returns.

    Computes
        E[R] = risk_free_rate + alpha + sum(beta_f * expected_factor_returns[f])
    where alpha is the 'const' entry of ``regression_results.params`` and each
    beta_f is the entry of ``regression_results.params`` keyed by factor name.
    Only the factors present in ``expected_factor_returns`` are used.

    Args:
        regression_results: Fitted regression results object exposing a
            ``params`` mapping that contains 'const' and one entry per factor.
        expected_factor_returns (dict): Forecast return per factor, keyed by
            factor name, e.g. {'Mkt-RF': 0.005, 'SMB': 0.002, ...}.
        risk_free_rate (float): Risk-free rate. It is added as-is, so it should
            be expressed for the same period as the factor forecasts.

    Returns:
        tuple: (expected_asset_return, alpha, betas_dict)
               - expected_asset_return: the value of the formula above.
               - alpha: the estimated intercept.
               - betas_dict: factor name -> estimated beta, in the iteration
                 order of ``expected_factor_returns``.
    """
    fitted_params = regression_results.params

    # Intercept first, then one loading per requested factor.
    intercept = fitted_params['const']
    loadings = {}
    for name in expected_factor_returns:
        loadings[name] = fitted_params[name]

    # Total premium earned from factor exposure: sum of beta * forecast.
    premium_terms = (
        loadings[name] * forecast
        for name, forecast in expected_factor_returns.items()
    )
    premium_total = sum(premium_terms)

    # E[R_i] = R_f + alpha_i + sum_f beta_f * E[factor_f]
    projected_return = risk_free_rate + intercept + premium_total

    return projected_return, intercept, loadings

# --- 3. Example Usage ---

if __name__ == "__main__":
    # Step 1: simulate 240 periods of data (20 years if read as monthly).
    print("--- Generating Synthetic Data for Carhart 4-Factor Model ---")
    historical_data_carhart4 = generate_synthetic_data_carhart_4_factor(num_periods=240)
    print(historical_data_carhart4.head())
    print("\nData Info:")
    historical_data_carhart4.info()  # info() prints directly; it has no return value to print

    # Step 2: estimate alpha and the four betas by OLS.
    print("\n--- Running Carhart 4-Factor Regression ---")
    regression_results_carhart4 = run_carhart_4_factor_regression(historical_data_carhart4)

    print("\nRegression Results Summary (Carhart 4-Factor):")
    print(regression_results_carhart4.summary())

    # Pull the headline numbers out of the fitted model.
    fitted_params_carhart4 = regression_results_carhart4.params
    print("\nEstimated Alpha (Intercept):", fitted_params_carhart4['const'])
    print("Estimated Betas:")
    carhart_4_factor_names = ['Mkt-RF', 'SMB', 'HML', 'UMD']
    for factor in carhart_4_factor_names:
        estimated_beta = fitted_params_carhart4[factor]
        print(f"  {factor}: {estimated_beta:.4f}")
    print(f"R-squared: {regression_results_carhart4.rsquared:.4f}")

    # Step 3: turn the fitted loadings into a forward-looking expected return.
    print("\n--- Calculating Expected Return (Carhart 4-Factor) ---")

    # Forecast (not historical) factor returns, expressed per month so they
    # match the periodicity of the simulated history.
    expected_monthly_factor_returns_carhart4 = {
        'Mkt-RF': 0.005,   # 0.5% per month (~6% annual)
        'SMB': 0.002,      # 0.2% per month (~2.4% annual)
        'HML': 0.001,      # 0.1% per month
        'UMD': 0.003       # 0.3% per month (~3.6% annual)
    }
    # Same per-period risk-free rate the synthetic data generator uses.
    current_risk_free_rate_carhart4 = 0.002 / 12

    (
        expected_asset_return_carhart4,
        alpha_est_carhart4,
        betas_est_carhart4,
    ) = calculate_expected_return_carhart_4_factor(
        regression_results_carhart4,
        expected_monthly_factor_returns_carhart4,
        current_risk_free_rate_carhart4,
    )

    print(f"\nExpected Monthly Risk-Free Rate: {current_risk_free_rate_carhart4:.4f}")
    print("Expected Monthly Factor Returns:")
    for factor, ret in expected_monthly_factor_returns_carhart4.items():
        print(f"  {factor}: {ret:.4f}")
    print(f"Estimated Alpha: {alpha_est_carhart4:.4f}")
    print("Estimated Betas:")
    for factor, beta_val in betas_est_carhart4.items():
        print(f"  {factor}: {beta_val:.4f}")

    # Report the monthly figure and its 12-period compounded equivalent.
    expected_annual_return_carhart4 = (1 + expected_asset_return_carhart4)**12 - 1
    print(f"\nCalculated Expected Monthly Asset Return: {expected_asset_return_carhart4:.4f}")
    print(f"Calculated Expected Annual Asset Return: {expected_annual_return_carhart4:.4f}")