In [12]:
import yfinance as yf
import pandas as pd
from datetime import datetime
import numpy as np

# --- Universe and sample period -------------------------------------------
# Five individual stocks plus the S&P 500 index (^GSPC).
tickers = ["AAPL", "MSFT", "JPM", "JNJ", "XOM", "^GSPC"]
start_date = "1990-01-01"
end_date = "2024-02-16"

# --- Fetch daily bars for every ticker in a single request ----------------
data = yf.download(tickers, start=start_date, end=end_date, interval="1d")

# The price-field labels live on level 0 when the columns are a MultiIndex,
# and on the flat column index otherwise.
has_multiindex_columns = isinstance(data.columns, pd.MultiIndex)
price_fields = data.columns.levels[0] if has_multiindex_columns else data.columns
print("Available columns:", price_fields)

# Prefer 'Adj Close'; fall back to 'Close' when the download does not provide it
# (the recorded run below only lists 'Close').
# NOTE(review): presumably 'Close' is already adjusted in that case - confirm
# against the installed yfinance version.
price_field = 'Adj Close' if 'Adj Close' in price_fields else 'Close'
if has_multiindex_columns:
    adj_close = data.xs(price_field, axis=1, level=0)
else:
    adj_close = data[price_field]

# --- Collapse to one observation per month --------------------------------
# Each month-end bin keeps the last available price of that month per ticker.
monthly_close = adj_close.resample('ME').last()

# Persist for the downstream cells, then preview.
monthly_close.to_csv("monthly_closing_prices_fixed.csv")
print(monthly_close.head())

[*********************100%***********************]  6 of 6 completed

Available columns: Index(['Close', 'High', 'Low', 'Open', 'Volume'], dtype='object', name='Price')
Ticker          AAPL       JNJ       JPM      MSFT       XOM       ^GSPC
Date                                                                    
1990-01-31  0.238996  2.870665  2.810274  0.395284  3.602301  329.079987
1990-02-28  0.239766  2.954548  2.796633  0.421993  3.647899  331.890015
1990-03-31  0.283841  3.092286  2.615755  0.473273  3.589686  339.940002
1990-04-30  0.277671  3.119834  2.615755  0.495709  3.512073  330.799988
1990-05-31  0.291701  3.539010  3.063370  0.623909  3.774213  361.230011





## Price Averaging

In [None]:
import pandas as pd

def compute_ohlc_average(open_prices, high_prices, low_prices, close_prices):
    """
    Average the four bar prices (Open, High, Low, Close) with equal weights.

    Intended to reduce short-term random noise in returns compared with
    using a single price per bar.

    :param open_prices: opening prices
    :param high_prices: high prices
    :param low_prices: low prices
    :param close_prices: closing prices
    :return: (open + high + low + close) / 4, element-wise
    """
    # Accumulate in the order open, high, low, close.
    price_sum = open_prices + high_prices
    price_sum = price_sum + low_prices
    price_sum = price_sum + close_prices
    return price_sum / 4


## Log Returns


In [13]:

# Read back the month-end price table written by the download cell.
monthly_close = pd.read_csv(
    "monthly_closing_prices_fixed.csv",
    index_col="Date",
    parse_dates=True,
)

# Log return for month t is ln(P_t / P_{t-1}); the first row has no
# predecessor and is therefore NaN.
previous_close = monthly_close.shift(1)
log_returns = np.log(monthly_close.div(previous_close))

# Persist for the factor/beta cells, then preview.
log_returns.to_csv("monthly_log_returns.csv")
print(log_returns.head())


                AAPL       JNJ       JPM      MSFT       XOM     ^GSPC
Date                                                                  
1990-01-31       NaN       NaN       NaN       NaN       NaN       NaN
1990-02-28  0.003216  0.028802 -0.004866  0.065384  0.012579  0.008503
1990-03-31  0.168751  0.045565 -0.066864  0.114683 -0.016087  0.023966
1990-04-30 -0.021978  0.008869  0.000000  0.046316 -0.021858 -0.027255
1990-05-31  0.049291  0.126067  0.157963  0.230015  0.071986  0.088001


# Factor Calculation Functions

## Calculate Rolling Market Beta 5yr

In [15]:
import numpy as np
import pandas as pd

# Read back the monthly log returns written by the previous section.
log_returns = pd.read_csv("monthly_log_returns.csv", index_col="Date", parse_dates=True)

window_size = 60  # 60 monthly observations = 5 years

# The S&P 500 column (^GSPC) is the market benchmark.
market_returns = log_returns["^GSPC"]

# The denominator is the same for every stock, so compute it once.
market_variance = market_returns.rolling(window=window_size).var()

# beta_i = rolling Cov(r_i, r_m) / rolling Var(r_m), one column per stock;
# the benchmark itself is left out.
rolling_betas = pd.DataFrame(
    {
        stock: log_returns[stock].rolling(window=window_size).cov(market_returns)
        / market_variance
        for stock in log_returns.columns
        if stock != "^GSPC"
    },
    index=log_returns.index,
)

# The first rows have no full window yet; drop every row containing a NaN.
rolling_betas = rolling_betas.dropna()

# Persist, then preview.
rolling_betas.to_csv("rolling_beta_5yr.csv")
print(rolling_betas.head())



First few rows of 5-Year Rolling Beta:
                AAPL       JNJ       JPM      MSFT       XOM
Date                                                        
1995-01-31  1.842263  1.170682  1.346224  1.261346  0.573694
1995-02-28  1.810642  1.142209  1.337555  1.257423  0.576990
1995-03-31  1.732785  1.142203  1.337093  1.260940  0.591881
1995-04-30  1.764826  1.173022  1.370849  1.314485  0.591689
1995-05-31  1.899498  1.140757  1.349525  1.181346  0.579208


## Size Factor (SMB - Small Minus Big)

In [None]:
def compute_smb(returns, market_caps, size_threshold=50):
    """
    Compute the SMB (Small Minus Big) size factor.

    For every row, assets whose market cap is at or below the row's
    size_threshold-th percentile form the "small" leg and the rest form the
    "big" leg; the factor is the equal-weighted mean return of the small leg
    minus that of the big leg.

    :param returns: DataFrame of asset returns
    :param market_caps: DataFrame of market capitalisations
        (assumes the same rows/columns layout as returns - TODO confirm)
    :param size_threshold: Percentile (0-100) used as the small/big breakpoint
    :return: Series of SMB factor returns, one value per row
    """
    # Row-wise breakpoint across all assets.
    breakpoint_fraction = size_threshold / 100
    size_cutoff = market_caps.quantile(breakpoint_fraction, axis=1)

    # Membership masks; assets with a missing market cap fall in neither leg.
    is_small = market_caps.le(size_cutoff, axis="index")
    is_big = market_caps.gt(size_cutoff, axis="index")

    # Equal-weighted leg returns (non-members are masked to NaN and skipped).
    small_leg = returns.where(is_small).mean(axis=1)
    big_leg = returns.where(is_big).mean(axis=1)

    return small_leg - big_leg


## Value Factor (HML - High Minus Low)

In [None]:
def compute_hml(returns, book_to_market, value_threshold=50):
    """
    Compute the HML (High Minus Low) value factor.

    For every row, assets whose book-to-market ratio is at or above the row's
    value_threshold-th percentile form the "value" leg and those strictly
    below it form the "growth" leg; the factor is the equal-weighted mean
    return of the value leg minus that of the growth leg.

    :param returns: DataFrame of asset returns
    :param book_to_market: DataFrame of book-to-market ratios
        (assumes the same rows/columns layout as returns - TODO confirm)
    :param value_threshold: Percentile (0-100) used as the value/growth breakpoint
    :return: Series of HML factor returns, one value per row
    """
    # Row-wise breakpoint across all assets.
    breakpoint_fraction = value_threshold / 100
    value_cutoff = book_to_market.quantile(breakpoint_fraction, axis=1)

    # Membership masks; assets with a missing ratio fall in neither leg.
    is_value = book_to_market.ge(value_cutoff, axis="index")
    is_growth = book_to_market.lt(value_cutoff, axis="index")

    # Equal-weighted leg returns (non-members are masked to NaN and skipped).
    value_leg = returns.where(is_value).mean(axis=1)
    growth_leg = returns.where(is_growth).mean(axis=1)

    return value_leg - growth_leg


## Momentum Factor (UMD - Up Minus Down)

In [None]:
def compute_umd(returns, momentum_window=12, momentum_threshold=30):
    """
    Compute the UMD (Up Minus Down) momentum factor.

    The ranking signal for a row is the sum of each asset's returns over the
    momentum_window rows that precede it (the row itself is excluded).
    Assets at or above the (100 - momentum_threshold)-th percentile of the
    signal are "winners", those at or below the momentum_threshold-th
    percentile are "losers"; the factor is the equal-weighted mean return of
    winners minus that of losers.

    :param returns: DataFrame of asset returns
    :param momentum_window: Number of preceding rows summed into the signal
    :param momentum_threshold: Percentile (0-100) defining each tail
    :return: Series of UMD factor returns, one value per row (NaN until a
        full look-back window is available)
    """
    # Lag by one row so the signal never includes the return being explained.
    lagged_returns = returns.shift(1)
    past_returns = lagged_returns.rolling(momentum_window).sum()

    # Row-wise tail breakpoints.
    upper_fraction = (100 - momentum_threshold) / 100
    lower_fraction = momentum_threshold / 100
    top_cutoff = past_returns.quantile(upper_fraction, axis=1)
    bottom_cutoff = past_returns.quantile(lower_fraction, axis=1)

    # Membership masks for the two tails.
    is_winner = past_returns.ge(top_cutoff, axis="index")
    is_loser = past_returns.le(bottom_cutoff, axis="index")

    # Equal-weighted leg returns (non-members are masked to NaN and skipped).
    winner_leg = returns.where(is_winner).mean(axis=1)
    loser_leg = returns.where(is_loser).mean(axis=1)

    return winner_leg - loser_leg


## Profitability Factor (RMW - Robust Minus Weak)

In [None]:
def compute_rmw(returns, roe, profitability_threshold=50):
    """
    Compute the RMW (Robust Minus Weak) profitability factor.

    For every row, assets whose ROE is at or above the row's
    profitability_threshold-th percentile form the "robust" leg and those
    strictly below it form the "weak" leg; the factor is the equal-weighted
    mean return of the robust leg minus that of the weak leg.

    :param returns: DataFrame of asset returns
    :param roe: DataFrame of return-on-equity values
        (assumes the same rows/columns layout as returns - TODO confirm)
    :param profitability_threshold: Percentile (0-100) used as the breakpoint
    :return: Series of RMW factor returns, one value per row
    """
    # Row-wise breakpoint across all assets.
    breakpoint_fraction = profitability_threshold / 100
    profit_cutoff = roe.quantile(breakpoint_fraction, axis=1)

    # Membership masks; assets with a missing ROE fall in neither leg.
    is_robust = roe.ge(profit_cutoff, axis="index")
    is_weak = roe.lt(profit_cutoff, axis="index")

    # Equal-weighted leg returns (non-members are masked to NaN and skipped).
    robust_leg = returns.where(is_robust).mean(axis=1)
    weak_leg = returns.where(is_weak).mean(axis=1)

    return robust_leg - weak_leg


## Investment Factor (CMA - Conservative Minus Aggressive)

In [None]:
def compute_cma(returns, asset_growth, investment_threshold=50):
    """
    Compute the CMA (Conservative Minus Aggressive) investment factor.

    For every row, assets whose asset growth is at or below the row's
    investment_threshold-th percentile form the "conservative" leg and the
    rest form the "aggressive" leg; the factor is the equal-weighted mean
    return of the conservative leg minus that of the aggressive leg.

    :param returns: DataFrame of asset returns
    :param asset_growth: DataFrame of asset-growth values
        (assumes the same rows/columns layout as returns - TODO confirm)
    :param investment_threshold: Percentile (0-100) used as the breakpoint
    :return: Series of CMA factor returns, one value per row
    """
    # Row-wise breakpoint across all assets.
    breakpoint_fraction = investment_threshold / 100
    investment_cutoff = asset_growth.quantile(breakpoint_fraction, axis=1)

    # Membership masks; assets with missing growth fall in neither leg.
    is_conservative = asset_growth.le(investment_cutoff, axis="index")
    is_aggressive = asset_growth.gt(investment_cutoff, axis="index")

    # Equal-weighted leg returns (non-members are masked to NaN and skipped).
    conservative_leg = returns.where(is_conservative).mean(axis=1)
    aggressive_leg = returns.where(is_aggressive).mean(axis=1)

    return conservative_leg - aggressive_leg


## Pastor-Stambaugh Liquidity Factor (LIQ)

In [None]:
def compute_liq(returns, traded_volume, market_impact, liquidity_threshold=50):
    """
    Compute the LIQ (Liquidity) factor.

    The liquidity measure is the element-wise product
    traded_volume * market_impact. For every row, assets whose measure is at
    or below the row's liquidity_threshold-th percentile form the "illiquid"
    leg and the rest form the "liquid" leg; the factor is the equal-weighted
    mean return of the illiquid leg minus that of the liquid leg.

    :param returns: DataFrame of asset returns
    :param traded_volume: DataFrame of traded volumes
    :param market_impact: DataFrame of market-impact values
        (both assumed to share the rows/columns layout of returns - TODO confirm)
    :param liquidity_threshold: Percentile (0-100) used as the breakpoint
    :return: Series of LIQ factor returns, one value per row
    """
    # Per-asset, per-row liquidity score.
    liquidity_measure = traded_volume * market_impact

    # Row-wise breakpoint across all assets.
    breakpoint_fraction = liquidity_threshold / 100
    liquidity_cutoff = liquidity_measure.quantile(breakpoint_fraction, axis=1)

    # Membership masks; assets with a missing score fall in neither leg.
    is_illiquid = liquidity_measure.le(liquidity_cutoff, axis="index")
    is_liquid = liquidity_measure.gt(liquidity_cutoff, axis="index")

    # Equal-weighted leg returns (non-members are masked to NaN and skipped).
    illiquid_leg = returns.where(is_illiquid).mean(axis=1)
    liquid_leg = returns.where(is_liquid).mean(axis=1)

    return illiquid_leg - liquid_leg


## Return on Equity (ROE) Factor

In [None]:
def compute_q_roe(returns, roe, roe_threshold=50):
    """
    Compute the ROE (Return on Equity) factor for the Q-Factor Model.

    For every row, assets whose ROE is at or above the row's
    roe_threshold-th percentile form the high-ROE leg and those strictly
    below it form the low-ROE leg; the factor is the equal-weighted mean
    return of the high-ROE leg minus that of the low-ROE leg.

    :param returns: DataFrame of asset returns
    :param roe: DataFrame of return-on-equity values
        (assumes the same rows/columns layout as returns - TODO confirm)
    :param roe_threshold: Percentile (0-100) used as the breakpoint
    :return: Series of ROE factor returns, one value per row
    """
    # Row-wise breakpoint across all assets.
    breakpoint_fraction = roe_threshold / 100
    roe_cutoff = roe.quantile(breakpoint_fraction, axis=1)

    # Membership masks; assets with a missing ROE fall in neither leg.
    is_high = roe.ge(roe_cutoff, axis="index")
    is_low = roe.lt(roe_cutoff, axis="index")

    # Equal-weighted leg returns (non-members are masked to NaN and skipped).
    high_leg = returns.where(is_high).mean(axis=1)
    low_leg = returns.where(is_low).mean(axis=1)

    return high_leg - low_leg


## Rolling Factor Betas

In [None]:
def rolling_beta(factor_returns, market_returns, window=60):
    """
    Compute the rolling beta of each factor with respect to the market.

    For every column of factor_returns the beta is the rolling covariance
    with market_returns divided by the rolling variance of market_returns,
    both taken over the same window length.

    :param factor_returns: DataFrame with one column per factor
    :param market_returns: Series of market index log returns
    :param window: Rolling window size in rows (months for monthly data)
    :return: DataFrame of rolling betas, one column per factor, with every
        row that contains a NaN removed
    """
    # The denominator does not depend on the factor, so compute it once.
    market_variance = market_returns.rolling(window=window).var()

    betas_by_factor = {
        factor: factor_returns[factor].rolling(window=window).cov(market_returns)
        / market_variance
        for factor in factor_returns.columns
    }

    # Re-align every beta series to the rows of factor_returns.
    rolling_betas = pd.DataFrame(betas_by_factor, index=factor_returns.index)

    return rolling_betas.dropna()


# Models

## Capital Asset Pricing Model (CAPM)

In [None]:
def capm(beta, risk_free_rate, market_return):
    """
    Compute expected returns using CAPM: E[R] = Rf + beta * (Rm - Rf).

    :param beta: DataFrame of betas (rows are matched to the Series inputs
        by index label, one column per asset); a Series also works
    :param risk_free_rate: scalar, or Series indexed like the rows of beta
    :param market_return: scalar, or Series indexed like the rows of beta
    :return: expected returns with the same columns as beta
    """
    market_premium = market_return - risk_free_rate

    # The risk-free rate must be added along the rows (axis=0). A plain
    # `risk_free_rate + DataFrame` would match a Series' index against the
    # DataFrame's column labels and yield an all-NaN frame.
    return beta.mul(market_premium, axis=0).add(risk_free_rate, axis=0)


## Black CAPM (No Risk-Free Asset; Intercept Set to Zero)

In [None]:
def black_capm(beta, market_return):
    """
    Compute expected returns using Black CAPM (no risk-free rate).

    The expected return is simply beta scaled by the market return; no
    intercept term is added.

    :param beta: DataFrame of betas (one column per asset)
    :param market_return: Series of market returns, matched to the rows of beta
    :return: DataFrame of expected returns
    """
    expected_returns = beta.multiply(market_return, axis="index")
    return expected_returns


## Fama-French Three-Factor Model (FF3)

In [None]:
def fama_french_3f(beta_m, beta_smb, beta_hml, risk_free_rate, market_return, smb, hml):
    """
    Compute expected returns using the Fama-French Three-Factor Model:
    E[R] = Rf + b_m * (Rm - Rf) + b_smb * SMB + b_hml * HML.

    :param beta_m: DataFrame of market betas (one column per asset)
    :param beta_smb: DataFrame of SMB betas
    :param beta_hml: DataFrame of HML betas
    :param risk_free_rate: Series (or scalar) matched to the rows of the betas
    :param market_return: Series matched to the rows of the betas
    :param smb: Series of SMB factor returns
    :param hml: Series of HML factor returns
    :return: DataFrame of expected returns
    """
    # Add the risk-free rate along the rows (axis=0). A plain
    # `risk_free_rate + DataFrame` would match the Series' index against the
    # column labels and yield an all-NaN frame.
    market_term = beta_m.mul(market_return - risk_free_rate, axis=0).add(risk_free_rate, axis=0)
    size_term = beta_smb.mul(smb, axis=0)
    value_term = beta_hml.mul(hml, axis=0)

    return market_term + size_term + value_term


## Carhart Four-Factor Model

In [None]:
def carhart_4f(beta_m, beta_smb, beta_hml, beta_umd, risk_free_rate, market_return, smb, hml, umd):
    """
    Compute expected returns using the Carhart Four-Factor Model:
    E[R] = Rf + b_m * (Rm - Rf) + b_smb * SMB + b_hml * HML + b_umd * UMD.

    :param beta_m: DataFrame of market betas (one column per asset)
    :param beta_smb: DataFrame of SMB betas
    :param beta_hml: DataFrame of HML betas
    :param beta_umd: DataFrame of UMD betas
    :param risk_free_rate: Series (or scalar) matched to the rows of the betas
    :param market_return: Series matched to the rows of the betas
    :param smb: Series of SMB factor returns
    :param hml: Series of HML factor returns
    :param umd: Series of UMD factor returns
    :return: DataFrame of expected returns
    """
    # Add the risk-free rate along the rows (axis=0). A plain
    # `risk_free_rate + DataFrame` would match the Series' index against the
    # column labels and yield an all-NaN frame.
    market_term = beta_m.mul(market_return - risk_free_rate, axis=0).add(risk_free_rate, axis=0)
    size_term = beta_smb.mul(smb, axis=0)
    value_term = beta_hml.mul(hml, axis=0)
    momentum_term = beta_umd.mul(umd, axis=0)

    return market_term + size_term + value_term + momentum_term


## Fama-French Five-Factor Model (FF5)

In [None]:
def fama_french_5f(beta_m, beta_smb, beta_hml, beta_rmw, beta_cma, risk_free_rate, market_return, smb, hml, rmw, cma):
    """
    Compute expected returns using the Fama-French Five-Factor Model:
    E[R] = Rf + b_m * (Rm - Rf) + b_smb * SMB + b_hml * HML
           + b_rmw * RMW + b_cma * CMA.

    :param beta_m: DataFrame of market betas (one column per asset)
    :param beta_smb: DataFrame of SMB betas
    :param beta_hml: DataFrame of HML betas
    :param beta_rmw: DataFrame of RMW betas
    :param beta_cma: DataFrame of CMA betas
    :param risk_free_rate: Series (or scalar) matched to the rows of the betas
    :param market_return: Series matched to the rows of the betas
    :param smb: Series of SMB factor returns
    :param hml: Series of HML factor returns
    :param rmw: Series of RMW factor returns
    :param cma: Series of CMA factor returns
    :return: DataFrame of expected returns
    """
    # Add the risk-free rate along the rows (axis=0). A plain
    # `risk_free_rate + DataFrame` would match the Series' index against the
    # column labels and yield an all-NaN frame.
    market_term = beta_m.mul(market_return - risk_free_rate, axis=0).add(risk_free_rate, axis=0)
    size_term = beta_smb.mul(smb, axis=0)
    value_term = beta_hml.mul(hml, axis=0)
    profitability_term = beta_rmw.mul(rmw, axis=0)
    investment_term = beta_cma.mul(cma, axis=0)

    return market_term + size_term + value_term + profitability_term + investment_term


## Pastor-Stambaugh Liquidity Model

In [None]:
def pastor_stambaugh(beta_m, beta_smb, beta_hml, beta_liq, risk_free_rate, market_return, smb, hml, liq):
    """
    Compute expected returns using the Pastor-Stambaugh Liquidity Model:
    E[R] = Rf + b_m * (Rm - Rf) + b_smb * SMB + b_hml * HML + b_liq * LIQ.

    :param beta_m: DataFrame of market betas (one column per asset)
    :param beta_smb: DataFrame of SMB betas
    :param beta_hml: DataFrame of HML betas
    :param beta_liq: DataFrame of LIQ betas
    :param risk_free_rate: Series (or scalar) matched to the rows of the betas
    :param market_return: Series matched to the rows of the betas
    :param smb: Series of SMB factor returns
    :param hml: Series of HML factor returns
    :param liq: Series of LIQ factor returns
    :return: DataFrame of expected returns
    """
    # Add the risk-free rate along the rows (axis=0). A plain
    # `risk_free_rate + DataFrame` would match the Series' index against the
    # column labels and yield an all-NaN frame.
    market_term = beta_m.mul(market_return - risk_free_rate, axis=0).add(risk_free_rate, axis=0)
    size_term = beta_smb.mul(smb, axis=0)
    value_term = beta_hml.mul(hml, axis=0)
    liquidity_term = beta_liq.mul(liq, axis=0)

    return market_term + size_term + value_term + liquidity_term


## Q-Factor Model

In [None]:
def q_factor(beta_m, beta_me, beta_i, beta_roe, risk_free_rate, market_return, me, i, roe):
    """
    Compute expected returns using the Q-Factor Model:
    E[R] = Rf + b_m * (Rm - Rf) + b_me * ME + b_i * I + b_roe * ROE.

    :param beta_m: DataFrame of market betas (one column per asset)
    :param beta_me: DataFrame of size (ME) betas
    :param beta_i: DataFrame of investment (I) betas
    :param beta_roe: DataFrame of ROE betas
    :param risk_free_rate: Series (or scalar) matched to the rows of the betas
    :param market_return: Series matched to the rows of the betas
    :param me: Series of size factor returns
    :param i: Series of investment factor returns
    :param roe: Series of ROE factor returns
    :return: DataFrame of expected returns
    """
    # Add the risk-free rate along the rows (axis=0). A plain
    # `risk_free_rate + DataFrame` would match the Series' index against the
    # column labels and yield an all-NaN frame.
    market_term = beta_m.mul(market_return - risk_free_rate, axis=0).add(risk_free_rate, axis=0)
    size_term = beta_me.mul(me, axis=0)
    investment_term = beta_i.mul(i, axis=0)
    roe_term = beta_roe.mul(roe, axis=0)

    return market_term + size_term + investment_term + roe_term


##  Consumption CAPM (CCAPM)

In [None]:
def ccapm(beta_c, risk_free_rate, consumption_growth):
    """
    Compute expected returns using the Consumption CAPM (CCAPM):
    E[R] = Rf + beta_c * consumption_growth.

    :param beta_c: DataFrame of consumption betas (one column per asset)
    :param risk_free_rate: Series (or scalar) matched to the rows of beta_c
    :param consumption_growth: Series matched to the rows of beta_c
    :return: DataFrame of expected returns
    """
    consumption_term = beta_c.mul(consumption_growth, axis=0)

    # Add the risk-free rate along the rows (axis=0). A plain
    # `risk_free_rate + DataFrame` would match the Series' index against the
    # column labels and yield an all-NaN frame.
    return consumption_term.add(risk_free_rate, axis=0)


##  Intertemporal CAPM (ICAPM)

In [None]:
def icapm(beta_m, beta_z, risk_free_rate, market_return, state_variables):
    """
    Compute expected returns using the Intertemporal CAPM (ICAPM):
    E[R] = Rf + b_m * (Rm - Rf) + sum_k b_z[k] * Z[k].

    :param beta_m: DataFrame of market betas (one column per asset)
    :param beta_z: DataFrame of state-variable betas; multiplied element-wise
        with state_variables, so it is expected to share its columns
        (presumably one column per state variable - confirm with callers)
    :param risk_free_rate: Series (or scalar) matched to the rows of beta_m
    :param market_return: Series matched to the rows of beta_m
    :param state_variables: DataFrame of state-variable values
    :return: DataFrame of expected returns with the columns of beta_m
    """
    # One hedging premium per row: sum over all state-variable columns.
    hedging_term = beta_z.mul(state_variables, axis=0).sum(axis=1)

    market_term = beta_m.mul(market_return - risk_free_rate, axis=0)

    # Both row-indexed Series must be added along the rows (axis=0). A plain
    # `Series + DataFrame` / `DataFrame + Series` would match the Series'
    # index against the column labels and yield an all-NaN frame.
    return market_term.add(risk_free_rate, axis=0).add(hedging_term, axis=0)


## Stochastic Discount Factor Model (SDF)


In [None]:
def sdf(expected_m, returns):
    """
    Compute expected returns using the Stochastic Discount Factor Model (SDF).

    The result is the element-wise product of the discount-factor values and
    the returns, aligned on both row and column labels.

    :param expected_m: DataFrame of stochastic discount factor values
    :param returns: DataFrame of asset returns
    :return: DataFrame of expected returns
    """
    discounted_returns = expected_m.multiply(returns, axis="index")
    return discounted_returns


## Arbitrage Pricing Theory (APT) Model

In [None]:
def apt(beta_factors, risk_free_rate, factor_returns):
    """
    Compute expected returns using the Arbitrage Pricing Theory (APT) Model:
    E[R] = Rf + sum_k beta[k] * F[k].

    :param beta_factors: DataFrame of factor betas (one column per factor)
    :param risk_free_rate: Series (or scalar) matched to the rows of beta_factors
    :param factor_returns: DataFrame of factor returns, multiplied element-wise
        with beta_factors
    :return: Series of expected returns, one value per row (the factor
        columns are summed away)
    """
    # Per-factor contributions, then the total factor premium per row.
    factor_contributions = beta_factors.multiply(factor_returns, axis="index")
    factor_premium = factor_contributions.sum(axis=1)

    return risk_free_rate + factor_premium
