In [65]:
import pandas as pd
import numpy as np
import math
import sys
from sklearn.linear_model import LinearRegression
import scipy.optimize
import statsmodels.api as sm

# Locations of the input files, relative to the notebook's working directory.
momentum_data_path = 'F-F_Momentum_Factor_daily.CSV'
fama_french_data_path = 'F-F_Research_Data_Factors_daily.CSV'
stock_prices_path = 'DailyPrices.csv'

# Daily price table; its 'Date' column is parsed later, before returns are computed.
stock_prices = pd.read_csv(stock_prices_path)

# The two factor files are read by the following cell, which parses them from
# the paths above and then cleans them. Reading them here as well only repeated
# the slow engine='python' parse, and the result was overwritten straight away.

In [66]:
def _read_factor_csv(path, column_names):
    """Read one factor CSV and return it with a parsed 'Date' column.

    The first 4 lines and the last 2 lines of the file are skipped, the
    columns are renamed positionally to ``column_names``, and 'Date' is parsed
    as YYYYMMDD. Rows whose date cannot be parsed are dropped.
    """
    frame = pd.read_csv(path, skiprows=4, skipfooter=2, engine='python')
    frame.columns = column_names
    # errors='coerce' turns unparseable dates into NaT so they can be dropped.
    frame['Date'] = pd.to_datetime(frame['Date'], format='%Y%m%d', errors='coerce')
    return frame.dropna(subset=['Date'])


momentum_data = _read_factor_csv(momentum_data_path, ['Date', 'Momentum'])
fama_french_data = _read_factor_csv(
    fama_french_data_path, ['Date', 'Mkt-RF', 'SMB', 'HML', 'RF']
)

# Inner merge: keep only the dates present in both factor tables.
merged_data = fama_french_data.merge(momentum_data, on='Date')

In [69]:
def return_calculate(prices: pd.DataFrame, method="DISCRETE", date_column="Date"):
    """Convert a table of prices into a table of period-over-period returns.

    Parameters
    ----------
    prices : pd.DataFrame
        One row per observation. Must contain ``date_column``; every other
        column is treated as a price series.
    method : str
        "DISCRETE" for ``p[t] / p[t-1] - 1`` or "LOG" for
        ``log(p[t] / p[t-1])``. Case-insensitive.
    date_column : str
        Name of the column holding the observation dates.

    Returns
    -------
    pd.DataFrame
        ``date_column`` first, then one return column per price column. The
        result has one row fewer than ``prices`` because the first observation
        has no predecessor; its index is a fresh 0..n-2 range.

    Raises
    ------
    ValueError
        If ``date_column`` is missing or ``method`` is not recognised.
    """
    if date_column not in prices.columns:
        raise ValueError(f"date_column: {date_column} not in DataFrame")

    # Every column except the date column is a price series.
    price_columns = prices.columns.tolist()
    price_columns.remove(date_column)

    values = prices[price_columns].to_numpy()

    kind = method.upper()
    if kind == "DISCRETE":
        returns = (values[1:, :] / values[:-1, :]) - 1.0
    elif kind == "LOG":
        returns = np.log(values[1:, :] / values[:-1, :])
    else:
        raise ValueError(f"Invalid method '{method}'. Must be 'DISCRETE' or 'LOG'.")

    # Each return is labelled with the date of the later of its two prices.
    dates = prices[date_column].iloc[1:].reset_index(drop=True)
    returns_df = pd.DataFrame(returns, columns=price_columns)
    returns_df.insert(0, date_column, dates)

    return returns_df

In [70]:
stock_prices['Date'] = pd.to_datetime(stock_prices['Date'])

# Daily discrete returns for every ticker, indexed by date.
stock_returns = return_calculate(
    stock_prices, method="DISCRETE", date_column="Date"
).set_index("Date")

stocks = ['AAPL', 'META', 'UNH', 'MA', 'MSFT', 'NVDA', 'HD', 'PFE',
          'AMZN', 'BRK-B', 'PG', 'XOM', 'TSLA', 'JPM', 'V', 'DIS',
          'GOOGL', 'JNJ', 'BAC', 'CSCO']

# Restrict the factor table to the date window covered by the stock returns.
merged_data = merged_data[(merged_data['Date'] >= stock_returns.index.min()) &
                          (merged_data['Date'] <= stock_returns.index.max())]

# Match factor rows to return rows by date, not by row position. If a date is
# present in one source but missing from the other, positional matching either
# fails on a length mismatch or silently pairs returns with the wrong day.
factors_by_date = merged_data.set_index('Date')

# RF is divided by 100 before being subtracted from the returns. Dates that
# have no RF observation become NaN and are excluded from the regressions.
risk_free_rate = (factors_by_date['RF'] / 100).reindex(stock_returns.index).to_numpy()
excess_returns = stock_returns.sub(risk_free_rate, axis=0)

factors = ['Mkt-RF', 'SMB', 'HML', 'Momentum']
# NOTE(review): the factor columns are used exactly as loaded, while RF is
# divided by 100 above -- confirm both are on the intended scale.
avg_factor_rets = merged_data[factors].mean()
expected_returns = {}
coefficients = {}

for stock in stocks:
    # Inner join keeps only dates that have both factor values and a return.
    regression_sample = (
        factors_by_date[factors]
        .join(excess_returns[stock], how='inner')
        .dropna()
    )
    if regression_sample.empty:
        raise ValueError(f"No overlapping dates between factor data and returns for {stock}")

    model = LinearRegression()
    factor_data = regression_sample[factors]
    model.fit(factor_data, regression_sample[stock])
    coefficients[stock] = model.coef_

    # Expected return is the mean daily return scaled by 252.
    daily_return_mean = np.mean(stock_returns[stock])
    expected_returns[stock] = daily_return_mean * 252

# Covariance of daily returns scaled by the same factor of 252.
annual_cov_matrix = stock_returns[stocks].cov() * 252

In [71]:
def neg_sharpe_ratio(weights, mean_returns, cov_matrix, risk_free_rate):
    """Return the negated Sharpe ratio of a weighted portfolio.

    The value is ``-(w . mean_returns - risk_free_rate) / sqrt(w' C w)``, so
    minimising this function maximises the Sharpe ratio.
    """
    w = np.array(weights)
    excess_return = np.dot(w, mean_returns) - risk_free_rate
    variance = np.dot(w.T, np.dot(cov_matrix, w))
    volatility = np.sqrt(variance)
    return -(excess_return / volatility)

# Weights must sum to 1 and each weight is bounded to [0, 1].
constraints = {'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1}
bounds = [(0, 1) for _ in stocks]
# Start the search from the equally weighted portfolio.
initial_weights = np.full(len(stocks), 1 / len(stocks))

# Build the return vector in `stocks` order explicitly, so it lines up with
# the bounds and with the covariance matrix regardless of how the
# expected_returns dict was populated.
mean_returns = np.array([expected_returns[stock] for stock in stocks])
risk_free_rate = float(0.05)

result = scipy.optimize.minimize(
    fun=neg_sharpe_ratio,
    x0=initial_weights,
    args=(mean_returns, annual_cov_matrix, risk_free_rate),
    method='SLSQP',
    bounds=bounds,
    constraints=constraints
)

# Do not report the weights of a run the optimiser itself flags as failed.
if not result.success:
    raise RuntimeError(f"Sharpe-ratio optimisation did not converge: {result.message}")

optimal_weights = result.x
# The objective is the negated Sharpe ratio, so flip the sign back.
optimal_sharpe_ratio = -result.fun

optimal_portfolio_return = np.dot(optimal_weights, mean_returns)
optimal_portfolio_std_dev = np.sqrt(np.dot(optimal_weights.T, np.dot(annual_cov_matrix, optimal_weights)))

In [73]:
# One row per ticker; weights are rounded to 4 decimals and then multiplied by 100.
optimal_weights_df = (
    pd.Series(optimal_weights, index=stocks, name="Weight")
    .map(lambda weight: round(weight, 4) * 100)
    .to_frame()
)

# Each heading is followed by its table on the next line.
print("Annual Covariance Matrix:", annual_cov_matrix, sep="\n")
print("Super Efficient Portfolio Weights:", optimal_weights_df, sep="\n")

# Summary statistics of the optimised portfolio.
print("\nExpected Annual Return:", optimal_portfolio_return)
print("Portfolio Risk (Standard Deviation):", optimal_portfolio_std_dev)
print("Sharpe Ratio:", optimal_sharpe_ratio)


Annual Covariance Matrix:
           AAPL      META       UNH        MA      MSFT      NVDA        HD  \
AAPL   0.050501  0.020870 -0.001577  0.010380  0.021561  0.033939  0.010867   
META   0.020870  0.136272 -0.011660  0.015539  0.041722  0.077580  0.010079   
UNH   -0.001577 -0.011660  0.049432  0.005066 -0.001927 -0.016474  0.006551   
MA     0.010380  0.015539  0.005066  0.027007  0.012275  0.021657  0.011621   
MSFT   0.021561  0.041722 -0.001927  0.012275  0.039604  0.045418  0.011498   
NVDA   0.033939  0.077580 -0.016474  0.021657  0.045418  0.253770  0.023843   
HD     0.010867  0.010079  0.006551  0.011621  0.011498  0.023843  0.042762   
PFE    0.002864  0.000547  0.010636  0.003087  0.005221 -0.016984  0.005594   
AMZN   0.024202  0.064354 -0.006390  0.015380  0.035189  0.067779  0.016908   
BRK-B  0.006499  0.008382  0.006006  0.010321  0.006653  0.003171  0.010077   
PG    -0.000199 -0.000426  0.007212  0.004023  0.002121 -0.009521  0.003260   
XOM   -0.002407 -0.005812 