In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.optimize as opt

In [2]:
# data: 
folder = 'data'
xls_dict  = pd.read_excel(folder + '/trading-game-data-08112023.xlsx', sheet_name=None)

index_price_df = xls_dict['index-price']
price_df = xls_dict['price']
size_df = xls_dict['size']
price_to_book_df = xls_dict['price-to-book']
turnover_df = xls_dict['turnover']

## Markowitz Portfolio Theory

In [3]:
price_df = xls_dict['price'].reset_index()
price_df = price_df.drop('index', axis = 1)
price_df['Date'] = pd.to_datetime(price_df['Date'])
price_df.set_index('Date', inplace=True)
daily_returns = price_df.pct_change()
expected_returns = daily_returns.mean()
risk = daily_returns.std()

risk.sort_values()

WMT      0.008384
KO       0.008696
MCD      0.008889
BRK.B    0.008931
PG       0.009294
           ...   
GNRC     0.036768
SEDG     0.038786
CMA      0.039898
ZION     0.041273
CTLT     0.042210
Length: 500, dtype: float64

## CAPM model
- Time horizon is the full dataframe

In [4]:
def calc_CAPM_betas(daily_returns, sp_500_daily_returns):
    """
    Calculate the CAPM beta values for the stocks in the daily_returns DataFrame.
    """
    
    # Join the daily returns of the stocks with the S&P 500 daily returns
    daily_returns_with_sp500 = daily_returns.join(sp_500_daily_returns.rename('SP500'))
    
    # Calculate the covariance matrix of the returns
    cov_matrix_with_sp500 = daily_returns_with_sp500.cov()
    
    # The market variance is the variance of the S&P 500 returns
    market_var = sp_500_daily_returns.var()
    
    # Calculate the betas for each stock
    betas = cov_matrix_with_sp500.loc[:, 'SP500'] / market_var
    
    # if ['index'] in betas.columns: 
    betas = betas.drop(['SP500'], axis=0)
    return betas

index_price_df = xls_dict['index-price'].reset_index()
index_price_df['Date'] = pd.to_datetime(index_price_df['Date'])
index_price_df.set_index('Date', inplace=True)
sp_500_daily_returns = index_price_df['S&P 500'].pct_change()

n_observations = index_price_df.shape[0]

betas = calc_CAPM_betas(daily_returns, sp_500_daily_returns)
betas

A       0.892179
AAL     1.329263
AAPL    1.141045
ABBV    0.170225
ABNB    1.641490
          ...   
YUM     0.572034
ZBH     0.598824
ZBRA    1.630006
ZION    2.190876
ZTS     1.022609
Name: SP500, Length: 500, dtype: float64

In [5]:
def calc_expectedreturns(daily_returns, rf_rate, betas, market_return):
    expected_returns = rf_rate + betas * (market_return - rf_rate)
    
    average_returns = daily_returns.mean() * n_observations  # Assuming 252 trading days in a year

    # Step 3: Determine undervalued/overvalued stocks
    comparison = pd.DataFrame({
        'Beta': betas,
        'Expected Return': expected_returns,
        'Average Return': average_returns
    })
    comparison['Over/Under Valued'] = comparison.apply(
        lambda row: 'Undervalued' if row['Average Return'] > row['Expected Return'] else 'Overvalued',
        axis=1
    )
    
    return comparison

if 'index' in daily_returns.columns:
    daily_returns = daily_returns.drop(['index'], axis=1).copy()

market_return = np.prod(1 + sp_500_daily_returns.dropna())**(n_observations / len(sp_500_daily_returns.dropna())) - 1
risk_free_rate = 0.0477 
result_df = calc_expectedreturns(daily_returns, risk_free_rate,betas, market_return)
print(result_df)

          Beta  Expected Return  Average Return Over/Under Valued
A     0.892179         0.130986       -0.275985        Overvalued
AAL   1.329263         0.171789       -0.024790        Overvalued
AAPL  1.141045         0.154218        0.356988       Undervalued
ABBV  0.170225         0.063591       -0.110896        Overvalued
ABNB  1.641490         0.200936        0.429907       Undervalued
...        ...              ...             ...               ...
YUM   0.572034         0.101100       -0.009127        Overvalued
ZBH   0.598824         0.103601       -0.166722        Overvalued
ZBRA  1.630006         0.199864       -0.135159        Overvalued
ZION  2.190876         0.252222       -0.178857        Overvalued
ZTS   1.022609         0.143162        0.160190       Undervalued

[500 rows x 4 columns]


In [6]:
undervalued_stocks = result_df[result_df['Over/Under Valued'] == 'Undervalued']
undervalued_stocks['Undervalued Score'] = undervalued_stocks['Average Return'] - undervalued_stocks['Expected Return']
undervalued_count = len(undervalued_stocks)

if undervalued_count > 200:
    # If there are more than 200 undervalued stocks, select the MOST undervalued
    most_undervalued_stocks = undervalued_stocks.sort_values(by='Undervalued Score', ascending=False)
    top_undervalued_stocks = most_undervalued_stocks.head(200)
    selected_stocks = top_undervalued_stocks.index

elif undervalued_count < 200:
    # If there are less than 200 undervalued stocks, select out of the overvalued the least overvalued ones
    overvalued_stocks = result_df[result_df['Over/Under Valued'] == 'Overvalued']
    overvalued_stocks['Potential'] = overvalued_stocks['Average Return'] - overvalued_stocks['Expected Return']
    sorted_overvalued_stocks = overvalued_stocks.sort_values(by='Potential', ascending=False)
    combined_stocks = pd.concat([undervalued_stocks, sorted_overvalued_stocks.head(200 - undervalued_count)])
    selected_stocks = combined_stocks.index
    
else: 
    selected_stocks = undervalued_stocks.index

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  undervalued_stocks['Undervalued Score'] = undervalued_stocks['Average Return'] - undervalued_stocks['Expected Return']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  overvalued_stocks['Potential'] = overvalued_stocks['Average Return'] - overvalued_stocks['Expected Return']


In [7]:
selected_stock_names = stocks_with_weights['Stock'].tolist()
filtered_result_df = result_df[result_df.index.isin(selected_stock_names)]
filtered_result_df

NameError: name 'stocks_with_weights' is not defined

In [8]:
newdata = price_df.drop(price_df.columns[0], axis=1)
returns = np.log(newdata/newdata.shift(1))
returns = returns.drop(returns.index[0])  
returns = returns[selected_stocks]

n_stocks = len(selected_stocks)

In [10]:
def objective_function(weights: list, returns):
    
    mean_returns = np.mean(returns, axis=0)
    portfolio_return = weights @  mean_returns

    portfolio_std = np.sqrt(weights @ np.cov(returns.T) @ weights.T)
    # print(portfolio_std)

    return -1 * (portfolio_return - 0.25 * portfolio_std)  # Minimize the negative of the objective

initial_weights = np.array([1 / n_stocks] * n_stocks)
constraints = ( 
        {'type': 'ineq', 'fun': lambda weights: 0.85 - np.sum(weights)},  # Sum of weights >= 0.85
        {'type': 'ineq', 'fun': lambda weights: np.sum(weights) - 1.0}  # Sum of weights <= 1
    )
bounds = tuple((0, 1) for x in range(n_stocks))


optimized = opt.minimize(objective_function, initial_weights, args= (returns), bounds=bounds, constraints=constraints)  # Adjust the method as needed
optimal_weights = optimized.x

In [12]:
stock_names = returns.columns
stocks_with_weights = calc_normalized_weights(optimal_weights, stock_names, threshold=0.01)
stocks_with_weights

Unnamed: 0,Stock,Weight
27,CBOE,0.256455
36,CMG,0.032481
37,COR,0.063093
62,GE,0.100287
78,LLY,0.144707
83,MCK,0.020434
85,META,0.129168
89,MPC,0.031464
99,NVDA,0.113009
106,PANW,0.020049


In [13]:
stock_names = returns.columns
stocks_with_weights = calc_normalized_weights(optimal_weights, stock_names, threshold=0.001)
stocks_with_weights.shape

(13, 2)