In [73]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.optimize as opt

In [74]:
# data: 
folder = 'data'
xls_dict  = pd.read_excel(folder + '/trading-game-data-20102023.xlsx', sheet_name=None)

index_price_df = xls_dict['index-price']
price_df = xls_dict['price']
size_df = xls_dict['size']
price_to_book_df = xls_dict['price-to-book']
turnover_df = xls_dict['turnover']

## Markowitz Portfolio Theory

In [115]:
price_df = xls_dict['price'].reset_index()
price_df = price_df.drop('index', axis = 1)
price_df['Date'] = pd.to_datetime(price_df['Date'])
price_df.set_index('Date', inplace=True)
daily_returns = price_df.pct_change()
expected_returns = daily_returns.mean()
risk = daily_returns.std()

risk.sort_values()

WMT      0.008431
KO       0.008471
MCD      0.008852
BRK.B    0.008859
PG       0.009301
           ...   
KEY      0.036913
SEDG     0.038979
CMA      0.040414
CTLT     0.041364
ZION     0.041840
Length: 500, dtype: float64

## CAPM model
- Time horizon is the full dataframe

In [62]:
def calc_CAPM_betas(daily_returns, sp_500_daily_returns):
    """
    Calculate the CAPM beta values for the stocks in the daily_returns DataFrame.
    """
    
    # Join the daily returns of the stocks with the S&P 500 daily returns
    daily_returns_with_sp500 = daily_returns.join(sp_500_daily_returns.rename('SP500'))
    
    # Calculate the covariance matrix of the returns
    cov_matrix_with_sp500 = daily_returns_with_sp500.cov()
    
    # The market variance is the variance of the S&P 500 returns
    market_var = sp_500_daily_returns.var()
    
    # Calculate the betas for each stock
    betas = cov_matrix_with_sp500.loc[:, 'SP500'] / market_var
    
    betas = betas.drop(['SP500', 'index'], axis=0)
    return betas

index_price_df = xls_dict['index-price'].reset_index()
index_price_df['Date'] = pd.to_datetime(index_price_df['Date'])
index_price_df.set_index('Date', inplace=True)
sp_500_daily_returns = index_price_df['S&P 500'].pct_change()

betas = calc_CAPM_betas(daily_returns, sp_500_daily_returns)
betas

A       0.912992
AAL     1.348263
AAPL    1.150799
ABBV    0.130669
ABNB    1.690057
          ...   
YUM     0.556312
ZBH     0.571193
ZBRA    1.702181
ZION    2.221824
ZTS     0.977542
Name: SP500, Length: 500, dtype: float64

In [71]:
def calc_expectedreturns(daily_returns, rf_rate, betas, market_return):
    expected_returns = rf_rate + betas * (market_return - rf_rate)
    
    average_returns = daily_returns.mean() * 252  # Assuming 252 trading days in a year

    # Step 3: Determine undervalued/overvalued stocks
    comparison = pd.DataFrame({
        'Beta': betas,
        'Expected Return': expected_returns,
        'Average Return': average_returns
    })
    comparison['Over/Under Valued'] = comparison.apply(
        lambda row: 'Undervalued' if row['Average Return'] > row['Expected Return'] else 'Overvalued',
        axis=1
    )
    
    return comparison

if 'index' in daily_returns.columns:
    daily_returns = daily_returns.drop(['index'], axis=1).copy()

market_return = np.prod(1 + sp_500_daily_returns.dropna())**(252 / len(sp_500_daily_returns.dropna())) - 1
risk_free_rate = 0.0477 
result_df = calc_expectedreturns(daily_returns, risk_free_rate,betas, market_return)
print(result_df)

          Beta  Expected Return  Average Return Over/Under Valued
A     0.912992         0.119640       -0.361128        Overvalued
AAL   1.348263         0.153937       -0.107494        Overvalued
AAPL  1.150799         0.138378        0.378982       Undervalued
ABBV  0.130669         0.057996       -0.104357        Overvalued
ABNB  1.690057         0.180869        0.479042       Undervalued
...        ...              ...             ...               ...
YUM   0.556312         0.091535       -0.068445        Overvalued
ZBH   0.571193         0.092707       -0.225642        Overvalued
ZBRA  1.702181         0.181824       -0.195368        Overvalued
ZION  2.221824         0.222770       -0.390007        Overvalued
ZTS   0.977542         0.124726        0.193654       Undervalued

[500 rows x 4 columns]


In [110]:
undervalued_stocks = result_df[result_df['Over/Under Valued'] == 'Undervalued']
undervalued_stocks['Undervalued Score'] = undervalued_stocks['Average Return'] - undervalued_stocks['Expected Return']
undervalued_count = len(undervalued_stocks)

if undervalued_count > 200:
    # If there are more than 200 undervalued stocks, select the MOST undervalued
    most_undervalued_stocks = undervalued_stocks.sort_values(by='Undervalued Score', ascending=False)
    top_undervalued_stocks = most_undervalued_stocks.head(200)
    selected_stocks = top_undervalued_stocks.index

elif undervalued_count < 200:
    # If there are less than 200 undervalued stocks, select out of the overvalued the least overvalued ones
    overvalued_stocks = result_df[result_df['Over/Under Valued'] == 'Overvalued']
    overvalued_stocks['Potential'] = overvalued_stocks['Average Return'] - overvalued_stocks['Expected Return']
    sorted_overvalued_stocks = overvalued_stocks.sort_values(by='Potential', ascending=False)
    combined_stocks = pd.concat([undervalued_stocks, sorted_overvalued_stocks.head(200 - undervalued_count)])
    selected_stocks = combined_stocks.index
    
else: 
    selected_stocks = undervalued_stocks.index

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  undervalued_stocks['Undervalued Score'] = undervalued_stocks['Average Return'] - undervalued_stocks['Expected Return']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  overvalued_stocks['Potential'] = overvalued_stocks['Average Return'] - overvalued_stocks['Expected Return']


In [120]:
newdata = price_df.drop(price_df.columns[0], axis=1)
returns = np.log(newdata/newdata.shift(1))
returns = returns.drop(returns.index[0])  
filtered_returns = returns[selected_stocks]

n_stocks = len(selected_stocks)

In [131]:
def objective_function(weights: list, returns):
    
    mean_returns = np.mean(returns, axis=0)
    portfolio_return = weights @  mean_returns

    portfolio_std = np.sqrt(weights @ np.cov(returns.T) @ weights.T)

    return -1 * (portfolio_return - 0.25 * portfolio_std)  # Minimize the negative of the objective

initial_weights = np.array([1 / n_stocks] * n_stocks)
constraints = ( 
        {'type': 'ineq', 'fun': lambda weights: 0.85 - np.sum(weights)},  # Sum of weights >= 0.85
        {'type': 'ineq', 'fun': lambda weights: np.sum(weights) - 1.0}  # Sum of weights <= 1
    )
bounds = tuple((0, 1) for x in range(n_stocks))


optimized = opt.minimize(objective_function, initial_weights, args= (filtered_returns), bounds=bounds, constraints=constraints)  # Adjust the method as needed
optimal_weights = optimized.x

In [132]:
def calc_normalized_weights(optimal_weights, stock_names, threshold = 0.01):

    thresholded_weights = np.where(optimal_weights >= threshold, optimal_weights, 0)

    if np.sum(thresholded_weights) > 0:  # Prevent division by zero
        normalized_weights = thresholded_weights / np.sum(thresholded_weights)
    else:
        normalized_weights = thresholded_weights  # In case all are zero, which should not happen
    
    stocks_with_weights = [(stock, weight) for stock, weight in zip(stock_names, normalized_weights) if weight >= threshold]
    # Print stocks with their corresponding weights
    
    for stock, weight in stocks_with_weights:
        print(f"{stock}: {weight}")
    return normalized_weights

stock_names = returns.columns
normalized_weights = calc_normalized_weights(optimal_weights, stock_names)

AMP: 0.21619339248902458
APH: 0.025008820969735127
BMY: 0.12754032711157415
CDNS: 0.17885060886183704
CHD: 0.05343033382738408
CHTR: 0.14232028657568482
CLX: 0.04722978937923598
COP: 0.10380820813611627
CSCO: 0.027424325473572094
CSX: 0.024113171348031374
ELV: 0.054080735827804476
