In [24]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.optimize as opt

The history saving thread hit an unexpected error (OperationalError('database or disk is full')).History will not be written to the database.


In [25]:
# data: 
folder = 'data'
xls_dict  = pd.read_excel(folder + '/trading-game-data-20102023.xlsx', sheet_name=None)

index_price_df = xls_dict['index-price']
price_df = xls_dict['price']
size_df = xls_dict['size']
price_to_book_df = xls_dict['price-to-book']
turnover_df = xls_dict['turnover']

## Markowitz Portfolio Theory

In [26]:
price_df = xls_dict['price'].reset_index()
price_df = price_df.drop('index', axis = 1)
price_df['Date'] = pd.to_datetime(price_df['Date'])
price_df.set_index('Date', inplace=True)
daily_returns = price_df.pct_change()
expected_returns = daily_returns.mean()
risk = daily_returns.std()

risk.sort_values()

WMT      0.008431
KO       0.008471
MCD      0.008852
BRK.B    0.008859
PG       0.009301
           ...   
KEY      0.036913
SEDG     0.038979
CMA      0.040414
CTLT     0.041364
ZION     0.041840
Length: 500, dtype: float64

## CAPM model
- Time horizon is the full dataframe

In [27]:
def calc_CAPM_betas(daily_returns, sp_500_daily_returns):
    """
    Calculate the CAPM beta values for the stocks in the daily_returns DataFrame.
    """
    
    # Join the daily returns of the stocks with the S&P 500 daily returns
    daily_returns_with_sp500 = daily_returns.join(sp_500_daily_returns.rename('SP500'))
    
    # Calculate the covariance matrix of the returns
    cov_matrix_with_sp500 = daily_returns_with_sp500.cov()
    
    # The market variance is the variance of the S&P 500 returns
    market_var = sp_500_daily_returns.var()
    
    # Calculate the betas for each stock
    betas = cov_matrix_with_sp500.loc[:, 'SP500'] / market_var
    
    # if ['index'] in betas.columns: 
    betas = betas.drop(['SP500'], axis=0)
    return betas

index_price_df = xls_dict['index-price'].reset_index()
index_price_df['Date'] = pd.to_datetime(index_price_df['Date'])
index_price_df.set_index('Date', inplace=True)
sp_500_daily_returns = index_price_df['S&P 500'].pct_change()

n_observations = index_price_df.shape[0]

betas = calc_CAPM_betas(daily_returns, sp_500_daily_returns)
betas

A       0.912992
AAL     1.348263
AAPL    1.150799
ABBV    0.130669
ABNB    1.690057
          ...   
YUM     0.556312
ZBH     0.571193
ZBRA    1.702181
ZION    2.221824
ZTS     0.977542
Name: SP500, Length: 500, dtype: float64

In [28]:
def calc_expectedreturns(daily_returns, rf_rate, betas, market_return):
    expected_returns = rf_rate + betas * (market_return - rf_rate)
    
    average_returns = daily_returns.mean() * n_observations  # Assuming 252 trading days in a year

    # Step 3: Determine undervalued/overvalued stocks
    comparison = pd.DataFrame({
        'Beta': betas,
        'Expected Return': expected_returns,
        'Average Return': average_returns
    })
    comparison['Over/Under Valued'] = comparison.apply(
        lambda row: 'Undervalued' if row['Average Return'] > row['Expected Return'] else 'Overvalued',
        axis=1
    )
    
    return comparison

if 'index' in daily_returns.columns:
    daily_returns = daily_returns.drop(['index'], axis=1).copy()

market_return = np.prod(1 + sp_500_daily_returns.dropna())**(n_observations / len(sp_500_daily_returns.dropna())) - 1
risk_free_rate = 0.0477 
result_df = calc_expectedreturns(daily_returns, risk_free_rate,betas, market_return)
print(result_df)

          Beta  Expected Return  Average Return Over/Under Valued
A     0.912992         0.096093       -0.290908        Overvalued
AAL   1.348263         0.119165       -0.086592        Overvalued
AAPL  1.150799         0.108698        0.305291       Undervalued
ABBV  0.130669         0.054626       -0.084066        Overvalued
ABNB  1.690057         0.137282        0.385895       Undervalued
...        ...              ...             ...               ...
YUM   0.556312         0.077187       -0.055136        Overvalued
ZBH   0.571193         0.077976       -0.181767        Overvalued
ZBRA  1.702181         0.137924       -0.157380        Overvalued
ZION  2.221824         0.165468       -0.314172        Overvalued
ZTS   0.977542         0.099515        0.155999       Undervalued

[500 rows x 4 columns]


In [29]:
undervalued_stocks = result_df[result_df['Over/Under Valued'] == 'Undervalued']
undervalued_stocks['Undervalued Score'] = undervalued_stocks['Average Return'] - undervalued_stocks['Expected Return']
undervalued_count = len(undervalued_stocks)

if undervalued_count > 200:
    # If there are more than 200 undervalued stocks, select the MOST undervalued
    most_undervalued_stocks = undervalued_stocks.sort_values(by='Undervalued Score', ascending=False)
    top_undervalued_stocks = most_undervalued_stocks.head(200)
    selected_stocks = top_undervalued_stocks.index

elif undervalued_count < 200:
    # If there are less than 200 undervalued stocks, select out of the overvalued the least overvalued ones
    overvalued_stocks = result_df[result_df['Over/Under Valued'] == 'Overvalued']
    overvalued_stocks['Potential'] = overvalued_stocks['Average Return'] - overvalued_stocks['Expected Return']
    sorted_overvalued_stocks = overvalued_stocks.sort_values(by='Potential', ascending=False)
    combined_stocks = pd.concat([undervalued_stocks, sorted_overvalued_stocks.head(200 - undervalued_count)])
    selected_stocks = combined_stocks.index
    
else: 
    selected_stocks = undervalued_stocks.index

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  undervalued_stocks['Undervalued Score'] = undervalued_stocks['Average Return'] - undervalued_stocks['Expected Return']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  overvalued_stocks['Potential'] = overvalued_stocks['Average Return'] - overvalued_stocks['Expected Return']


In [62]:
selected_stock_names = stocks_with_weights['Stock'].tolist()
filtered_result_df = result_df[result_df.index.isin(selected_stock_names)]
filtered_result_df

Unnamed: 0,Beta,Expected Return,Average Return,Over/Under Valued
CBOE,0.266317,0.061816,0.268429,Undervalued
CME,0.385632,0.06814,0.24959,Undervalued
COR,0.192448,0.057901,0.141983,Undervalued
GE,0.900295,0.09542,0.509843,Undervalued
LLY,0.349637,0.066233,0.503073,Undervalued
MCK,0.232792,0.060039,0.204817,Undervalued
META,1.889666,0.147862,1.016746,Undervalued
MPC,0.646106,0.081947,0.300383,Undervalued
NVDA,2.138154,0.161033,1.150698,Undervalued
PANW,1.157214,0.109038,0.617808,Undervalued


In [52]:
newdata = price_df.drop(price_df.columns[0], axis=1)
returns = np.log(newdata/newdata.shift(1))
returns = returns.drop(returns.index[0])  
returns = returns[selected_stocks]

n_stocks = len(selected_stocks)

0.009317762686410273
0.009317762806667663
0.0093177629115169
0.009317762787927683
0.009317762824405178
0.00931776288113289
0.009317762781127295
0.009317762782367052
0.009317762779017023
0.009317762791227603
0.009317762905458155
0.009317762891896758
0.009317762907507847
0.009317762731821925
0.009317762847237615
0.009317762858687554
0.009317762856910263
0.009317762847194109
0.009317762760694289
0.009317762854954321
0.009317762819299146
0.009317762824531485
0.009317762832770098
0.009317762791848799
0.009317762779819138
0.00931776279233709
0.009317762749874644
0.00931776284390771
0.009317762903932686
0.009317762747443288
0.009317762870674085
0.009317762716681477
0.009317762977211869
0.009317762895832757
0.009317762843812341
0.009317762818435736
0.009317762906142163
0.009317762809960859
0.009317762711756132
0.00931776283483738
0.00931776280533032
0.009317762733905369
0.009317762775155215
0.009317762796860947
0.009317762711319776
0.009317762781015164
0.009317762815983297
0.009317762828847574

In [56]:
def calc_normalized_weights(optimal_weights, stock_names, threshold=0.005):
    thresholded_weights = np.where(optimal_weights >= threshold, optimal_weights, 0)

    if np.sum(thresholded_weights) > 0:  # Prevent division by zero
        normalized_weights = thresholded_weights / np.sum(thresholded_weights)
    else:
        normalized_weights = thresholded_weights  # In case all are zero, which should not happen

    # Create a DataFrame from the stock names and their corresponding weights
    df_stocks_with_weights = pd.DataFrame({
        'Stock': stock_names,
        'Weight': normalized_weights
    })

    df_stocks_with_weights = df_stocks_with_weights[df_stocks_with_weights['Weight'] >= threshold]

    return df_stocks_with_weights

stock_names = returns.columns
stocks_with_weights = calc_normalized_weights(optimal_weights, stock_names)
stocks_with_weights

Unnamed: 0,Stock,Weight
30,CBOE,0.214618
40,CME,0.02483
43,COR,0.006464
69,GE,0.126839
88,LLY,0.177708
94,MCK,0.053257
96,META,0.14133
100,MPC,0.046868
111,NVDA,0.103183
119,PANW,0.027219


In [55]:
stock_names = returns.columns
stocks_with_weights = calc_normalized_weights(optimal_weights, stock_names, threshold=0.01)
stocks_with_weights

Unnamed: 0,Stock,Weight
30,CBOE,0.216014
40,CME,0.024991
69,GE,0.127664
88,LLY,0.178864
94,MCK,0.053604
96,META,0.142249
100,MPC,0.047173
111,NVDA,0.103855
119,PANW,0.027396
121,PGR,0.024103


In [48]:
stock_names = returns.columns
stocks_with_weights = calc_normalized_weights(optimal_weights, stock_names, threshold=0.001)
stocks_with_weights.shape

(12, 2)