In [6]:
from statsmodels.regression.rolling import RollingOLS
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime as dt
import yfinance as yf
import pandas_ta
import warnings
warnings.filterwarnings('ignore')

sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]

sp500['Symbol'] = sp500['Symbol'].str.replace('.', '-')

symbols_list = sp500['Symbol'].unique().tolist()

end_date = '2023-09-27'



In [7]:
start_date = pd.to_datetime(end_date)-pd.DateOffset(365*8)

df = yf.download(tickers=symbols_list,
                 start=start_date,
                 end=end_date).stack()

df.index.names = ['date', 'ticker']

df.columns = df.columns.str.lower()

df

[*********************100%%**********************]  503 of 503 completed


1 Failed download:
['VLTO']: Exception("%ticker%: Data doesn't exist for startDate = 1443499200, endDate = 1695787200")





Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,close,high,low,open,volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-09-29,A,31.588047,33.740002,34.060001,33.240002,33.360001,2252400.0
2015-09-29,AAL,37.361618,39.180000,39.770000,38.790001,39.049999,7478800.0
2015-09-29,AAPL,24.748632,27.264999,28.377501,26.965000,28.207500,293461600.0
2015-09-29,ABBV,37.024639,52.790001,54.189999,51.880001,53.099998,12842800.0
2015-09-29,ABT,33.807274,39.500000,40.150002,39.029999,39.259998,12287500.0
...,...,...,...,...,...,...,...
2023-09-26,YUM,124.010002,124.010002,124.739998,123.449997,124.239998,1500600.0
2023-09-26,ZBH,112.216316,112.459999,117.110001,112.419998,116.769997,3610500.0
2023-09-26,ZBRA,223.960007,223.960007,226.649994,222.580002,225.970001,355400.0
2023-09-26,ZION,33.990002,33.990002,34.700001,33.840000,33.840000,1586100.0


In [8]:
df['garman_klass_vol'] = ((np.log(df['high'])-np.log(df['low']))**2)/2-(2*np.log(2)-1)*((np.log(df['adj close'])-np.log(df['open']))**2)

df['rsi'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.rsi(close=x, length=20))

df['bb_low'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.bbands(close=np.log1p(x), length=20).iloc[:,0])
                                                          
df['bb_mid'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.bbands(close=np.log1p(x), length=20).iloc[:,1])
                                                          
df['bb_high'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.bbands(close=np.log1p(x), length=20).iloc[:,2])

def compute_atr(stock_data):
    atr = pandas_ta.atr(high=stock_data['high'],
                        low=stock_data['low'],
                        close=stock_data['close'],
                        length=14)
    return atr.sub(atr.mean()).div(atr.std())

df['atr'] = df.groupby(level=1, group_keys=False).apply(compute_atr)

def compute_macd(close):
    macd = pandas_ta.macd(close=close, length=20).iloc[:,0]
    return macd.sub(macd.mean()).div(macd.std())

df['macd'] = df.groupby(level=1, group_keys=False)['adj close'].apply(compute_macd)

df['dollar_volume'] = (df['adj close']*df['volume'])/1e6

df


Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,close,high,low,open,volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2015-09-29,A,31.588047,33.740002,34.060001,33.240002,33.360001,2252400.0,-0.000854,,,,,,,71.148917
2015-09-29,AAL,37.361618,39.180000,39.770000,38.790001,39.049999,7478800.0,-0.000443,,,,,,,279.420069
2015-09-29,AAPL,24.748632,27.264999,28.377501,26.965000,28.207500,293461600.0,-0.005307,,,,,,,7262.773271
2015-09-29,ABBV,37.024639,52.790001,54.189999,51.880001,53.099998,12842800.0,-0.049280,,,,,,,475.500035
2015-09-29,ABT,33.807274,39.500000,40.150002,39.029999,39.259998,12287500.0,-0.008237,,,,,,,415.406878
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-26,YUM,124.010002,124.010002,124.739998,123.449997,124.239998,1500600.0,0.000053,36.057176,4.826202,4.856171,4.886139,0.142547,-1.363696,186.089409
2023-09-26,ZBH,112.216316,112.459999,117.110001,112.419998,116.769997,3610500.0,0.000224,31.893246,4.751923,4.791592,4.831260,-0.381708,-0.881067,405.157010
2023-09-26,ZBRA,223.960007,223.960007,226.649994,222.580002,225.970001,355400.0,0.000133,29.494977,5.400991,5.539167,5.677342,-0.057389,-1.600791,79.595386
2023-09-26,ZION,33.990002,33.990002,34.700001,33.840000,33.840000,1586100.0,0.000307,46.707773,3.539073,3.594527,3.649982,-0.161699,-0.164625,53.911542


In [9]:
df['garman_klass_vol'] = ((np.log(df['high'])-np.log(df['low']))**2)/2-(2*np.log(2)-1)*((np.log(df['adj close'])-np.log(df['open']))**2)

df['rsi'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.rsi(close=x, length=20))

df['bb_low'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.bbands(close=np.log1p(x), length=20).iloc[:,0])
                                                          
df['bb_mid'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.bbands(close=np.log1p(x), length=20).iloc[:,1])
                                                          
df['bb_high'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.bbands(close=np.log1p(x), length=20).iloc[:,2])

def compute_atr(stock_data):
    atr = pandas_ta.atr(high=stock_data['high'],
                        low=stock_data['low'],
                        close=stock_data['close'],
                        length=14)
    return atr.sub(atr.mean()).div(atr.std())

df['atr'] = df.groupby(level=1, group_keys=False).apply(compute_atr)

def compute_macd(close):
    macd = pandas_ta.macd(close=close, length=20).iloc[:,0]
    return macd.sub(macd.mean()).div(macd.std())

df['macd'] = df.groupby(level=1, group_keys=False)['adj close'].apply(compute_macd)

df['dollar_volume'] = (df['adj close']*df['volume'])/1e6

df

Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,close,high,low,open,volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2015-09-29,A,31.588047,33.740002,34.060001,33.240002,33.360001,2252400.0,-0.000854,,,,,,,71.148917
2015-09-29,AAL,37.361618,39.180000,39.770000,38.790001,39.049999,7478800.0,-0.000443,,,,,,,279.420069
2015-09-29,AAPL,24.748632,27.264999,28.377501,26.965000,28.207500,293461600.0,-0.005307,,,,,,,7262.773271
2015-09-29,ABBV,37.024639,52.790001,54.189999,51.880001,53.099998,12842800.0,-0.049280,,,,,,,475.500035
2015-09-29,ABT,33.807274,39.500000,40.150002,39.029999,39.259998,12287500.0,-0.008237,,,,,,,415.406878
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-26,YUM,124.010002,124.010002,124.739998,123.449997,124.239998,1500600.0,0.000053,36.057176,4.826202,4.856171,4.886139,0.142547,-1.363696,186.089409
2023-09-26,ZBH,112.216316,112.459999,117.110001,112.419998,116.769997,3610500.0,0.000224,31.893246,4.751923,4.791592,4.831260,-0.381708,-0.881067,405.157010
2023-09-26,ZBRA,223.960007,223.960007,226.649994,222.580002,225.970001,355400.0,0.000133,29.494977,5.400991,5.539167,5.677342,-0.057389,-1.600791,79.595386
2023-09-26,ZION,33.990002,33.990002,34.700001,33.840000,33.840000,1586100.0,0.000307,46.707773,3.539073,3.594527,3.649982,-0.161699,-0.164625,53.911542


In [10]:
last_cols = [c for c in df.columns.unique(0) if c not in ['dollar_volume', 'volume', 'open',
                                                          'high', 'low', 'close']]

data = (pd.concat([df.unstack('ticker')['dollar_volume'].resample('M').mean().stack('ticker').to_frame('dollar_volume'),
                   df.unstack()[last_cols].resample('M').last().stack('ticker')],
                  axis=1)).dropna()

data

Unnamed: 0_level_0,Unnamed: 1_level_0,dollar_volume,adj close,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-11-30,A,136.444144,39.152687,-0.001810,73.421489,3.549210,3.621664,3.694119,-1.033887,0.567157
2015-11-30,AAL,287.915800,39.429935,-0.000966,40.719049,3.672028,3.749832,3.827636,0.190822,-0.418771
2015-11-30,AAPL,4039.899060,26.960350,-0.003027,55.537384,3.285478,3.328796,3.372114,-0.967900,-0.142789
2015-11-30,ABBV,343.971764,41.160297,-0.053947,49.376835,3.745051,3.793320,3.841588,-0.526809,0.145677
2015-11-30,ABT,213.736336,38.669388,-0.009962,56.962500,3.665571,3.687430,3.709289,-1.064842,0.335557
...,...,...,...,...,...,...,...,...,...,...
2023-09-30,OTIS,156.200745,79.290001,0.000093,33.116257,4.381831,4.427125,4.472419,-1.028320,-1.534536
2023-09-30,ABNB,1633.500725,132.279999,0.000213,44.494127,4.857047,4.940924,5.024801,-1.006939,-0.037854
2023-09-30,CEG,197.815385,108.489998,0.000274,55.245471,4.657897,4.698072,4.738248,-0.436215,0.366876
2023-09-30,GEHC,212.434213,66.179550,0.000185,40.922330,4.156170,4.213706,4.271243,-0.893478,-1.116463


In [11]:
data['dollar_volume'] = (data.loc[:, 'dollar_volume'].unstack('ticker').rolling(5*12, min_periods=12).mean().stack())

data['dollar_vol_rank'] = (data.groupby('date')['dollar_volume'].rank(ascending=False))

data = data[data['dollar_vol_rank']<150].drop(['dollar_volume', 'dollar_vol_rank'], axis=1)

data

Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-10-31,AAL,39.134331,-0.000176,62.203549,3.604673,3.655494,3.706314,0.402199,1.131595
2016-10-31,AAPL,26.316141,-0.002228,49.891019,3.298038,3.326922,3.355806,-1.038688,-0.195978
2016-10-31,ABBV,41.009075,-0.041756,27.477645,3.771814,3.826001,3.880188,-0.893132,-0.760594
2016-10-31,ABT,34.630028,-0.006476,38.008836,3.564121,3.614608,3.665095,-1.035224,-0.650888
2016-10-31,ACN,104.350311,-0.004026,53.823663,4.644779,4.656418,4.668056,-0.996806,-0.135457
...,...,...,...,...,...,...,...,...,...
2023-09-30,WFC,40.650002,0.000234,40.920273,3.718132,3.758516,3.798900,-0.558742,-0.282325
2023-09-30,WMT,162.500000,0.000024,54.722508,5.081613,5.099300,5.116986,-0.196379,0.399459
2023-09-30,XOM,116.410004,0.000045,59.440192,4.713293,4.753399,4.793504,0.601335,1.400623
2023-09-30,MRNA,98.120003,0.000146,38.747314,4.582514,4.685332,4.788149,-0.529511,-0.376899


In [12]:
def calculate_returns(df):

    outlier_cutoff = 0.005

    lags = [1, 2, 3, 6, 9, 12]

    for lag in lags:

        df[f'return_{lag}m'] = (df['adj close']
                              .pct_change(lag)
                              .pipe(lambda x: x.clip(lower=x.quantile(outlier_cutoff),
                                                     upper=x.quantile(1-outlier_cutoff)))
                              .add(1)
                              .pow(1/lag)
                              .sub(1))
    return df
    
    
data = data.groupby(level=1, group_keys=False).apply(calculate_returns).dropna()

data

Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,return_1m,return_2m,return_3m,return_6m,return_9m,return_12m
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017-10-31,AAL,45.534172,-0.000363,41.051790,3.849110,3.921750,3.994389,1.011062,-0.018698,-0.014108,0.022981,-0.023860,0.016495,0.007008,0.012702
2017-10-31,AAPL,39.870972,-0.000892,69.196763,3.598569,3.645446,3.692324,-0.906642,-0.039276,0.096807,0.015250,0.044955,0.028875,0.038941,0.035228
2017-10-31,ABBV,68.772278,-0.029822,55.247804,4.215227,4.261600,4.307973,0.375557,0.473812,0.022727,0.098590,0.091379,0.056495,0.047273,0.044026
2017-10-31,ABT,48.969299,-0.004349,53.844838,3.902136,3.925710,3.949285,-1.040044,0.276132,0.021275,0.034308,0.034801,0.038672,0.031320,0.029294
2017-10-31,ACN,130.375061,-0.003359,69.365043,4.810123,4.849805,4.889486,-0.986514,0.352341,0.064180,0.048454,0.037202,0.028692,0.027398,0.018728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-30,WFC,40.650002,0.000234,40.920273,3.718132,3.758516,3.798900,-0.558742,-0.282325,-0.015500,-0.057917,-0.013554,0.016712,0.000702,0.003255
2023-09-30,WMT,162.500000,0.000024,54.722508,5.081613,5.099300,5.116986,-0.196379,0.399459,-0.000676,0.010014,0.012354,0.017574,0.016553,0.020256
2023-09-30,XOM,116.410004,0.000045,59.440192,4.713293,4.753399,4.793504,0.601335,1.400623,0.046947,0.046139,0.030496,0.012838,0.008747,0.027037
2023-09-30,MRNA,98.120003,0.000146,38.747314,4.582514,4.685332,4.788149,-0.529511,-0.376899,-0.132219,-0.086803,-0.068763,-0.071952,-0.064976,-0.015431


In [13]:
factor_data = web.DataReader('F-F_Research_Data_5_Factors_2x3',
                               'famafrench',
                               start='2010')[0].drop('RF', axis=1)

factor_data.index = factor_data.index.to_timestamp()

factor_data = factor_data.resample('M').last().div(100)

factor_data.index.name = 'date'

factor_data = factor_data.join(data['return_1m']).sort_index()

factor_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Mkt-RF,SMB,HML,RMW,CMA,return_1m
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-10-31,AAL,0.0225,-0.0194,0.0020,0.0093,-0.0325,-0.014108
2017-10-31,AAPL,0.0225,-0.0194,0.0020,0.0093,-0.0325,0.096807
2017-10-31,ABBV,0.0225,-0.0194,0.0020,0.0093,-0.0325,0.022727
2017-10-31,ABT,0.0225,-0.0194,0.0020,0.0093,-0.0325,0.021275
2017-10-31,ACN,0.0225,-0.0194,0.0020,0.0093,-0.0325,0.064180
...,...,...,...,...,...,...,...
2023-07-31,VZ,0.0321,0.0286,0.0413,-0.0056,0.0062,-0.067220
2023-07-31,WFC,0.0321,0.0286,0.0413,-0.0056,0.0062,0.081537
2023-07-31,WMT,0.0321,0.0286,0.0413,-0.0056,0.0062,0.017051
2023-07-31,WYNN,0.0321,0.0286,0.0413,-0.0056,0.0062,0.031910


In [14]:
observations = factor_data.groupby(level=1).size()

valid_stocks = observations[observations >= 10]

factor_data = factor_data[factor_data.index.get_level_values('ticker').isin(valid_stocks.index)]

factor_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Mkt-RF,SMB,HML,RMW,CMA,return_1m
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-10-31,AAL,0.0225,-0.0194,0.0020,0.0093,-0.0325,-0.014108
2017-10-31,AAPL,0.0225,-0.0194,0.0020,0.0093,-0.0325,0.096807
2017-10-31,ABBV,0.0225,-0.0194,0.0020,0.0093,-0.0325,0.022727
2017-10-31,ABT,0.0225,-0.0194,0.0020,0.0093,-0.0325,0.021275
2017-10-31,ACN,0.0225,-0.0194,0.0020,0.0093,-0.0325,0.064180
...,...,...,...,...,...,...,...
2023-07-31,VZ,0.0321,0.0286,0.0413,-0.0056,0.0062,-0.067220
2023-07-31,WFC,0.0321,0.0286,0.0413,-0.0056,0.0062,0.081537
2023-07-31,WMT,0.0321,0.0286,0.0413,-0.0056,0.0062,0.017051
2023-07-31,WYNN,0.0321,0.0286,0.0413,-0.0056,0.0062,0.031910


In [15]:
betas = (factor_data.groupby(level=1,
                            group_keys=False)
         .apply(lambda x: RollingOLS(endog=x['return_1m'], 
                                     exog=sm.add_constant(x.drop('return_1m', axis=1)),
                                     window=min(24, x.shape[0]),
                                     min_nobs=len(x.columns)+1)
         .fit(params_only=True)
         .params
         .drop('const', axis=1)))

betas 

Unnamed: 0_level_0,Unnamed: 1_level_0,Mkt-RF,SMB,HML,RMW,CMA
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-10-31,AAL,,,,,
2017-10-31,AAPL,,,,,
2017-10-31,ABBV,,,,,
2017-10-31,ABT,,,,,
2017-10-31,ACN,,,,,
...,...,...,...,...,...,...
2023-07-31,VZ,0.328750,-0.082294,0.244886,0.261152,0.175731
2023-07-31,WFC,1.065927,0.205755,2.083805,-0.248225,-1.631918
2023-07-31,WMT,0.774863,-0.183056,-0.580126,-0.202141,0.755490
2023-07-31,WYNN,1.099833,0.096335,0.042646,0.348799,-0.029272


In [16]:
Join the rolling factors data to the main features dataframe.
factors = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']

data = (data.join(betas.groupby('ticker').shift()))

data.loc[:, factors] = data.groupby('ticker', group_keys=False)[factors].apply(lambda x: x.fillna(x.mean()))

data = data.drop('adj close', axis=1)

data = data.dropna()

data.info()

SyntaxError: invalid syntax (1917061262.py, line 1)

In [None]:
from sklearn.cluster import KMeans

data = data.drop('cluster', axis=1)

def get_clusters(df):
    df['cluster'] = KMeans(n_clusters=4,
                           random_state=0,
                           init=initial_centroids).fit(df).labels_
    return df

data = data.dropna().groupby('date', group_keys=False).apply(get_clusters)

data

In [None]:
def plot_clusters(data):

    cluster_0 = data[data['cluster']==0]
    cluster_1 = data[data['cluster']==1]
    cluster_2 = data[data['cluster']==2]
    cluster_3 = data[data['cluster']==3]

    plt.scatter(cluster_0.iloc[:,0] , cluster_0.iloc[:,6] , color = 'red', label='cluster 0')
    plt.scatter(cluster_1.iloc[:,0] , cluster_1.iloc[:,6] , color = 'green', label='cluster 1')
    plt.scatter(cluster_2.iloc[:,0] , cluster_2.iloc[:,6] , color = 'blue', label='cluster 2')
    plt.scatter(cluster_3.iloc[:,0] , cluster_3.iloc[:,6] , color = 'black', label='cluster 3')
    
    plt.legend()
    plt.show()
    return

In [None]:
plt.style.use('ggplot')

for i in data.index.get_level_values('date').unique().tolist():
    
    g = data.xs(i, level=0)
    
    plt.title(f'Date {i}')
    
    plot_clusters(g)

In [None]:
target_rsi_values = [30, 45, 55, 70]

initial_centroids = np.zeros((len(target_rsi_values), 18))

initial_centroids[:, 6] = target_rsi_values

initial_centroids

In [None]:
filtered_df = data[data['cluster']==3].copy()

filtered_df = filtered_df.reset_index(level=1)

filtered_df.index = filtered_df.index+pd.DateOffset(1)

filtered_df = filtered_df.reset_index().set_index(['date', 'ticker'])

dates = filtered_df.index.get_level_values('date').unique().tolist()

fixed_dates = {}

for d in dates:
    
    fixed_dates[d.strftime('%Y-%m-%d')] = filtered_df.xs(d, level=0).index.tolist()
    
fixed_dates

In [None]:
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns

def optimize_weights(prices, lower_bound=0):
    
    returns = expected_returns.mean_historical_return(prices=prices,
                                                      frequency=252)
    
    cov = risk_models.sample_cov(prices=prices,
                                 frequency=252)
    
    ef = EfficientFrontier(expected_returns=returns,
                           cov_matrix=cov,
                           weight_bounds=(lower_bound, .1),
                           solver='SCS')
    
    weights = ef.max_sharpe()
    
    return ef.clean_weights()

In [None]:
stocks = data.index.get_level_values('ticker').unique().tolist()

new_df = yf.download(tickers=stocks,
                     start=data.index.get_level_values('date').unique()[0]-pd.DateOffset(months=12),
                     end=data.index.get_level_values('date').unique()[-1])

new_df

In [None]:
returns_dataframe = np.log(new_df['Adj Close']).diff()

portfolio_df = pd.DataFrame()

for start_date in fixed_dates.keys():
    
    try:

        end_date = (pd.to_datetime(start_date)+pd.offsets.MonthEnd(0)).strftime('%Y-%m-%d')

        cols = fixed_dates[start_date]

        optimization_start_date = (pd.to_datetime(start_date)-pd.DateOffset(months=12)).strftime('%Y-%m-%d')

        optimization_end_date = (pd.to_datetime(start_date)-pd.DateOffset(days=1)).strftime('%Y-%m-%d')
        
        optimization_df = new_df[optimization_start_date:optimization_end_date]['Adj Close'][cols]
        
        success = False
        try:
            weights = optimize_weights(prices=optimization_df,
                                   lower_bound=round(1/(len(optimization_df.columns)*2),3))

            weights = pd.DataFrame(weights, index=pd.Series(0))
            
            success = True
        except:
            print(f'Max Sharpe Optimization failed for {start_date}, Continuing with Equal-Weights')
        
        if success==False:
            weights = pd.DataFrame([1/len(optimization_df.columns) for i in range(len(optimization_df.columns))],
                                     index=optimization_df.columns.tolist(),
                                     columns=pd.Series(0)).T
        
        temp_df = returns_dataframe[start_date:end_date]

        temp_df = temp_df.stack().to_frame('return').reset_index(level=0)\
                   .merge(weights.stack().to_frame('weight').reset_index(level=0, drop=True),
                          left_index=True,
                          right_index=True)\
                   .reset_index().set_index(['Date', 'index']).unstack().stack()

        temp_df.index.names = ['date', 'ticker']

        temp_df['weighted_return'] = temp_df['return']*temp_df['weight']

        temp_df = temp_df.groupby(level=0)['weighted_return'].sum().to_frame('Strategy Return')

        portfolio_df = pd.concat([portfolio_df, temp_df], axis=0)
    
    except Exception as e:
        print(e)

portfolio_df = portfolio_df.drop_duplicates()

portfolio_df

In [None]:
spy = yf.download(tickers='SPY',
                  start='2015-01-01',
                  end=dt.date.today())

spy_ret = np.log(spy[['Adj Close']]).diff().dropna().rename({'Adj Close':'SPY Buy&Hold'}, axis=1)

portfolio_df = portfolio_df.merge(spy_ret,
                                  left_index=True,
                                  right_index=True)

portfolio_df

In [None]:
import matplotlib.ticker as mtick

plt.style.use('ggplot')

portfolio_cumulative_return = np.exp(np.log1p(portfolio_df).cumsum())-1

portfolio_cumulative_return[:'2023-09-29'].plot(figsize=(16,6))

plt.title('Unsupervised Learning Trading Strategy Returns Over Time')

plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1))

plt.ylabel('Return')

plt.show()