## Packages & Preamble

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from return_utils import spearman_corr,factors_to_pos,collecting_stock_percentages,collecting_stock_data_avg,pos_to_return
import os
import empyrical



In [2]:
%load_ext autoreload
%autoreload 2

Load the list of tickers: note that we remove all appearing post 2017 to avoid
selection bias.

In [3]:
s_and_p = pd.read_csv('csvs/sp500/sp500_tickers.csv').iloc[:, 1]
s_and_p = list(s_and_p.map(
    lambda x: x.replace('\n', '')).values)
post_2017=['KDP','FOXA','ON','VICI','CPT','MOH','NDSN','CEG','SBNY','SEDG','FDS','EPAM','MTCH','CDAY','BRO','TECH','MRNA','OGN'
            ,'CRL','PTC','NXPI','PENN','GNRC','CZR','MPWR','TRMB','ENPH','TSLA','VNT','POOL','ETSY','TER','CTLT','BIO','TDY','TYL'
            ,'WST','DPZ','DXCM','OTIS','CARR','HWM','IR','PAYC','LYV',"ZBRA",'STE','ODFL','WRB','NOW','LVS','NVR','CDW','LDOS','IEX','TMUS'
            ,'MKTX','AMCRT','DD','CTVA','DOW','WAB','ATO','TFX','FRC','CE','LW','MXIM','FANG','JKHY','KEYS','FTNT','ROL','WCG','ANET','CPRT','FLT','BR'
            ,'HFC',"TWTR",'EVRG','ABMD','MSCI','TTWO','SIVB','NKTR','IPGP','HII','NCLH','CDNS','DWDP','SBAC','Q','BHF','DRE','AOS','PKG','RMD'
            ,'MGM','HLT','ALGN','ANSS','RE','INFO']
s_and_p=list(set(s_and_p)-set(post_2017))
post_2017=list(set(s_and_p)&set(post_2017))


Collect stock close and returns

In [4]:
stock_closes_df = collecting_stock_data_avg(tickers=s_and_p)
stock_closes_df.to_csv('csvs/sp500/collected_stock_closes.csv')
stock_closes_df.index=pd.to_datetime(stock_closes_df.index.values,format='%Y-%m-%d')
stock_closes_df.fillna(stock_closes_df.mean(axis=1),inplace=True)
stock_perc_df =collecting_stock_percentages(s_and_p)
stock_perc_df.index=pd.to_datetime(stock_perc_df.index.values,format='%Y-%m-%d')
stock_perc_df.fillna(stock_perc_df.mean(axis=1),inplace=True)
stock_closes_rets_df=pd.concat([stock_closes_df,stock_perc_df],axis=1)
stock_closes_rets_df.to_csv('csvs/sp500/collected_stock closes_and_percentage_returns.csv')


Collect bnh returns only

In [5]:
bnh_returns_df=stock_perc_df.mean(axis=1)
bnh_returns_df.to_csv('csvs/sp500/bnh_returns.csv')

# Log Returns TDA average values for strategies

In [6]:
def factor_collecting(tickers, col='Avg_Betti',inputpath='sp500/betti_dataframes/'):
    fact_df = pd.DataFrame(dtype=np.float64)
    for t in tqdm(tickers):
        df = pd.read_csv(inputpath+t+'.csv', index_col='Date')
        df2 = pd.DataFrame({t: df[col]}, index=df.index)
        fact_df = pd.concat([fact_df, df2], axis=1)
    return(fact_df)

In [15]:
if not os.path.exists('Persim_vs_Betti/pers_land_values.csv'):
    collected_tda_df=factor_collecting(tickers=s_and_p,col='Persistence landscape norm', inputpath='Persim_vs_Betti/sp500/landscape/').loc['2017-07-19':'2022-06-01']
    collected_tda_df.to_csv('Persim_vs_Betti/pers_land_values.csv')


if not os.path.exists('Persim_vs_Betti/unweighted_betti_values.csv'):
    unweighted_betti_df=factor_collecting(tickers=s_and_p,col='Avg_%_Betti',inputpath='Betti_experiments/betti_dim_3_points_7')
    unweighted_betti_df.to_csv('Persim_vs_Betti/unweighted_betti_values.csv')

    
if not os.path.exists('Persim_vs_Betti/relative_unweighted_betti_values.csv'):
    rel_unweighted_betti_df=factor_collecting(tickers=s_and_p,col='Avg_Rel_%_Betti',inputpath='Betti_experiments/betti_dim_3_points_7')
    rel_unweighted_betti_df.to_csv('Persim_vs_Betti/relative_unweighted_betti_values.csv')

In [8]:
strat_broad=['pers_land','unweighted_betti','relative_unweighted_betti']

for strat in strat_broad:
    vals=pd.read_csv(f"Persim_vs_Betti/{strat}_values.csv",index_col='Date')
    
    if not os.path.exists(f"Persim_vs_Betti/{strat}_sort_factors.csv"):

        sort_factors_df=vals.rolling(window=21).apply(spearman_corr)
        sort_factors_df.to_csv(f"Persim_vs_Betti/{strat}_sort_factors.csv")

    if not os.path.exists(f"Persim_vs_Betti/{strat}_meandiff_factors.csv"):
        
        meandiff_factors_df=vals-vals.rolling(window=21).mean()
        meandiff_factors_df.to_csv(f"Persim_vs_Betti/{strat}_meandiff_factors.csv")



## Average sharpe values

Now apply for the various strategy choices

In [16]:
strat_names=[ "pers_land_sort",
                "pers_land_meandiff",
                "unweighted_betti_sort",
                "unweighted_betti_meandiff",
                "relative_unweighted_betti_sort",
                'relative_unweighted_betti_meandiff'
]


In [17]:

performance_stats_allstats_all=pd.DataFrame({
                    'Sharpe ratio':[empyrical.sharpe_ratio(bnh_returns_df)],
                    'Sortino ratio':[empyrical.sortino_ratio(bnh_returns_df)],
                    'alpha':['.'],
                    'beta':['.']
                },index=['bnh'])
for strat in strat_names:
    factor_df=pd.read_csv(f"Persim_vs_Betti/{strat}_factors.csv",index_col='Date')
    factor_df.fillna(axis=1,method='ffill',inplace=True)
    factor_df.index=pd.to_datetime(factor_df.index.values,format='%Y-%m-%d')
    pos=factors_to_pos(stock_factor_df=factor_df, collected_stocks_df=stock_perc_df, tickers=s_and_p, start='2018-01-01', end='2022-06-01')
    rets=pos_to_return(positions_df=pos, tickers=s_and_p, start='2018-01-01', end='2022-06-01',  hold_len=21)
    rets.to_csv(f"Persim_vs_Betti/avg_values_{strat}.csv")
    valdf=pd.DataFrame({
                    'Sharpe ratio':[empyrical.sharpe_ratio(rets)],
                    'Sortino ratio':[empyrical.sortino_ratio(rets)],
                    'alpha':[empyrical.alpha(rets,bnh_returns_df)],
                    'beta':[empyrical.beta(rets,bnh_returns_df)]
                },index=[strat])
    performance_stats_all=pd.concat([performance_stats_all,valdf])


In [18]:
performance_stats_all.to_csv("Persim_vs_Betti/all_strats_avg_risk.csv")

In [23]:
print(performance_stats_all.sort_values(['Sharpe ratio'],ascending=False).to_latex())

\begin{tabular}{lrrll}
\toprule
{} &  Sharpe ratio &  Sortino ratio &     alpha &      beta \\
\midrule
relative\_unweighted\_betti\_meandiff &      0.798065 &       1.115632 &  0.026468 &    0.9983 \\
weighted\_betti\_sort                &      0.728382 &       1.014169 &  0.010378 &  0.991546 \\
weighted\_betti\_meandiff            &      0.723990 &       1.013182 &  0.010315 &   1.06591 \\
unweighted\_betti\_meandiff          &      0.721991 &       1.001069 &  0.009113 &  1.011189 \\
bnh                                &      0.714781 &       0.989839 &         . &         . \\
relative\_unweighted\_betti\_sort     &      0.708915 &       0.991325 &  0.005968 &  0.986427 \\
pers\_land\_sort                     &      0.703792 &       0.971502 &  0.004635 &  0.994127 \\
unweighted\_betti\_sort              &      0.696378 &       0.970994 &  0.003247 &  1.013188 \\
pers\_land\_meandiff                 &      0.635825 &       0.891956 & -0.010024 &  1.142822 \\
relative\_weighted\_bet

In [20]:
performance_stats_all

Unnamed: 0,Sharpe ratio,Sortino ratio,alpha,beta
bnh,0.714781,0.989839,.,.
pers_land_sort,0.703792,0.971502,0.004635,0.994127
pers_land_meandiff,0.635825,0.891956,-0.010024,1.142822
weighted_betti_sort,0.728382,1.014169,0.010378,0.991546
weighted_betti_meandiff,0.72399,1.013182,0.010315,1.06591
unweighted_betti_sort,0.696378,0.970994,0.003247,1.013188
unweighted_betti_meandiff,0.721991,1.001069,0.009113,1.011189
relative_weighted_betti_sort,0.589074,0.815757,-0.020472,0.979634
relative_weighted_betti_meandiff,0.61011,0.844513,-0.016829,1.032124
relative_unweighted_betti_sort,0.708915,0.991325,0.005968,0.986427
