## Packages & Preamble

In [21]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from return_utils import find_bins, bin_position_calc, spearman_corr,factors_to_pos,collecting_stock_percentages,collecting_stock_data_avg,pos_to_return
import os
import empyrical
from Stock_tda_calc import s_and_p

In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the helper modules are picked up.
%load_ext autoreload
%autoreload 2

Collect stock closes and percentage returns

In [27]:
# Collect the per-ticker close table for every ticker in `s_and_p` and keep an
# on-disk copy of it exactly as returned (before any gap filling).
# NOTE(review): assumes one column per ticker, indexed by 'YYYY-MM-DD' strings — confirm.
stock_closes_df = collecting_stock_data_avg(tickers=s_and_p)
stock_closes_df.to_csv('csvs/collected_stock_closes.csv')
stock_closes_df.index = pd.to_datetime(stock_closes_df.index.values, format='%Y-%m-%d')

# Fill each missing value with the mean of its own row (same date, all columns).
# The row-mean Series is indexed by date, so it has to be aligned on the index:
# handing it straight to DataFrame.fillna matches its labels against the *column
# names* instead and silently fills nothing. Filling column by column aligns it
# on the dates as intended. Rows that are entirely NaN stay NaN.
closes_row_mean = stock_closes_df.mean(axis=1)
stock_closes_df = stock_closes_df.apply(
    lambda ticker_col: ticker_col.fillna(closes_row_mean)
)

# Same treatment for the percentage-return table.
stock_perc_df = collecting_stock_percentages(s_and_p)
stock_perc_df.index = pd.to_datetime(stock_perc_df.index.values, format='%Y-%m-%d')
perc_row_mean = stock_perc_df.mean(axis=1)
stock_perc_df = stock_perc_df.apply(
    lambda ticker_col: ticker_col.fillna(perc_row_mean)
)

# Side-by-side table of closes and returns; both halves carry the same column
# labels, so the combined frame has duplicated column names.
stock_closes_rets_df = pd.concat([stock_closes_df, stock_perc_df], axis=1)
stock_closes_rets_df.to_csv('csvs/collected_stock closes_and_percentage_returns.csv')


Collect buy-and-hold (bnh) benchmark returns only

In [28]:
# Evaluation window reused by the cells below.
start, end = pd.Timestamp("2018-01-01"), pd.Timestamp("2022-06-01")

# 'bnh' benchmark: the row-wise mean of the percentage-return table,
# restricted to the evaluation window. Note this is a Series despite the name.
bnh_returns_df = (
    stock_perc_df
    .mean(axis=1)
    .loc[start:end]
)
bnh_returns_df.to_csv('csvs//bnh_returns.csv')

# Log Returns TDA average values for strategies

In [7]:
def factor_collecting(tickers, col='Avg_%_Betti',inputpath='Betti_experiments/betti_dim_3_points_7', index_col = 0):
    """Gather one column from each per-ticker CSV into a single wide DataFrame.

    For every ticker ``t`` the file ``{inputpath}/{t}.csv`` is read and its
    ``col`` column becomes the column ``t`` of the result. Tickers whose file
    does not contain ``col`` are printed and skipped.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols; each must have a matching ``<ticker>.csv`` in ``inputpath``.
    col : str
        Name of the column to extract from every file.
    inputpath : str
        Directory holding the per-ticker CSV files.
    index_col : int or str
        Passed to ``pandas.read_csv`` to select the index column.

    Returns
    -------
    pandas.DataFrame
        One column per ticker that had ``col``, rows aligned on the union of
        the file indexes, with the index named ``'Date'``. Empty if no ticker
        provided the column.
    """
    collected = []
    for t in tqdm(tickers):
        df = pd.read_csv(f'{inputpath}/{t}.csv', index_col = index_col)
        if col not in df.columns:
            # Report the ticker that lacks the requested column and move on.
            print(t)
            continue
        collected.append(df[col].rename(t))

    if collected:
        # Concatenate once instead of growing the frame inside the loop, which
        # re-copies every previously collected column on each iteration.
        fact_df = pd.concat(collected, axis=1)
    else:
        # pd.concat refuses an empty list; keep returning an empty frame.
        fact_df = pd.DataFrame(dtype=np.float64)
    fact_df.index.name = 'Date'
    return fact_df

In [8]:
# Each collected factor table is cached as a CSV and only rebuilt when that
# file is missing, so re-running the notebook skips the per-ticker collection.

pers_land_csv = 'Persim_vs_Betti/pers_land_values.csv'
if not os.path.exists(pers_land_csv):
    pers_land_all_dates = factor_collecting(
        tickers=s_and_p,
        col='Persistence landscape norm',
        inputpath='Persim_vs_Betti/landscape',
        index_col=0,
    )
    # Only this table is cut to a fixed date window before it is saved.
    collected_tda_df = pers_land_all_dates.loc['2017-07-19':'2022-06-01']
    collected_tda_df.to_csv(pers_land_csv)

unweighted_betti_csv = 'Persim_vs_Betti/unweighted_betti_values.csv'
if not os.path.exists(unweighted_betti_csv):
    unweighted_betti_df = factor_collecting(
        tickers=s_and_p,
        col='Avg_%_Betti',
        inputpath='Betti_experiments/betti_dim_3_points_7',
    )
    unweighted_betti_df.to_csv(unweighted_betti_csv)

rel_unweighted_betti_csv = 'Persim_vs_Betti/relative_unweighted_betti_values.csv'
if not os.path.exists(rel_unweighted_betti_csv):
    rel_unweighted_betti_df = factor_collecting(
        tickers=s_and_p,
        col='Avg_rel_%_Betti',
        inputpath='Betti_experiments/betti_dim_3_points_7',
    )
    rel_unweighted_betti_df.to_csv(rel_unweighted_betti_csv)

In [29]:
strat_broad = ['pers_land', 'unweighted_betti', 'relative_unweighted_betti']

# For every base signal derive two factor tables over a 21-row rolling window
# and cache each as a CSV; a table is only recomputed when its file is missing.
for strat in strat_broad:
    vals = pd.read_csv(f"Persim_vs_Betti/{strat}_values.csv", index_col='Date')

    sort_csv = f"Persim_vs_Betti/{strat}_sort_factors.csv"
    meandiff_csv = f"Persim_vs_Betti/{strat}_meandiff_factors.csv"

    # "sort" factor: `spearman_corr` applied to each rolling window.
    if not os.path.exists(sort_csv):
        sort_factors_df = vals.rolling(window=21).apply(spearman_corr)
        sort_factors_df.to_csv(sort_csv)

    # "meandiff" factor: deviation of each value from its rolling mean.
    if not os.path.exists(meandiff_csv):
        rolling_mean = vals.rolling(window=21).mean()
        meandiff_factors_df = vals - rolling_mean
        meandiff_factors_df.to_csv(meandiff_csv)



## Average Sharpe values

Now compute the performance statistics for each of the strategy choices.

In [30]:
# Every base signal paired with both factor constructions, in the order
# <signal>_sort, <signal>_meandiff for each signal in turn.
strat_names = [
    f"{signal}_{construction}"
    for signal in ("pers_land", "unweighted_betti", "relative_unweighted_betti")
    for construction in ("sort", "meandiff")
]


In [31]:

# First row: the 'bnh' benchmark. Alpha and beta are not computed for it and
# are shown as '.' placeholders (which makes those two columns object-typed).
stats_rows = [
    pd.DataFrame(
        {
            'Sharpe ratio': [empyrical.sharpe_ratio(bnh_returns_df)],
            'Sortino ratio': [empyrical.sortino_ratio(bnh_returns_df)],
            'alpha': ['.'],
            'beta': ['.'],
        },
        index=['bnh'],
    )
]

for strat in strat_names:
    raw_factors = pd.read_csv(f"Persim_vs_Betti/{strat}_factors.csv", index_col='Date')

    # Gap filling: forward fill along axis=1 first, then each column's own mean.
    # NOTE(review): axis=1 carries values across ticker columns, not forward in
    # time — confirm this is intended.
    gap_filled = raw_factors.ffill(axis=1)
    gap_filled = gap_filled.fillna(gap_filled.mean(axis=0), axis=0)
    gap_filled.index = pd.to_datetime(gap_filled.index.values, format='%Y-%m-%d')
    # Drop any row that still contains a NaN after both fills.
    factor_df = gap_filled.dropna()

    pos = factors_to_pos(
        stock_factor_df=factor_df,
        collected_stocks_df=stock_perc_df,
        tickers=s_and_p,
        start=start,
        end=end,
    )
    strategy_returns = pos_to_return(
        positions_df=pos,
        tickers=s_and_p,
        start=start,
        end=end,
        hold_len=21,
    )
    rets = strategy_returns.loc[start:end].dropna()
    rets.to_csv(f"Persim_vs_Betti/avg_values_{strat}.csv")
    print(f'Strat {strat}: start date {rets.index.values[0]}')
    print(f'Strat {strat}: end date {max(rets.index)}')

    # Risk statistics for this strategy, with alpha/beta taken against 'bnh'.
    valdf = pd.DataFrame(
        {
            'Sharpe ratio': [empyrical.sharpe_ratio(rets)],
            'Sortino ratio': [empyrical.sortino_ratio(rets)],
            'alpha': [empyrical.alpha(rets, bnh_returns_df)],
            'beta': [empyrical.beta(rets, bnh_returns_df)],
        },
        index=[strat],
    )
    stats_rows.append(valdf)

# Stack the benchmark row and all strategy rows in one go.
performance_stats_all = pd.concat(stats_rows)
performance_stats_all.to_csv("Persim_vs_Betti/all_strats_avg_risk.csv")

Strat pers_land_sort: start date 2018-01-02T00:00:00.000000000
Strat pers_land_sort: end date 2022-06-01 00:00:00
Strat pers_land_meandiff: start date 2018-01-02T00:00:00.000000000
Strat pers_land_meandiff: end date 2022-06-01 00:00:00
Strat unweighted_betti_sort: start date 2018-01-02T00:00:00.000000000
Strat unweighted_betti_sort: end date 2022-06-01 00:00:00
Strat unweighted_betti_meandiff: start date 2018-01-02T00:00:00.000000000
Strat unweighted_betti_meandiff: end date 2022-06-01 00:00:00
Strat relative_unweighted_betti_sort: start date 2018-01-02T00:00:00.000000000
Strat relative_unweighted_betti_sort: end date 2022-06-01 00:00:00
Strat relative_unweighted_betti_meandiff: start date 2018-01-02T00:00:00.000000000
Strat relative_unweighted_betti_meandiff: end date 2022-06-01 00:00:00


In [32]:
# Print the summary table as LaTeX source.
print(performance_stats_all.to_latex())

\begin{tabular}{lrrll}
\toprule
{} &  Sharpe ratio &  Sortino ratio &     alpha &      beta \\
\midrule
bnh                                &      0.666774 &       0.922815 &         . &         . \\
pers\_land\_sort                     &      0.640133 &       0.877366 & -0.005403 &   0.99818 \\
pers\_land\_meandiff                 &      0.631321 &       0.884062 & -0.006652 &  1.107075 \\
unweighted\_betti\_sort              &      0.655870 &       0.912094 & -0.001618 &  1.005661 \\
unweighted\_betti\_meandiff          &      0.696798 &       0.958546 &  0.007883 &  1.008329 \\
relative\_unweighted\_betti\_sort     &      0.700518 &       0.976978 &  0.008508 &  0.998609 \\
relative\_unweighted\_betti\_meandiff &      0.794776 &       1.108833 &  0.030035 &  0.988209 \\
\bottomrule
\end{tabular}



  print(performance_stats_all.to_latex())


In [33]:
# Bare last expression: shows the summary table as the cell's output.
performance_stats_all

Unnamed: 0,Sharpe ratio,Sortino ratio,alpha,beta
bnh,0.666774,0.922815,.,.
pers_land_sort,0.640133,0.877366,-0.005403,0.99818
pers_land_meandiff,0.631321,0.884062,-0.006652,1.107075
unweighted_betti_sort,0.65587,0.912094,-0.001618,1.005661
unweighted_betti_meandiff,0.696798,0.958546,0.007883,1.008329
relative_unweighted_betti_sort,0.700518,0.976978,0.008508,0.998609
relative_unweighted_betti_meandiff,0.794776,1.108833,0.030035,0.988209


In [34]:
strat = "relative_unweighted_betti_meandiff top third"

raw_factors = pd.read_csv("Persim_vs_Betti/relative_unweighted_betti_meandiff_factors.csv", index_col='Date')

# Gap filling: forward fill along axis=1 first, then each column's own mean.
# NOTE(review): axis=1 carries values across ticker columns, not forward in
# time — confirm this is intended.
gap_filled = raw_factors.ffill(axis=1)
gap_filled = gap_filled.fillna(gap_filled.mean(axis=0), axis=0)
gap_filled.index = pd.to_datetime(gap_filled.index.values, format='%Y-%m-%d')
# Drop any row that still contains a NaN after both fills.
factor_df = gap_filled.dropna()

# This variant feeds the sign-flipped factor into the position builder.
pos = factors_to_pos(
    stock_factor_df=-1 * factor_df,
    collected_stocks_df=stock_perc_df,
    tickers=s_and_p,
    start=start,
    end=end,
)
strategy_returns = pos_to_return(
    positions_df=pos,
    tickers=s_and_p,
    start=start,
    end=end,
    hold_len=21,
)
rets = strategy_returns.loc[start:end].dropna()
rets.to_csv(f"Persim_vs_Betti/avg_values_{strat}.csv")
print(f'start date {rets.index.values[0]}')
print(f'end date {max(rets.index)}')

# Risk statistics for this variant, with alpha/beta taken against 'bnh'.
valdf = pd.DataFrame(
    {
        'Sharpe ratio': [empyrical.sharpe_ratio(rets)],
        'Sortino ratio': [empyrical.sortino_ratio(rets)],
        'alpha': [empyrical.alpha(rets, bnh_returns_df)],
        'beta': [empyrical.beta(rets, bnh_returns_df)],
    },
    index=[strat],
)
valdf

start date 2018-01-02T00:00:00.000000000
end date 2022-06-01 00:00:00


Unnamed: 0,Sharpe ratio,Sortino ratio,alpha,beta
relative_unweighted_betti_meandiff top third,0.546626,0.752673,-0.026275,0.997479


In [35]:
# Print the single-row statistics table as LaTeX source.
print(valdf.to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  Sharpe ratio &  Sortino ratio &     alpha &      beta \\
\midrule
relative\_unweighted\_betti\_meandiff top third &      0.546626 &       0.752673 & -0.026275 &  0.997479 \\
\bottomrule
\end{tabular}



  print(valdf.to_latex())


In [36]:
strat = "relative_unweighted_betti_meandiff with shorting"

raw_factors = pd.read_csv("Persim_vs_Betti/relative_unweighted_betti_meandiff_factors.csv", index_col='Date')

# Gap filling: forward fill along axis=1 first, then each column's own mean.
# NOTE(review): axis=1 carries values across ticker columns, not forward in
# time — confirm this is intended.
gap_filled = raw_factors.ffill(axis=1)
gap_filled = gap_filled.fillna(gap_filled.mean(axis=0), axis=0)
gap_filled.index = pd.to_datetime(gap_filled.index.values, format='%Y-%m-%d')
# Drop any row that still contains a NaN after both fills.
factor_df = gap_filled.dropna()

# This variant builds positions with the `short` flag switched on.
pos = factors_to_pos(
    stock_factor_df=factor_df,
    collected_stocks_df=stock_perc_df,
    tickers=s_and_p,
    start=start,
    end=end,
    short=True,
)
strategy_returns = pos_to_return(
    positions_df=pos,
    tickers=s_and_p,
    start=start,
    end=end,
    hold_len=21,
)
rets = strategy_returns.loc[start:end].dropna()
rets.to_csv(f"Persim_vs_Betti/avg_values_{strat}.csv")
print(f'start date {rets.index.values[0]}')
print(f'end date {max(rets.index)}')

# Risk statistics for this variant, with alpha/beta taken against 'bnh'.
valdf = pd.DataFrame(
    {
        'Sharpe ratio': [empyrical.sharpe_ratio(rets)],
        'Sortino ratio': [empyrical.sortino_ratio(rets)],
        'alpha': [empyrical.alpha(rets, bnh_returns_df)],
        'beta': [empyrical.beta(rets, bnh_returns_df)],
    },
    index=[strat],
)
valdf

start date 2018-01-02T00:00:00.000000000
end date 2022-06-01 00:00:00


Unnamed: 0,Sharpe ratio,Sortino ratio,alpha,beta
relative_unweighted_betti_meandiff with shorting,1.417958,2.15454,0.028067,-0.005717


In [37]:
# Print the single-row statistics table as LaTeX source.
print(valdf.to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  Sharpe ratio &  Sortino ratio &     alpha &      beta \\
\midrule
relative\_unweighted\_betti\_meandiff with shorting &      1.417958 &        2.15454 &  0.028067 & -0.005717 \\
\bottomrule
\end{tabular}



  print(valdf.to_latex())
