In [1]:
import pandas as pd
import numba
import numpy as np
from scipy.stats import zscore
import plotly as px
import pandas_datareader as web

In [2]:
df = pd.read_csv('data/data.csv', index_col=0).set_index(['equity','date'])

In [5]:
rfr = web.get_data_fred("IRLTLT01EZM156N", df.index.get_level_values('date').min(), df.index.get_level_values('date').max()).div(100)
rfr = pd.Series(index=df.index.get_level_values('date').unique()[1:], data=rfr.values[:, 0], name='risk_free_rate')

In [95]:
df_prices = df['PX_LAST'].unstack(level=0).replace(0, np.nan)
df_log_rtn = df_prices.apply(lambda x: np.log(x / x.shift(1))).dropna(how='all')
benchmark_rtn = df_log_rtn.mean(skipna=True, axis='columns')

In [7]:
def select_best_equities_from_factor(df_full, df_rnt, factor_selected, num_equities):
    factor_values = df_full[factor_selected].unstack(level=0).iloc[1:]
    equities =  factor_values.apply(
        lambda fact_val: fact_val.loc[~df_rnt.loc[fact_val.name, fact_val.index].isna()
        ].sort_values(ascending=False).head(num_equities).index.values,
        axis='columns', result_type='expand')
    equities.columns = [str(i) + "_name" for i in range(len(equities.columns))]
    return equities, factor_values

def compute_return_from_equities(df_rtn, equities):
    equities_rtn = equities.apply(lambda row: df_rtn.loc[row.name, row.values].values, axis='columns', result_type='expand')
    equities_rtn.columns = [str(i) + "_rtn" for i in range(len(equities_rtn.columns))]
    equities_rtn.columns.name = 'equity_rtn'
    return equities_rtn

def extract_portfolio_state(equities, factor_values):
    equities_sparse = pd.DataFrame(index=factor_values.index, columns=factor_values.columns, data=0)
    for date in equities_sparse.index:
        equities_sparse.loc[date, equities.loc[date].values] = 1
    equities_sparse = (2 * equities_sparse - equities_sparse.shift(1, fill_value=0)).dropna()
    return equities_sparse.applymap(lambda x: 'not in' if x == 0 else 'in' if x == 1 else 'added' if x == 2 else 'removed')

def apply_fees(equities_rtn, equities_names, port_state, fees):
    return equities_rtn.apply(
        lambda rtn_row: rtn_row -
                        (fees * (port_state.loc[rtn_row.name, equities_names.loc[rtn_row.name]] == 'added')
                         * (1 + rtn_row).values).values,
        axis='columns')

def compute_univariate_strategy(df_full, df_rtn, factor_selected, num_equities, market_rtn, fees=None):
    equities_names, factor_values = select_best_equities_from_factor(df_full, df_rtn, factor_selected, num_equities)
    equities_rtn = compute_return_from_equities(df_rtn, equities_names)
    port_state = extract_portfolio_state(equities_names, factor_values)

    if fees is not None:
        equities_rtn = apply_fees(equities_rtn, equities_names, port_state, fees)

    equities = equities_names.merge(equities_rtn, left_index=True, right_index=True)
    equities = equities.iloc[:, np.concatenate([[i, i + num_equities] for i in range(num_equities)])]
    equities.columns = pd.MultiIndex.from_product([range(num_equities), ['name', 'rtn']])
    equities.columns.names = ['equity_pos', 'info']

    ris = equities.apply(lambda row: np.average(row.loc[:, "rtn"].values), axis='columns').to_frame(name='strategy_rtn')
    ris['strategy_alpha'] = ris['strategy_rtn'] - market_rtn
    info_ratio = ris['strategy_alpha'].mean() / ris['strategy_alpha'].std()
    return equities, ris, info_ratio, port_state

In [8]:
window_date_size = 12

@numba.jit(nopython=True)
def compute_window_corr_avg(window):
    if len(window) == window_date_size:
        corr_matrix = np.corrcoef(window, rowvar=False)
        corr_matrix_nod_iag = np.extract(~np.eye(corr_matrix.shape[0], dtype=numba.boolean), corr_matrix).reshape(corr_matrix.shape[0], -1)
        avg_corr = np.sum(corr_matrix_nod_iag, axis=1) / corr_matrix_nod_iag.shape[1]

    else:
        avg_corr = np.ones(window.shape[1])
    return avg_corr

In [9]:
def compute_zscore_strategy_simple(df_full, df_rtn, factors_selected, num_equities, market_rtn, fees=None):
    df_full['zscore'] = df_full[factors_selected].groupby(level=0).apply(lambda x: zscore(x).mean(axis=1)).values
    return compute_univariate_strategy(df_full, df_rtn, 'zscore', num_equities, market_rtn, fees)

def compute_zscore_strategy_weighted(df_full, df_rtn, factors_selected, num_equities, market_rtn, fees=None):
    zs_factors = df[factors_selected].groupby(level=0, group_keys=False).apply(lambda x: zscore(x))

    zs_weight = zs_factors.groupby(level=0).rolling(window_date_size, min_periods=1, method="table"
                                                    ).apply(compute_window_corr_avg, raw=True, engine="numba").values

    zs_weight = 1 - abs(zs_weight) + 0.000001 # si somma un piccolo valore per le prime date in cui i pesi sono 0
    df_full['zscore'] = np.average(zs_factors.values, axis=1, weights=zs_weight)
    return compute_univariate_strategy(df_full, df_rtn, 'zscore', num_equities, market_rtn, fees)

In [10]:
def compute_sequential_screening(df_full, df_rtn, filter_factor, filter_n_equities, market_rtn, fees=None):
    df_filtered = df_full.reset_index()
    for n_equities, factor in zip(filter_n_equities[:-1], filter_factor[:-1]):
        eq_filter = select_best_equities_from_factor(df_filtered.set_index(['equity', 'date']), df_rtn, factor, n_equities)[0]
        df_filtered = pd.concat([df_filtered.loc[(df_filtered['date'] == date) & (df_filtered['equity'].isin(equities.values))
                                 ] for date, equities in eq_filter.iterrows()])
    return compute_univariate_strategy(df_filtered.set_index(['equity', 'date']), df_rtn, filter_factor[-1], filter_n_equities[-1], market_rtn, fees)

In [79]:
def evaluate_strategy(strategy_result, market_rtn, risk_free_rate):
    rtn, alpha, info_rate = strategy_result[1]['strategy_rtn'], strategy_result[1]['strategy_alpha'], strategy_result[2]

    rtn_mean = rtn.mean()
    std = rtn.std()
    downside_std = np.sqrt(np.sum((rtn[rtn < 0] - rtn.mean())**2) / (len(rtn) - 1))

    alpha = alpha.mean()
    risk_adjusted = rtn_mean / std
    sharpe_ratio = (rtn - risk_free_rate).mean() / std
    beta = market_rtn.cov(rtn) / market_rtn.var()
    treynor_ratio = (rtn - risk_free_rate).mean()  / beta
    sortino_ratio = (rtn - risk_free_rate).mean() / downside_std

    stats = pd.Series(index=['info_ratio', 'mean', 'std', 'downside_std', 'alpha', 'risk_adjusted', 'sharpe_ratio', 'beta', 'treynor_ratio', 'sortino_ratio'],
                     data=[info_rate, rtn_mean, std, downside_std, alpha, risk_adjusted, sharpe_ratio, beta, treynor_ratio, sortino_ratio], name='stats')

    rtn_com = rtn.cumsum()

    return stats, rtn_com


In [113]:
commission_fees = 0.002
num_equities = 30
testing_factors = ['PE_RATIO', 'EBITDA_MARGIN', 'PX_TO_BOOK_RATIO', 'NORMALIZED_ACCRUALS_CF_METHOD', 'RSI_14D', 'VOLATILITY_30D', 'CUR_MKT_CAP', 'OPERATING_ROIC']

# Applicazione delle strategie univariate su più fattori

In [116]:
# Calcolo equities e information ratio per ogni factor scelto
strs_res_stats = pd.DataFrame(columns=['info_ratio', 'mean', 'std', 'downside_std', 'alpha', 'risk_adjusted', 'sharpe_ratio', 'beta', 'treynor_ratio', 'sortino_ratio', 'factors_used'])
strs_res_rtn_com = pd.DataFrame()
strs_res = {}

for factor in testing_factors:
    strs_res[factor] = compute_univariate_strategy(df, df_log_rtn, factor, num_equities, benchmark_rtn, fees=commission_fees)
    str_res = evaluate_strategy(strs_res[factor], benchmark_rtn, rfr)
    strs_res_stats.loc[factor] = str_res[0]
    strs_res_stats.at[factor, "factors_used"] = [factor]
    strs_res_rtn_com[factor] = str_res[1]

best_factors = strs_res_stats.sort_values(by='info_ratio', ascending=False).head(4).index.values

for factor in testing_factors:
    strs_res[factor + "_no-fees"] = compute_univariate_strategy(df, df_log_rtn, factor, num_equities, benchmark_rtn)
    str_res = evaluate_strategy(strs_res[factor + "_no-fees"], benchmark_rtn, rfr)
    strs_res_stats.loc[factor + "_no-fees"] = str_res[0]
    strs_res_stats.at[factor + "_no-fees", "factors_used"] = [factor]
    strs_res_rtn_com[factor + "_no-fees"] = str_res[1]


In [104]:
seq_strategy = compute_sequential_screening(df, df_log_rtn, best_factors, [400, 200, 100, 30], benchmark_rtn, fees=commission_fees)
zscore_strategy_simple = compute_zscore_strategy_simple(df, df_log_rtn, best_factors, num_equities, benchmark_rtn, fees=commission_fees)
zscore_strategy_weighted = compute_zscore_strategy_weighted(df, df_log_rtn, best_factors, num_equities, benchmark_rtn, fees=commission_fees)

# Valutazione di tutte le strategie non univariate

In [117]:
for strategy, strategy_name, factor_used in zip([seq_strategy, zscore_strategy_simple, zscore_strategy_weighted],
                                                ["sequential", "zscore_simple", "zscore_weighted"],
                                                [best_factors]*3):
    strs_res[strategy_name] = strategy
    str_res = evaluate_strategy(strategy, benchmark_rtn, rfr)
    strs_res_stats.loc[strategy_name] = str_res[0]
    strs_res_stats.at[strategy_name, "factors_used"] = factor_used
    strs_res_rtn_com[strategy_name] = str_res[1]

In [118]:
strs_res_stats

Unnamed: 0,info_ratio,mean,std,downside_std,alpha,risk_adjusted,sharpe_ratio,beta,treynor_ratio,sortino_ratio,factors_used
PE_RATIO,0.281108,0.012248,0.057249,0.041647,0.005642,0.21394,-0.490486,1.023499,-0.027435,-0.674233,PE_RATIO
EBITDA_MARGIN,-0.139086,0.003648,0.044339,0.035267,-0.002958,0.082268,-0.827263,0.775638,-0.04729,-1.040068,[EBITDA_MARGIN]
PX_TO_BOOK_RATIO,0.619483,0.022222,0.045974,0.033435,0.015616,0.483356,-0.393815,0.769196,-0.023538,-0.541506,[PX_TO_BOOK_RATIO]
NORMALIZED_ACCRUALS_CF_METHOD,0.083081,0.008053,0.054065,0.043833,0.001447,0.148957,-0.596951,0.977036,-0.033033,-0.736295,[NORMALIZED_ACCRUALS_CF_METHOD]
RSI_14D,1.755465,0.065792,0.036437,0.014521,0.059186,1.80562,0.698861,0.534775,0.047617,1.753701,[RSI_14D]
VOLATILITY_30D,0.222363,0.021563,0.112118,0.084401,0.014957,0.192327,-0.16736,1.965232,-0.009548,-0.222321,[VOLATILITY_30D]
CUR_MKT_CAP,-0.013048,0.006268,0.043303,0.032895,-0.000338,0.14474,-0.786537,0.719197,-0.047358,-1.035396,[CUR_MKT_CAP]
OPERATING_ROIC,0.304444,0.011735,0.049362,0.038994,0.005129,0.237738,-0.579235,0.89204,-0.032053,-0.733248,[OPERATING_ROIC]
PE_RATIO_no-fees,0.302405,0.012679,0.057263,0.041635,0.006073,0.221408,-0.482838,1.023722,-0.027008,-0.664071,[PE_RATIO]
EBITDA_MARGIN_no-fees,-0.135435,0.003721,0.044335,0.035268,-0.002885,0.083931,-0.82567,0.775341,-0.047213,-1.037948,[EBITDA_MARGIN]


In [119]:
best_factors

array(['RSI_14D', 'PX_TO_BOOK_RATIO', 'OPERATING_ROIC', 'PE_RATIO'],
      dtype=object)