In [19]:
import pandas as pd
import numba
import numpy as np
from scipy.stats import zscore
import pandas_datareader as web
import os
import pickle
from constants import DATA_PATH, OUTPUT_PATH

In [20]:
df = pd.read_csv(os.path.join(DATA_PATH, 'data.csv')).set_index(['equity', 'date'])

In [21]:
rfr = web.get_data_fred("IRLTLT01EZM156N", df.index.get_level_values('date').min(), df.index.get_level_values('date').max()).div(100)
rfr = pd.Series(index=df.index.get_level_values('date').unique()[1:], data=rfr.values[:, 0], name='risk_free_rate')

In [22]:
df_prices = df['PX_LAST'].unstack(level=0).replace(0, np.nan)
df_log_rtn = df_prices.apply(lambda x: np.log(x / x.shift(1))).dropna(how='all')
benchmark_rtn = df_log_rtn.mean(skipna=True, axis='columns')

In [23]:
df_prices

equity,02P GR,1BM GR,1PL GR,1VS GR,2HR GR,3W9 GR,5AB GR,9AR GR,A3TV SM,AAD GR,...,YRM IM,YTY1V FH,ZAR GR,ZC FP,ZEL SM,ZIGGO NA,ZIL2 GR,ZO1 GR,ZOT SM,ZV IM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2003-01-31,,,,,4.121,,,,,5.500,...,,4.275,19.00,19.40,5.4020,,2.417,,6.1829,
2003-02-28,,,,,4.164,,,,,4.500,...,,4.175,18.25,17.40,5.5392,,2.358,,6.0952,
2003-03-31,,,,,4.021,,,,,3.980,...,,3.673,20.70,15.54,5.9902,,2.425,,6.1683,
2003-04-30,,,,,4.128,,,,,4.300,...,,4.000,19.00,20.22,6.8627,,3.250,,6.2171,
2003-05-30,,,,,4.156,,,,,4.100,...,,4.135,21.70,19.52,7.5980,,3.500,,6.8217,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011-11-30,12.10,0.539,8.422,1.299,16.360,3.490,14.895,2.176,4.45,27.215,...,28.25,11.910,61.50,60.72,1.5450,,17.710,55.55,10.4600,4.382
2011-12-30,11.20,0.529,8.000,1.400,16.550,3.100,14.885,2.440,4.65,26.650,...,28.25,12.380,61.10,65.48,1.7200,,19.180,43.10,10.6000,4.508
2012-01-31,13.29,0.529,10.525,1.295,16.900,3.025,14.911,2.580,4.82,29.870,...,28.25,14.100,60.99,68.25,1.7500,,22.940,37.10,10.9000,4.680
2012-02-29,12.44,0.529,11.805,1.122,16.010,4.270,13.500,2.720,4.50,33.095,...,28.25,16.550,63.27,72.33,1.8400,,24.630,40.00,10.1200,4.982


In [0]:
def shift_factors(dataframe):
    """
    This function shifts the factors in the dataframe to avoid look-ahead bias.

    The function first shifts all the columns by 1 month. Then, it takes price related factors to avoid shifting them.
    After that, it shifts all valuation related columns by other 2 months to avoid look-ahead bias.
    Finally, it puts back the price related factors and the price column.

    :param dataframe: The dataframe containing the factors.
    :type dataframe: pandas.DataFrame
    :return: The dataframe with shifted factors.
    :rtype: pandas.DataFrame
    """
    factors_price_related = ['PX_LAST', 'MOV_AVG_10D', 'MOV_AVG_20D', 'MOV_AVG_30D', 'MOV_AVG_40D', 'MOV_AVG_50D', 'MOV_AVG_5D', 'RSI_14D', 'RSI_30D', 'RSI_9D', 'VOLATILITY_180D',
                             'VOLATILITY_30D','VOLATILITY_90D', 'CUR_MKT_CAP']

    dataframe = dataframe.groupby(level=0).shift(1) # shift all the columns by 1 month to avoid look-ahead bias (for both price_related and valuation_related factors)
    dataframe_price_rl_tmp = dataframe[factors_price_related].values # take price related factors to avoid shifting them
    dataframe = dataframe.groupby(level=0).shift(2) # shift all valuation related columns by other 2 months to avoid look-ahead bias

    # now we can put back the price related factors and the price column
    dataframe[factors_price_related] = dataframe_price_rl_tmp

    return dataframe


def transform_factor(series, factor_type, price_series):
    """
    This function transforms the factor series based on the factor type.

    The function checks the factor type and applies the appropriate transformation.
    For example, for RSI factors, it subtracts the series from 30 to find undervalued stocks.
    For Moving Averages, it subtracts the series from the price series to use Moving Averages as support for the price.

    :param series: The series to be transformed.
    :type series: pandas.Series
    :param factor_type: The type of the factor.
    :type factor_type: str
    :param price_series: The price series.
    :type price_series: pandas.Series
    :return: The transformed series.
    :rtype: pandas.Series
    """
    highest_values_factors = ['5YR_AVG_RETURN_ON_EQUITY', 'BEST_EPS', 'CUR_MKT_CAP', 'EBITDA_MARGIN', 'EBITDA_MARGIN_3YR_AVG', 'EQY_DPS_NET_5YR_GROWTH', 'NORMALIZED_ROE', 'OPERATING_ROIC', 'T12M_DIL_EPS_CONT_OPS', 'TRAIL_12M_EBITDA_PER_SHARE', 'TRAIL_12M_SALES_PER_SH', "EQY_REC_CONS"]

    lowest_values_factors = ['10_YEAR_MOVING_AVERAGE_PE', 'CURRENT_EV_TO_12M_SALES', 'CURRENT_EV_TO_T12M_EBITDA', 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'FIVE_YR_AVG_PRICE_EARNINGS', 'NET_DEBT_PER_SHARE', 'NORMALIZED_ACCRUALS_BS_METHOD', 'NORMALIZED_ACCRUALS_CF_METHOD', 'PE_RATIO', 'PX_TO_BOOK_RATIO', 'PX_TO_TANG_BV_PER_SH', 'T12M_DIL_PE_CONT_OPS', 'T12M_DVD_PAYOUT_RATIO', 'TANG_BOOK_VAL_PER_SH', 'VOLATILITY_180D', 'VOLATILITY_30D', 'VOLATILITY_90D', "WACC_COST_EQUITY"]

    if factor_type in highest_values_factors: # default is highest values are better
        return series
    elif factor_type in lowest_values_factors: # lowest values are better
        return -series
    elif factor_type in ['RSI_14D', 'RSI_30D', 'RSI_9D']:  # We use RSI to find undervalued stocks (close or less than 30)
        return 30 - series
    elif factor_type in ['MOV_AVG_10D', 'MOV_AVG_20D', 'MOV_AVG_30D', 'MOV_AVG_40D', 'MOV_AVG_50D', 'MOV_AVG_5D']:
        return price_series - series # We use Moving Averages as support for the price, so we buy when we are close or under them
    raise Exception("Factor type " + factor_type + " not recognized")

In [24]:
def select_best_equities_from_factor(df_full, df_rnt, factor_selected, num_equities):
    """
    This function selects the best equities based on a selected factor.

    The function unstacks the selected factor from the full dataframe and selects the top equities based on the factor values.

    :param df_full: The full dataframe containing all the factors.
    :type df_full: pandas.DataFrame
    :param df_rnt: The dataframe containing the returns.
    :type df_rnt: pandas.DataFrame
    :param factor_selected: The selected factor.
    :type factor_selected: str
    :param num_equities: The number of equities to select.
    :type num_equities: int
    :return: The selected equities and the factor values.
    :rtype: tuple
    """
    factor_values = df_full[factor_selected].unstack(level=0).iloc[1:]
    equities =  factor_values.apply(
        lambda fact_val: fact_val.loc[~df_rnt.loc[fact_val.name, fact_val.index].isna()
        ].sort_values(ascending=False).head(num_equities).index.values,
        axis='columns', result_type='expand')
    equities.columns = [str(i) + "_name" for i in range(len(equities.columns))]
    return equities, factor_values


def compute_return_from_equities(df_rtn, equities):
    """
    This function computes the return from the selected equities.

    The function applies the return calculation to each row of the equities dataframe.

    :param df_rtn: The dataframe containing the returns.
    :type df_rtn: pandas.DataFrame
    :param equities: The dataframe containing the selected equities.
    :type equities: pandas.DataFrame
    :return: The returns from the selected equities.
    :rtype: pandas.DataFrame
    """
    equities_rtn = equities.apply(lambda row: df_rtn.loc[row.name, row.values].values, axis='columns', result_type='expand')
    equities_rtn.columns = [str(i) + "_rtn" for i in range(len(equities_rtn.columns))]
    equities_rtn.columns.name = 'equity_rtn'
    return equities_rtn


def extract_portfolio_state(equities, factor_values):
    """
    This function extracts the state of the portfolio.

    The function creates a sparse dataframe with the same index and columns as the factor values dataframe.
    It then fills the dataframe with 1s for the selected equities and calculates the state of the portfolio.

    :param equities: The dataframe containing the selected equities.
    :type equities: pandas.DataFrame
    :param factor_values: The dataframe containing the factor values.
    :type factor_values: pandas.DataFrame
    :return: The state of the portfolio.
    :rtype: pandas.DataFrame
    """
    equities_sparse = pd.DataFrame(index=factor_values.index, columns=factor_values.columns, data=0)
    for date in equities_sparse.index:
        equities_sparse.loc[date, equities.loc[date].values] = 1
    equities_sparse = (2 * equities_sparse - equities_sparse.shift(1, fill_value=0)).dropna()
    return equities_sparse.applymap(lambda x: 0.33 if x == 0 else 0.66 if x == 1 else 1 if x == 2 else 0)


def apply_fees(equities_rtn, equities_names, port_state, fees):
    """
    This function applies the fees to the returns of the selected equities.

    :param equities_rtn: The dataframe containing the returns of the selected equities.
    :type equities_rtn: pandas.DataFrame
    :param equities_names: The dataframe containing the names of the selected equities.
    :type equities_names: pandas.DataFrame
    :param port_state: The state of the portfolio.
    :type port_state: pandas.DataFrame
    :param fees: The fees to be applied.
    :type fees: float
    :return: The returns after applying the fees.
    :rtype: pandas.DataFrame
    """
    return equities_rtn.apply(
        lambda rtn_row: rtn_row -
                        (fees * (port_state.loc[rtn_row.name, equities_names.loc[rtn_row.name]] == 1)
                         * (1 + rtn_row).values).values,
        axis='columns')


def compute_univariate_strategy(df_full, df_rtn, factor_selected, num_equities, market_rtn, fees=None):
    """
    This function computes a univariate strategy based on a selected factor.

    The function selects the best equities based on the selected factor, computes the returns from the selected equities,
    extracts the state of the portfolio, applies the fees to the returns, and calculates the information ratio.

    :param df_full: The full dataframe containing all the factors.
    :type df_full: pandas.DataFrame
    :param df_rtn: The dataframe containing the returns.
    :type df_rtn: pandas.DataFrame
    :param factor_selected: The selected factor.
    :type factor_selected: str
    :param num_equities: The number of equities to select.
    :type num_equities: int
    :param market_rtn: The market return.
    :type market_rtn: pandas.Series
    :param fees: The fees to be applied. Default is None.
    :type fees: float, optional
    :return: The equities, returns, information ratio, and state of the portfolio.
    :rtype: tuple
    """
    equities_names, factor_values = select_best_equities_from_factor(df_full, df_rtn, factor_selected, num_equities)
    equities_rtn = compute_return_from_equities(df_rtn, equities_names)
    port_state = extract_portfolio_state(equities_names, factor_values)

    if fees is not None:
        equities_rtn = apply_fees(equities_rtn, equities_names, port_state, fees)

    equities = equities_names.merge(equities_rtn, left_index=True, right_index=True)
    equities = equities.iloc[:, np.concatenate([[i, i + num_equities] for i in range(num_equities)])]
    equities.columns = pd.MultiIndex.from_product([range(num_equities), ['name', 'rtn']])
    equities.columns.names = ['equity_pos', 'info']

    ris = equities.apply(lambda row: np.average(row.loc[:, "rtn"].values), axis='columns').to_frame(name='strategy_rtn')
    ris['strategy_alpha'] = ris['strategy_rtn'] - market_rtn
    info_ratio = ris['strategy_alpha'].mean() / ris['strategy_alpha'].std()
    return equities, ris, info_ratio, port_state

In [26]:
window_date_size = 12

@numba.jit(nopython=True)
def compute_window_corr_avg(window):
    """
    This function computes the average correlation of a window of data.

    The function checks if the length of the window is equal to the predefined window size.
    If it is, it calculates the correlation matrix of the window, removes the diagonal elements,
    and calculates the average correlation.
    If the length of the window is not equal to the window size, it returns a vector of ones.

    :param window: The window of data.
    :type window: numpy.ndarray
    :return: The average correlation of the window of data.
    :rtype: numpy.ndarray
    """
    if len(window) == window_date_size:
        corr_matrix = np.corrcoef(window, rowvar=False)
        corr_matrix_nod_iag = np.extract(~np.eye(corr_matrix.shape[0], dtype=numba.boolean), corr_matrix).reshape(corr_matrix.shape[0], -1)
        avg_corr = np.sum(corr_matrix_nod_iag, axis=1) / corr_matrix_nod_iag.shape[1]

    else:
        avg_corr = np.ones(window.shape[1])
    return avg_corr

In [27]:
def compute_zscore_strategy_simple(df_full, df_rtn, factors_selected, num_equities, market_rtn, fees=None):
    """
    This function computes a simple z-score strategy.

    The function calculates the z-score for the selected factors and then applies a univariate strategy.

    :param df_full: The full dataframe containing all the factors.
    :type df_full: pandas.DataFrame
    :param df_rtn: The dataframe containing the returns.
    :type df_rtn: pandas.DataFrame
    :param factors_selected: The selected factors.
    :type factors_selected: list
    :param num_equities: The number of equities to select.
    :type num_equities: int
    :param market_rtn: The market return.
    :type market_rtn: pandas.Series
    :param fees: The fees to be applied. Default is None.
    :type fees: float, optional
    :return: The result of the univariate strategy.
    :rtype: tuple
    """
    df_full['zscore'] = df_full[factors_selected].groupby(level=0).apply(lambda x: zscore(x, nan_policy='omit').mean(axis=1)).values
    return compute_univariate_strategy(df_full, df_rtn, 'zscore', num_equities, market_rtn, fees)

def compute_zscore_strategy_weighted(df_full, df_rtn, factors_selected, num_equities, market_rtn, fees=None):
    """
    This function computes a weighted z-score strategy.

    The function calculates the z-score for the selected factors, weights them, and then applies a univariate strategy.

    :param df_full: The full dataframe containing all the factors.
    :type df_full: pandas.DataFrame
    :param df_rtn: The dataframe containing the returns.
    :type df_rtn: pandas.DataFrame
    :param factors_selected: The selected factors.
    :type factors_selected: list
    :param num_equities: The number of equities to select.
    :type num_equities: int
    :param market_rtn: The market return.
    :type market_rtn: pandas.Series
    :param fees: The fees to be applied. Default is None.
    :type fees: float, optional
    :return: The result of the univariate strategy.
    :rtype: tuple
    """
    zs_factors = df[factors_selected].groupby(level=0, group_keys=False).apply(lambda x: zscore(x, nan_policy='omit'))

    zs_weight = zs_factors.groupby(level=0).rolling(window_date_size, min_periods=1, method="table"
                                                    ).apply(compute_window_corr_avg, raw=True, engine="numba").values

    zs_weight = 1 - abs(zs_weight) + 0.000001 # sum a small number to avoid division by zero
    df_full['zscore'] = np.average(zs_factors.values, axis=1, weights=zs_weight)
    return compute_univariate_strategy(df_full, df_rtn, 'zscore', num_equities, market_rtn, fees)

def compute_sequential_screening(df_full, df_rtn, filter_factor, filter_n_equities, market_rtn, fees=None):
    """
    This function computes a sequential screening strategy.

    The function filters the equities based on the selected factors and then applies a univariate strategy.

    :param df_full: The full dataframe containing all the factors.
    :type df_full: pandas.DataFrame
    :param df_rtn: The dataframe containing the returns.
    :type df_rtn: pandas.DataFrame
    :param filter_factor: The factors to use for filtering.
    :type filter_factor: list
    :param filter_n_equities: The number of equities to select at each filtering step.
    :type filter_n_equities: list
    :param market_rtn: The market return.
    :type market_rtn: pandas.Series
    :param fees: The fees to be applied. Default is None.
    :type fees: float, optional
    :return: The result of the univariate strategy.
    :rtype: tuple
    """
    df_filtered = df_full.reset_index()
    for n_equities, factor_to_use in zip(filter_n_equities[:-1], filter_factor[:-1]):
        eq_filter = select_best_equities_from_factor(df_filtered.set_index(['equity', 'date']), df_rtn, factor_to_use, n_equities)[0]
        df_filtered = pd.concat([df_filtered.loc[(df_filtered['date'] == date) & (df_filtered['equity'].isin(equities.values))
                                 ] for date, equities in eq_filter.iterrows()])
    return compute_univariate_strategy(df_filtered.set_index(['equity', 'date']), df_rtn, filter_factor[-1], filter_n_equities[-1], market_rtn, fees)


In [29]:
def evaluate_strategy(strategy_result, market_rtn, risk_free_rate):
    """
    This function evaluates the performance of a given strategy.

    The function calculates various performance metrics such as return mean, standard deviation, downside standard deviation,
    cumulative return, alpha mean, risk-adjusted return, Sharpe ratio, beta, Treynor ratio, and Sortino ratio.

    :param strategy_result: The result of the strategy to be evaluated.
    :type strategy_result: tuple
    :param market_rtn: The market return.
    :type market_rtn: pandas.Series
    :param risk_free_rate: The risk-free rate.
    :type risk_free_rate: pandas.Series
    :return: The performance metrics, return, cumulative return, alpha, and cumulative alpha.
    :rtype: tuple
    """
    rtn, alpha, info_rate = strategy_result[1]['strategy_rtn'], strategy_result[1]['strategy_alpha'], strategy_result[2]

    rtn_mean = rtn.mean()
    std = rtn.std()
    downside_std = np.sqrt(np.sum((rtn[rtn < 0] - rtn.mean())**2) / (len(rtn) - 1))
    rtn_com = rtn.cumsum()
    alpha_com = alpha.cumsum()

    alpha_mean = alpha.mean()
    risk_adjusted = rtn_mean / std
    sharpe_ratio = (rtn - risk_free_rate).mean() / std
    beta = market_rtn.cov(rtn) / market_rtn.var()
    treynor_ratio = (rtn - risk_free_rate).mean()  / beta
    sortino_ratio = (rtn - risk_free_rate).mean() / downside_std

    stats = pd.Series(index=['info_ratio', 'rtn_avg', 'rtn_std', 'rtn_downside_std', 'rtn_tot', 'alpha_avg', 'alpha_tot', 'rtn_risk_adj', 'sharpe_ratio', 'beta', 'treynor_ratio', 'sortino_ratio'],
                     data=[info_rate, rtn_mean, std, downside_std, rtn_com.values[-1], alpha_mean, alpha_com.values[-1], risk_adjusted, sharpe_ratio, beta, treynor_ratio, sortino_ratio], name='stats')

    return stats, rtn, rtn_com, alpha, alpha_com


# Factor transformation

In [30]:
df = shift_factors(df)
df = df.drop(columns=['PX_LAST']).apply(lambda x: transform_factor(x, x.name, df['PX_LAST']))


# Application of univariate strategies on multiple factors

In [31]:
commission_fees = 0.002
testing_factors = ['RSI_14D', 'PE_RATIO', 'PX_TO_BOOK_RATIO', 'BEST_EPS', 'WACC_COST_EQUITY', 'MOV_AVG_30D', 'EBITDA_MARGIN', 'NET_DEBT_PER_SHARE', 'NORMALIZED_ACCRUALS_CF_METHOD', 'VOLATILITY_90D']


n_factor_to_use = 4
seq_window = [400, 200, 100, 30]
num_equities_s = seq_window[-1]


In [32]:
# Compute equities and information ratio for each selected factor
strs_res_stats = pd.DataFrame(columns=['info_ratio', 'rtn_avg', 'rtn_std', 'rtn_downside_std', 'rtn_tot', 'alpha_avg', 'alpha_tot', 'rtn_risk_adj', 'sharpe_ratio', 'beta', 'treynor_ratio', 'sortino_ratio'])
strs_res_rtn, strs_res_rtn_com, strs_res_rtn_alpha, strs_res_rtn_alpha_com = pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
strs_res_port = {}

for factor in testing_factors: # with fees
    str_res = compute_univariate_strategy(df, df_log_rtn, factor, num_equities_s, benchmark_rtn, fees=commission_fees)
    strs_res_port[factor] = str_res[3]
    str_res = evaluate_strategy(str_res, benchmark_rtn, rfr)
    strs_res_stats.loc[factor] = str_res[0]
    strs_res_stats.at[factor, "factors_used"] = [factor]

    strs_res_rtn[factor] = str_res[1]
    strs_res_rtn_com[factor] = str_res[2]
    strs_res_rtn_alpha[factor] = str_res[3]
    strs_res_rtn_alpha_com[factor] = str_res[4]

strs_res_stats.iat[0, -1] = [strs_res_stats.iat[0, -1]]
best_factors = strs_res_stats.sort_values(by='info_ratio', ascending=False).head(n_factor_to_use).index.values

for factor in testing_factors: # without fees
    str_res = compute_univariate_strategy(df, df_log_rtn, factor, num_equities_s, benchmark_rtn)
    strs_res_port[factor + "_no-fees"] = str_res[3]
    str_res = evaluate_strategy(str_res, benchmark_rtn, rfr)
    strs_res_stats.loc[factor + "_no-fees"] = str_res[0]
    strs_res_stats.at[factor + "_no-fees", "factors_used"] = [factor]

    strs_res_rtn[factor + "_no-fees"] = str_res[1]
    strs_res_rtn_com[factor + "_no-fees"] = str_res[2]
    strs_res_rtn_alpha[factor + "_no-fees"] = str_res[3]
    strs_res_rtn_alpha_com[factor + "_no-fees"] = str_res[4]


In [33]:
best_factors

Unnamed: 0,info_ratio,factors_used
RSI_14D,0.174432,[RSI_14D]
NORMALIZED_ACCRUALS_CF_METHOD,0.158144,[NORMALIZED_ACCRUALS_CF_METHOD]
PX_TO_BOOK_RATIO,0.097661,[PX_TO_BOOK_RATIO]
NET_DEBT_PER_SHARE,0.070747,[NET_DEBT_PER_SHARE]
WACC_COST_EQUITY,0.057032,[WACC_COST_EQUITY]
VOLATILITY_90D,-0.03496,[VOLATILITY_90D]
BEST_EPS,-0.116543,[BEST_EPS]
EBITDA_MARGIN,-0.151997,[EBITDA_MARGIN]
PE_RATIO,-0.1533,[PE_RATIO]
MOV_AVG_30D,-0.191015,[MOV_AVG_30D]


# Application of multivariate strategies

In [34]:
seq_strategy = compute_sequential_screening(df, df_log_rtn, best_factors[::-1], seq_window, benchmark_rtn, fees=commission_fees)
zscore_strategy_simple = compute_zscore_strategy_simple(df, df_log_rtn, best_factors, num_equities_s, benchmark_rtn, fees=commission_fees)
zscore_strategy_weighted = compute_zscore_strategy_weighted(df, df_log_rtn, best_factors, num_equities_s, benchmark_rtn, fees=commission_fees)

  sub_result = numba_func(window, *args)
  result = np.where(min_periods_mask, result, np.nan)


# Elaboration of benchmark statistics

In [35]:
benchmark_rtn_com = benchmark_rtn.cumsum()
benchmark_stats = pd.Series(dtype='float64', name='benchmark_stats')

benchmark_stats['rtn_avg'] = benchmark_rtn.mean()
benchmark_stats['rtn_std'] = benchmark_rtn.std()
benchmark_stats['rtn_downside_std'] = np.sqrt(np.sum((benchmark_rtn[benchmark_rtn < 0] - benchmark_stats['rtn_std'])**2) / (len(benchmark_rtn) - 1))

benchmark_stats['rtn_tot'] = benchmark_rtn_com.values[-1]
benchmark_stats['rtn_risk_adj'] = benchmark_stats['rtn_avg'] / benchmark_stats['rtn_std']
benchmark_stats['sharpe_ratio'] = (benchmark_stats['rtn_avg'] - rfr).mean() / benchmark_stats['rtn_std']
# sortino ratio
benchmark_stats['sortino_ratio'] = (benchmark_stats['rtn_avg'] - rfr).mean() / benchmark_stats['rtn_downside_std']

strs_res_rtn['benchmark'] = benchmark_rtn
strs_res_rtn_com['benchmark'] = benchmark_rtn_com

benchmark_stats

rtn_avg             0.006537
rtn_std             0.052483
rtn_downside_std    0.066168
rtn_tot             0.719063
rtn_risk_adj        0.124553
sharpe_ratio       -0.643837
sortino_ratio      -0.510675
Name: benchmark_stats, dtype: float64

# Performance estimation of multivariate strategies 

In [36]:
for strategy, strategy_name, factor_used in zip([seq_strategy, zscore_strategy_simple, zscore_strategy_weighted],
                                                ["sequential", "zscore_simple", "zscore_weighted"],
                                                [best_factors]*3):
    strs_res_port[strategy_name] = strategy[3]
    str_res = evaluate_strategy(strategy, benchmark_rtn, rfr)
    strs_res_stats.loc[strategy_name] = str_res[0]
    strs_res_stats.at[strategy_name, "factors_used"] = factor_used

    strs_res_rtn[strategy_name] = str_res[1]
    strs_res_rtn_com[strategy_name] = str_res[2]
    strs_res_rtn_alpha[strategy_name] = str_res[3]
    strs_res_rtn_alpha_com[strategy_name] = str_res[4]

# Save results for future plotting

In [37]:
benchmark_stats.to_csv(os.path.join(OUTPUT_PATH, "benchmark_stats.csv"))
strs_res_stats.to_csv(os.path.join(OUTPUT_PATH, "strs_res_stats.csv"))

strs_res_rtn.to_csv(os.path.join(OUTPUT_PATH, "rtn.csv"))
strs_res_rtn_com.to_csv(os.path.join(OUTPUT_PATH, "rtn_com.csv"))
strs_res_rtn_alpha.to_csv(os.path.join(OUTPUT_PATH, "alpha.csv"))
strs_res_rtn_alpha_com.to_csv(os.path.join(OUTPUT_PATH, "alpha_com.csv"))
pd.Series(index=best_factors, data=seq_window[::-1], name="Sequential window size").to_csv(os.path.join(OUTPUT_PATH, "multivariate_input.csv"))

with open(os.path.join(OUTPUT_PATH, "ports.pkl"), "wb") as f:
    pickle.dump(strs_res_port, f)