In [1]:
import pandas as pd
import numba
import numpy as np
from scipy.stats import zscore
import plotly as px
import pandas_datareader as web

In [2]:
df = pd.read_csv('data/data.csv', index_col=0).set_index(['equity','date'])

In [5]:
rfr = web.get_data_fred("IRLTLT01EZM156N", df.index.get_level_values('date').min(), df.index.get_level_values('date').max()).div(100)
rfr = pd.Series(index=df.index.get_level_values('date').unique()[1:], data=rfr.values[:, 0], name='risk_free_rate')

In [6]:
df_prices = df['PX_LAST'].unstack(level=0).replace(0, np.nan)
df_log_rtn = df_prices.apply(lambda x: np.log(x / x.shift(1))).dropna(how='all')
benchmark_rtn = df_log_rtn.mean(skipna=True, axis='columns')

In [7]:
def select_best_equities_from_factor(df_full, df_rnt, factor_selected, num_equities):
    factor_values = df_full[factor_selected].unstack(level=0).iloc[1:]
    equities =  factor_values.apply(
        lambda fact_val: fact_val.loc[~df_rnt.loc[fact_val.name, fact_val.index].isna()
        ].sort_values(ascending=False).head(num_equities).index.values,
        axis='columns', result_type='expand')
    equities.columns = [str(i) + "_name" for i in range(len(equities.columns))]
    return equities, factor_values

def compute_return_from_equities(df_rtn, equities):
    equities_rtn = equities.apply(lambda row: df_rtn.loc[row.name, row.values].values, axis='columns', result_type='expand')
    equities_rtn.columns = [str(i) + "_rtn" for i in range(len(equities_rtn.columns))]
    equities_rtn.columns.name = 'equity_rtn'
    return equities_rtn

def extract_portfolio_state(equities, factor_values):
    equities_sparse = pd.DataFrame(index=factor_values.index, columns=factor_values.columns, data=0)
    for date in equities_sparse.index:
        equities_sparse.loc[date, equities.loc[date].values] = 1
    equities_sparse = (2 * equities_sparse - equities_sparse.shift(1, fill_value=0)).dropna()
    return equities_sparse.applymap(lambda x: 'not in' if x == 0 else 'in' if x == 1 else 'added' if x == 2 else 'removed')

def apply_fees(equities_rtn, equities_names, port_state, fees):
    return equities_rtn.apply(
        lambda rtn_row: rtn_row -
                        (fees * (port_state.loc[rtn_row.name, equities_names.loc[rtn_row.name]] == 'added')
                         * (1 + rtn_row).values).values,
        axis='columns')

def compute_univariate_strategy(df_full, df_rtn, factor_selected, num_equities, market_rtn, fees=None):
    equities_names, factor_values = select_best_equities_from_factor(df_full, df_rtn, factor_selected, num_equities)
    equities_rtn = compute_return_from_equities(df_rtn, equities_names)
    port_state = extract_portfolio_state(equities_names, factor_values)

    if fees is not None:
        equities_rtn = apply_fees(equities_rtn, equities_names, port_state, fees)

    equities = equities_names.merge(equities_rtn, left_index=True, right_index=True)
    equities = equities.iloc[:, np.concatenate([[i, i + num_equities] for i in range(num_equities)])]
    equities.columns = pd.MultiIndex.from_product([range(num_equities), ['name', 'rtn']])
    equities.columns.names = ['equity_pos', 'info']

    ris = equities.apply(lambda row: np.average(row.loc[:, "rtn"].values), axis='columns').to_frame(name='strategy_rtn')
    ris['strategy_alpha'] = ris['strategy_rtn'] - market_rtn
    info_ratio = ris['strategy_alpha'].mean() / ris['strategy_alpha'].std()
    return equities, ris, info_ratio, port_state

In [8]:
window_date_size = 12

@numba.jit(nopython=True)
def compute_window_corr_avg(window):
    if len(window) == window_date_size:
        corr_matrix = np.corrcoef(window, rowvar=False)
        corr_matrix_nod_iag = np.extract(~np.eye(corr_matrix.shape[0], dtype=numba.boolean), corr_matrix).reshape(corr_matrix.shape[0], -1)
        avg_corr = np.sum(corr_matrix_nod_iag, axis=1) / corr_matrix_nod_iag.shape[1]

    else:
        avg_corr = np.ones(window.shape[1])
    return avg_corr

In [9]:
def compute_zscore_strategy_simple(df_full, df_rtn, factors_selected, num_equities, market_rtn, fees=None):
    df_full['zscore'] = df_full[factors_selected].groupby(level=0).apply(lambda x: zscore(x).mean(axis=1)).values
    return compute_univariate_strategy(df_full, df_rtn, 'zscore', num_equities, market_rtn, fees)

def compute_zscore_strategy_weighted(df_full, df_rtn, factors_selected, num_equities, market_rtn, fees=None):
    zs_factors = df[factors_selected].groupby(level=0, group_keys=False).apply(lambda x: zscore(x))

    zs_weight = zs_factors.groupby(level=0).rolling(window_date_size, min_periods=1, method="table"
                                                    ).apply(compute_window_corr_avg, raw=True, engine="numba").values

    zs_weight = 1 - abs(zs_weight) + 0.000001 # si somma un piccolo valore per le prime date in cui i pesi sono 0
    df_full['zscore'] = np.average(zs_factors.values, axis=1, weights=zs_weight)
    return compute_univariate_strategy(df_full, df_rtn, 'zscore', num_equities, market_rtn, fees)

In [10]:
def compute_sequential_screening(df_full, df_rtn, filter_factor, filter_n_equities, market_rtn, fees=None):
    df_filtered = df_full.reset_index()
    for n_equities, factor in zip(filter_n_equities[:-1], filter_factor[:-1]):
        eq_filter = select_best_equities_from_factor(df_filtered.set_index(['equity', 'date']), df_rtn, factor, n_equities)[0]
        df_filtered = pd.concat([df_filtered.loc[(df_filtered['date'] == date) & (df_filtered['equity'].isin(equities.values))
                                 ] for date, equities in eq_filter.iterrows()])
    return compute_univariate_strategy(df_filtered.set_index(['equity', 'date']), df_rtn, filter_factor[-1], filter_n_equities[-1], market_rtn, fees)

# Applicazione delle strategie univariate su più fattori

In [11]:
# Calcolo equities e information ratio per ogni factor scelto
testing_factors = ['PE_RATIO', 'EBITDA_MARGIN', 'PX_TO_BOOK_RATIO', 'NORMALIZED_ACCRUALS_CF_METHOD', 'RSI_14D', 'VOLATILITY_30D', 'CUR_MKT_CAP', 'OPERATING_ROIC']

strs_ratio = pd.Series(index=testing_factors, name='info_ratio',
                       data=[compute_univariate_strategy(df, df_log_rtn, factor, 10, benchmark_rtn, fees=0.001)[2]
                             for factor in testing_factors])

best_factors = strs_ratio.sort_values(ascending=False).head(4).index.values
best_factors

array(['RSI_14D', 'PX_TO_BOOK_RATIO', 'VOLATILITY_30D', 'OPERATING_ROIC'],
      dtype=object)

In [12]:
zscore_strategy_simple = compute_zscore_strategy_simple(df, df_log_rtn, best_factors, 10, benchmark_rtn, fees=0.001)
zscore_strategy_simple

(equity_pos        0                  1                   2                  3  \
 info           name       rtn     name       rtn      name       rtn     name   
 date                                                                            
 2003-02-28   COX FP -1.947431   PIC BB  0.016407  RAP1V FH -0.025723   ANZ GR   
 2003-03-31  HEIA NA  0.025024   PIC BB -0.073943    TFI FP -0.017136   ANZ GR   
 2003-04-30   ANZ GR  0.000000   BOL FP  0.119561    TFI FP  0.186861   SAN FP   
 2003-05-30   MDN GR  0.178949  MOBB BB  0.148097    PIC BB -0.005543   OHB GR   
 2003-06-30   SPA BB -0.054435   MDN GR -0.057481    ANZ GR -0.038004   PIC BB   
 ...             ...       ...      ...       ...       ...       ...      ...   
 2011-11-30    PC IM  0.088898   SZU GR  0.102842    DLG IM -0.008255   WET GR   
 2011-12-30   SZU GR  0.045651   OLE SM -0.001000    VID SM  0.031757    PC IM   
 2012-01-31   OLE SM  0.067441   DLG IM  0.197577    PRC FP  0.013514   TCH FP   
 2012-02-29   BV

# Sequential screening

In [13]:
seq_strategy = compute_sequential_screening(df, df_log_rtn, best_factors, [400, 200, 100, 30], benchmark_rtn, fees=0.001)
seq_strategy

(equity_pos      0                  1                 2                  3   \
 info          name       rtn     name       rtn    name       rtn     name   
 date                                                                         
 2003-05-30   EC FP  0.031229   SAP GR  0.037930  PUM GR  0.051252   RAA GR   
 2003-06-30   EC FP -0.160343   SAP GR  0.073231  RAA GR -0.089612   MDN GR   
 2003-07-31   EC FP -0.023250   SAP GR  0.021958  PUM GR  0.158754   RAA GR   
 2003-08-29   EC FP  0.137022   SAP GR  0.040327  PUM GR  0.018546   RAA GR   
 2003-09-30  ZOT SM -0.000332   SAP GR -0.041280  RAA GR -0.012579   EVD GR   
 ...            ...       ...      ...       ...     ...       ...      ...   
 2011-11-30  EUR FP -0.171787  LCA1 GR  0.050979  EVS BB  0.043758   BOS GR   
 2011-12-30  EUR FP -0.038001   BOS GR -0.120377  OEL GR  0.002701   ASM NA   
 2012-01-31  EUR FP  0.131769  LCA1 GR  0.212322  BOS GR  0.152467   RMS FP   
 2012-02-29  EVS BB  0.072787   AAD GR  0.101425  NS

# Simultanous screening weighted

In [14]:
zscore_strategy_weighted = compute_zscore_strategy_weighted(df, df_log_rtn, best_factors, 10, benchmark_rtn, fees=0.001)
# zscore_strategy_weighted = compute_zscore_strategy_weighted(df, df_log_rtn, best_factors, 10, benchmark_rtn, fees=0.001)
zscore_strategy_weighted

  sub_result = numba_func(window, *args)
  result = np.where(min_periods_mask, result, np.nan)


(equity_pos        0                  1                   2                  3  \
 info           name       rtn     name       rtn      name       rtn     name   
 date                                                                            
 2003-02-28   COX FP -1.947431   PIC BB  0.016407  RAP1V FH -0.025723   ANZ GR   
 2003-03-31  HEIA NA  0.025024   PIC BB -0.073943    TFI FP -0.017136   ANZ GR   
 2003-04-30   ANZ GR  0.000000   BOL FP  0.119561    TFI FP  0.186861   SAN FP   
 2003-05-30   MDN GR  0.178949  MOBB BB  0.148097    PIC BB -0.005543   OHB GR   
 2003-06-30   SPA BB -0.054435   MDN GR -0.057481    ANZ GR -0.038004   PIC BB   
 ...             ...       ...      ...       ...       ...       ...      ...   
 2011-11-30    PC IM  0.088898   DLG IM -0.008255    KUL GR  0.062873   SZU GR   
 2011-12-30   SZU GR  0.045651    PC IM -0.072624    DLG IM -0.137917   WET GR   
 2012-01-31   DLG IM  0.197577    PC IM  0.083289    TCH FP  0.673672   PRC FP   
 2012-02-29    P

In [46]:
def evaluate_strategy(strategy_result, market_rtn, risk_free_rate):
    rtn, alpha = strategy_result[1]['strategy_rtn'], strategy_result[1]['strategy_alpha']

    rtn_mean = rtn.mean()
    std = rtn.std()
    downside_std = np.sqrt(np.sum((rtn[rtn < 0] - rtn.mean())**2) / (len(rtn) - 1))

    alpha = alpha.mean()
    risk_adjusted = rtn_mean / std
    sharpe_ratio = (rtn - risk_free_rate).mean() / std
    beta = market_rtn.cov(rtn) / market_rtn.var()
    treynor_ratio = (rtn - risk_free_rate).mean()  / beta
    sortino_ratio = (rtn - risk_free_rate).mean() / downside_std

    stats = pd.Series(index=['mean', 'std', 'downside_std', 'alpha', 'risk_adjusted', 'sharpe_ratio', 'beta', 'treynor_ratio', 'sortino_ratio'],
                     data=[rtn_mean, std, downside_std, alpha, risk_adjusted, sharpe_ratio, beta, treynor_ratio, sortino_ratio], name='stats')

    rtn_com = rtn.cumsum()

    return stats, rtn_com


In [47]:
evaluate_strategy(zscore_strategy_weighted, benchmark_rtn, rfr)

(mean             0.069052
 std              0.078234
 downside_std     0.051462
 alpha            0.062446
 risk_adjusted    0.882640
 sharpe_ratio     0.367165
 beta             1.054241
 treynor_ratio    0.027247
 sortino_ratio    0.558173
 Name: stats, dtype: float64,
 date
 2003-02-28      -0.216546
 2003-03-31      -0.214336
 2003-04-30      -0.044064
 2003-05-30       0.054023
 2003-06-30       0.107995
                  ...     
 2011-11-30     802.045773
 2011-12-30     803.908296
 2012-01-31     944.533064
 2012-02-29    1064.642770
 2012-03-30    1148.927302
 Name: strategy_rtn, Length: 110, dtype: float64)

In [44]:
rtn = zscore_strategy_weighted[1]['strategy_rtn']

In [50]:
pd.DataFrame(index=[])

date
2003-02-28   -0.216546
2003-03-31   -0.213726
2003-04-30    0.002999
2003-05-30    0.105607
2003-06-30    0.156812
                ...   
2011-11-30    7.212577
2011-12-30    7.214896
2012-01-31    7.389605
2012-02-29    7.516634
2012-03-30    7.595727
Name: strategy_rtn, Length: 110, dtype: float64