In [2]:
import pandas as pd
import numpy as np
from scipy.stats import zscore
import warnings
from sklearn.linear_model import LinearRegression

warnings.filterwarnings("ignore")

In [3]:
# Load the raw panel, indexed by (equity, date) so each equity can be screened as a group.
panel_raw = pd.read_csv('data/data.csv').set_index(['equity', 'date'])

# Screen 1: discard every equity that has at least one row where all columns are missing.
rows_of_empty_equities = panel_raw.groupby(level=0).filter(
    lambda equity: equity.isna().all(axis=1).any()
)
panel_complete = panel_raw.drop(rows_of_empty_equities.index)

# Screen 2: discard every equity whose PX_LAST is exactly zero on any date.
rows_of_zero_priced_equities = panel_complete.groupby(level=0).filter(
    lambda equity: (equity['PX_LAST'] == 0).any()
)
df = panel_complete.drop(rows_of_zero_priced_equities.index).reset_index()

df_factor_chosen: for every date, the best equities selected according to the chosen factor \
factor: name of the factor column used to rank the equities \
number_of_equities: number of equities to select

In [4]:
# Price panel: one row per date, one column per equity, values are PX_LAST.
df_returns = (
    df[['equity', 'date', 'PX_LAST']]
    .sort_values(by=['equity', 'date'])
    .pivot(index='date', columns='equity', values='PX_LAST')
)

In [5]:
# Log return of every equity between consecutive dates: log(price(t) / price(t-1)).
df_log_returns = np.log(df_returns / df_returns.shift(1))

# Benchmark: cross-sectional mean of the log returns on each date, ignoring missing
# values; dates on which no equity has a return are removed first.
benchmark_rtn = df_log_returns.dropna(how='all').mean(axis='columns', skipna=True)

In [6]:
def select_equities(dataframe, factor, number_of_equities, df_rtn, bench_rtn):
    """Pick the top equities by a factor on every date and score the selection.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Long-format data with at least the columns 'date', 'equity' and ``factor``.
    factor : str
        Name of the column used to rank the equities (highest values first).
    number_of_equities : int
        How many equities to keep per date. Dates with fewer rows than this are
        kept; their unused slots are left missing and ignored in the return.
    df_rtn : pandas.DataFrame
        Returns indexed by date with one column per equity.
    bench_rtn : pandas.Series
        Benchmark return indexed by date.

    Returns
    -------
    (pandas.DataFrame, float)
        The frame has one row per date (the first date is dropped), the selected
        equities in integer-labelled columns, a 'returns' column (sum of the
        selected equities' returns on that date, missing values counted as 0,
        rounded to 2 decimals) and an 'alpha' column ('returns' minus the
        benchmark). The float is mean(alpha) / std(alpha).

    Notes
    -----
    NOTE(review): the return is read on the same date as the factor used for the
    selection — confirm this is intended and not a look-ahead.
    NOTE(review): 'returns' is a sum over the selected equities while the
    benchmark passed by the notebook is a cross-sectional mean — confirm the two
    are meant to be compared directly.
    """
    # Single pass over the dates instead of re-filtering the whole frame per date.
    picks_by_date = {
        date: (
            rows.sort_values(by=['date', factor], ascending=False)['equity']
            .head(number_of_equities)
            .values
        )
        for date, rows in dataframe.groupby('date')
    }
    df_factor_chosen = pd.DataFrame.from_dict(picks_by_date, orient='index').sort_index()

    portfolio_returns = []
    for date, picks in df_factor_chosen.iterrows():
        returns_on_date = df_rtn.loc[date]
        total = 0
        # dropna skips the padding of dates that had fewer equities than requested,
        # which would otherwise be used as a (non-existent) column label.
        for equity in picks.dropna():
            total += np.nansum(returns_on_date[equity])
        portfolio_returns.append(round(total, 2))

    df_factor_chosen_returns = df_factor_chosen.copy()
    df_factor_chosen_returns['returns'] = portfolio_returns
    # Drop the first date (presumably because it has no prior price to compute a
    # return from); copy so the 'alpha' assignment below does not write to a slice.
    df_factor_chosen_returns = df_factor_chosen_returns.iloc[1:, :].copy()

    df_factor_chosen_returns['alpha'] = df_factor_chosen_returns['returns'] - bench_rtn
    alpha = df_factor_chosen_returns['alpha']
    information_ratio = alpha.mean() / alpha.std()

    return df_factor_chosen_returns, information_ratio

df_returns: price table with one row per date and one column per equity (values are PX_LAST)

df_log_returns: log returns for each equity, computed as log( price(t) / price(t-1) )

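df_factor_chosen_returns: df_factor_chosen with two extra columns: `returns`, the sum of the log returns of the selected equities on each date (rounded to two decimals), and `alpha`, that return minus the benchmark return.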

In [7]:
# Compute the selected equities and the information ratio for every candidate factor.
candidate_factors = [
    'PE_RATIO',
    'EBITDA_MARGIN',
    'PX_TO_BOOK_RATIO',
    'NORMALIZED_ACCRUALS_CF_METHOD',
    'RSI_14D',
    'VOLATILITY_30D',
    'CUR_MKT_CAP',
    'OPERATING_ROIC',
]
# NOTE(review): wrapping the list in another list makes a one-level MultiIndex, so
# every label is a 1-tuple; a flat index would remove the need for `factor[0]`.
s_info_ratio = pd.Series(index=[candidate_factors], name='information_ratio')
equities_selected = {}

for factor in s_info_ratio.index:
    factor_name = factor[0]
    df_factor_chosen, information_ratio = select_equities(
        df, factor_name, 10, df_log_returns, benchmark_rtn
    )
    s_info_ratio.loc[factor_name] = information_ratio
    equities_selected[factor_name] = df_factor_chosen

s_info_ratio

PE_RATIO                         0.160940
EBITDA_MARGIN                    0.158318
PX_TO_BOOK_RATIO                 0.408200
NORMALIZED_ACCRUALS_CF_METHOD    0.118331
RSI_14D                          1.943153
VOLATILITY_30D                   0.246033
CUR_MKT_CAP                      0.133747
OPERATING_ROIC                   0.191919
Name: information_ratio, dtype: float64

In [8]:
# Keep the 4 factors with the highest information ratio.
ranked_info_ratio = s_info_ratio.sort_values(ascending=False)
factor_to_use = ranked_info_ratio.head(4).index.get_level_values(0).values

In [9]:
# Show the names of the factors kept for the combined score.
factor_to_use

array(['RSI_14D', 'PX_TO_BOOK_RATIO', 'VOLATILITY_30D', 'OPERATING_ROIC'],
      dtype=object)

In [17]:

# Add the aggregated z-score column for the chosen factors: rows are grouped by
# equity (index level 0), scipy's `zscore` is applied to the chosen factor columns
# of each group, and the z-scores are averaged across the factors for every row.
# NOTE(review): `.values` discards the (equity, date) index, so the result is written
# back to `df` by position. This presumably relies on `df` already being ordered by
# equity so the grouped output lines up with the rows — TODO confirm.
# NOTE(review): this mutates `df` in place, so earlier displays of `df` are stale.
df['zscore'] = df.set_index(['equity', 'date'])[factor_to_use].groupby(level=0).apply(lambda x: zscore(x.loc[:, factor_to_use]).mean(axis=1)).values

In [18]:
# Rank on the aggregated 'zscore' column with the same settings used for the single factors.
zscore_equities, zscore_info_ratio = select_equities(
    dataframe=df,
    factor='zscore',
    number_of_equities=10,
    df_rtn=df_log_returns,
    bench_rtn=benchmark_rtn,
)

In [21]:
# Display the frame, which now carries the 'zscore' column assigned above.
# NOTE(review): this cell's execution count (21) is out of sequence with the
# neighbouring cells (18 before, 19 after).
df

Unnamed: 0,equity,date,10_YEAR_MOVING_AVERAGE_PE,5YR_AVG_RETURN_ON_EQUITY,BEST_EPS,CURRENT_EV_TO_12M_SALES,CURRENT_EV_TO_T12M_EBITDA,CUR_MKT_CAP,EBITDA_MARGIN,EBITDA_MARGIN_3YR_AVG,...,T12M_DIL_PE_CONT_OPS,T12M_DVD_PAYOUT_RATIO,TANG_BOOK_VAL_PER_SH,TRAIL_12M_EBITDA_PER_SHARE,TRAIL_12M_SALES_PER_SH,VOLATILITY_180D,VOLATILITY_30D,VOLATILITY_90D,WACC_COST_EQUITY,zscore
0,2HR GR,2003-01-31,0.0,0.0000,0.000,0.5451,4.9943,109.5236,10.9136,0.0000,...,15.9232,0.0000,1.7176,0.7867,7.2082,38.383,39.529,32.508,6.4849,-1.007338
1,2HR GR,2003-02-28,0.0,0.0000,0.000,0.5510,5.0486,110.6586,10.9136,0.0000,...,16.0882,0.0000,1.7176,0.7867,7.2082,39.094,43.347,34.569,6.4849,-0.928497
2,2HR GR,2003-03-31,0.0,0.0000,0.000,0.5524,5.2982,106.8754,10.4260,0.0000,...,27.7478,0.0000,1.7189,0.7478,7.1729,42.295,50.877,40.056,6.3121,-0.585420
3,2HR GR,2003-04-30,0.0,0.0000,0.000,0.5673,5.4410,109.7128,10.4260,0.0000,...,28.4844,0.0000,1.7189,0.7478,7.1729,38.535,33.101,40.319,6.3121,-0.805814
4,2HR GR,2003-05-30,0.0,0.0000,0.000,0.5712,5.4791,110.4694,10.4260,0.0000,...,28.6809,0.0000,1.7189,0.7478,7.1729,33.126,15.741,35.860,6.3121,-1.058572
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66928,ZOT SM,2011-11-30,0.0,96.5949,0.539,4.6261,13.1037,3837.7349,35.3037,36.1603,...,0.0000,24.0383,0.1661,0.7691,2.1781,27.921,30.505,33.810,11.8730,-0.399425
66929,ZOT SM,2011-12-30,0.0,96.5949,0.547,4.6888,13.2813,3889.1003,35.3037,36.1603,...,0.0000,24.0383,0.1661,0.7691,2.1781,28.339,29.731,31.522,11.8730,-0.399029
66930,ZOT SM,2012-01-31,0.0,96.5949,0.537,4.8232,13.6619,3999.1689,35.3037,36.1603,...,0.0000,24.0383,0.1661,0.7691,2.1781,28.832,19.503,27.909,11.8730,-0.598322
66931,ZOT SM,2012-02-29,0.0,96.5949,0.538,4.4738,12.6723,3712.9900,35.3037,36.1603,...,0.0000,24.0383,0.1661,0.7691,2.1781,28.265,21.408,24.781,11.8730,-0.649701


In [19]:
# Information ratio returned by select_equities for the 'zscore' ranking.
zscore_info_ratio

0.8978267783676801

In [20]:
# Per-date selection for the 'zscore' ranking, with its 'returns' and 'alpha' columns.
zscore_equities

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,returns,alpha
2003-02-28,COX FP,PIC BB,RAP1V FH,ANZ GR,HEIA NA,HEIO NA,SAZ GR,SAN FP,MUK GR,MDN GR,-2.16,-2.105722
2003-03-31,HEIA NA,PIC BB,TFI FP,ANZ GR,HEIO NA,MOBB BB,REN NA,SAN FP,SPA BB,RAP1V FH,0.03,0.055749
2003-04-30,ANZ GR,BOL FP,TFI FP,SAN FP,PIC BB,PUM GR,SAP GR,SPA BB,LPK GR,LEI GR,2.17,2.059852
2003-05-30,MDN GR,MOBB BB,PIC BB,OHB GR,SPA BB,SAZ GR,RHK GR,TFI FP,PUM GR,LPK GR,1.03,0.991772
2003-06-30,SPA BB,MDN GR,ANZ GR,PIC BB,HEIA NA,SAZ GR,SAP GR,BZU IM,TNG FP,OLG GR,0.52,0.472080
...,...,...,...,...,...,...,...,...,...,...,...,...
2011-11-30,PC IM,SZU GR,DLG IM,WET GR,KUL GR,TPL GR,BVB GR,LCA1 GR,BOS GR,DUE GR,0.42,0.475321
2011-12-30,SZU GR,OLE SM,VID SM,PC IM,BVB GR,DLG IM,WET GR,TPL GR,JUVE IM,KUL GR,0.09,0.098528
2012-01-31,OLE SM,DLG IM,PRC FP,TCH FP,PC IM,BVB GR,VID SM,LCA1 GR,RMS FP,DUE GR,1.75,1.678777
2012-02-29,BVB GR,PC IM,OLE SM,DUE GR,TCH FP,VID SM,DLG IM,RMS FP,PNL NA,SCUN GR,1.01,0.963640
