# Multi-factor index computation
### Inspired of MSCI multi-factor index

In [10]:
import pandas as pd
import numpy as np
import plotly.express as px
from scipy import stats

#### Import database

In [37]:
df = pd.read_csv('MSCI Composition 2023 Q1.csv', index_col='Yahoo Ticker')
df_stats = pd.read_csv('MSCI World 2023 Q1 Statitics modified.csv', index_col='Unnamed: 0')

#### Built-in functions 

In [38]:
def ranking(value, scale_min=0, scale_max=100, nan_policy='raise', z_score=None, outlier_policy='omit'):
    """
    Rescale a list of integers or floats from scale_min to scale_max
    """
    type_value = type(value)
    if type_value != pd.core.series.Series: 
        value = pd.Series(value)
        
    if nan_policy == 'raise' and ~np.isnan(value).any():
        pass
    elif nan_policy == 'raise' and np.isnan(value).any(): 
        raise ValueError('The input contains nan values')
    elif nan_policy == 'propagate' and np.isnan(value).any():
        value[:] = np.nan
    elif nan_policy == 'omit': 
        pass
    else: 
        raise ValueError("nan_policy must be one of {'propagate', 'raise', 'omit'}")
        
    if z_score == None: 
        pass
    elif type(z_score) == int or float:
        mask = abs(stats.zscore(value, nan_policy='omit'))<z_score
        value, outlier = [value.where(mask, np.nan), value.where(mask==False, np.nan)]
    else:
        raise ValueError('z_score must be None or int or float')

    ranke = ((value-value.min()) / (value.max()-value.min())) * (scale_max-scale_min) + scale_min

    if z_score == None: 
        return ranke
    else:
        if outlier_policy == 'raise' and np.isnan(outlier).any(): 
            raise ValueError('The outliers contains nan values')
        elif outlier_policy == 'omit': 
            return ranke
        elif outlier_policy == 'propagate':
            outlier = [scale_max if outl > max(value) else scale_min if outl < min(value) else np.nan for outl in outlier]
            ranke = ranke.where(np.isnan(outlier), outlier)
            return ranke
        else:
            raise ValueError("outlier_policy must be one of {'propagate', 'raise', 'omit'}")

In [39]:
def factor_ranking(*args):
    df = pd.DataFrame(args).transpose()
    return ranking(value=df.sum(axis=1), nan_policy='omit')

#### Factor Value Computation

In [40]:
PE = pd.to_numeric(df_stats.loc['Trailing P/E '], errors='coerce')
PB = pd.to_numeric(df_stats.loc['Price/Book (mrq)'], errors='coerce')

PE_ranke = ranking(value=-PE, scale_max=67, nan_policy='omit', z_score=2, outlier_policy='propagate')
PB_ranke = ranking(value=-PB, scale_max=33, nan_policy='omit', z_score=2, outlier_policy='propagate')

value = factor_ranking(PE_ranke,PB_ranke)
value.name = 'value'

#### Factor Momentum Computation

In [41]:
MA200 = df_stats.loc['200-Day Moving Average 3'].replace(',','')
MA200 = pd.to_numeric(MA200, errors='coerce')

CP = df_stats.loc['Current Price'].replace(',','')
CP = pd.to_numeric(CP, errors='coerce')

MO = pd.Series(data=100 * np.log(list(CP/MA200)), index=df_stats.columns)

momentum = factor_ranking(MO)
momentum.name = 'momentum'

#### Factor Size Computation

In [42]:
MC = df_stats.loc['Market Cap (intraday) '].astype(float)

size = factor_ranking(-MC)
size.name = 'size'

#### Factor Quality Computation

In [43]:
CR = pd.to_numeric(df_stats.loc['Current Ratio (mrq)'], errors='coerce')
CR_target = np.exp(-(CR-1)**2/100)

DE = pd.to_numeric(df_stats.loc['Total Debt/Equity (mrq)'], errors='coerce')

RoE = df_stats.loc['Return on Equity (ttm)']
RoE = RoE.replace(to_replace='%',value='',regex=True)
RoE = pd.to_numeric(RoE,errors='coerce')

RoA = df_stats.loc['Return on Assets (ttm)']
RoA = RoA.replace(to_replace='%',value='',regex=True)
RoA = pd.to_numeric(RoA,errors='coerce')

CR_ranke = ranking(value=CR_target, scale_max=25, nan_policy='omit', z_score=2, outlier_policy='propagate')
DE_ranke = ranking(value=-DE, scale_max=25, nan_policy='omit', z_score=2, outlier_policy='propagate')
RoE_ranke = ranking(value=RoE, scale_max=25, nan_policy='omit', z_score=2, outlier_policy='propagate')
RoA_ranke = ranking(value=RoA, scale_max=25, nan_policy='omit', z_score=2, outlier_policy='propagate')

quality = factor_ranking(CR_ranke, DE_ranke, RoE_ranke, RoA_ranke)
quality.name = 'quality'

#### Global Ranking Computation

In [44]:
factor = pd.concat([value,momentum,size,quality],join='inner',axis=1)
factor.head(10)

Unnamed: 0,value,momentum,size,quality
AAPL,57.123127,56.316655,0.0,39.295592
MSFT,77.496556,55.155775,19.753086,94.747296
AMZN,28.520688,44.900114,58.436214,67.149192
GOOGL,86.127731,47.011377,49.794239,92.057374
NVDA,31.149483,73.527666,77.726337,82.220833
TSLA,59.995388,48.678117,73.692181,93.691115
GOOG,86.092754,46.798865,49.794239,92.057374
XOM,94.618501,61.429353,80.559259,93.682086
UNH,84.413883,51.74064,81.260905,82.869505
JNJ,84.160075,48.94172,82.974486,85.128329


In [45]:
ranke = factor.sum(axis=1).sort_values(ascending=False)
ranke.head(10)

BSL.AX       361.356590
VOE.VI       357.736486
STM.PA       357.639704
NHY.OL       355.201256
TECK-B.TO    355.134869
RIO.AX       350.631473
TFII.TO      350.524355
BOL.ST       349.151957
III.L        348.845184
WLK          348.271161
dtype: float64

#### Final Index Visualization

In [46]:
ticker = input('ticker: ')
name = df['Name'][df.index == ticker].values[0]
ticker_factor = pd.DataFrame(factor.loc[ticker]).reset_index()
px.line_polar(ticker_factor, 
              r=ticker, 
              theta='index', 
              line_close=True, 
              range_r=(0,100),
              title=f"{name} factors' ranke",
             )

ticker: MSFT


In [47]:
ranke.name = 'Grade'
df=pd.concat([df, ranke], axis=1)

fig = px.sunburst(
    df,
    path = [px.Constant('MSCI World'), 'Location', 'Sector', 'Name'],
    values = 'Grade',
    maxdepth = 2,
    title = 'MSCI World Multi-Factors Allocation',
    width = 750,
    height = 750
)
fig.show()