In [None]:
import numpy as np
import pandas as pd
import scipy.stats
import statsmodels.api as sm
from scipy import stats

In [None]:
# Load the inputs for the SAI and Fama-French methods:
#   ret, ret_FF - return tables, indexed by their 'date' column
#   siz         - portfolio sizes, read with the default integer index
#   FF          - factor table from EM_factors.csv, indexed by 'date'
ret = pd.read_csv('ret.csv', index_col='date')
ret_FF = pd.read_csv('ret_FF.csv', index_col='date')
siz = pd.read_csv('siz.csv')
FF = pd.read_csv('EM_factors.csv', index_col='date')

In [None]:
# Preview the first rows of the Fama-French table
ret_FF.head()

In [None]:
# Attach the Fama-French series to the SAI tables as an 'FF' column.
# The raw arrays are assigned by position, so both tables must already
# have the same number of rows in the same order.
ret['FF'] = ret_FF['Returns'].to_numpy()
siz['FF'] = ret_FF['Length'].to_numpy()

In [None]:
# Empty table with the same columns as ret; summary rows are appended to it below.
# A zero-row slice does not depend on a sentinel value never occurring in the
# data, and .copy() detaches the table from ret so the later
# results.loc[...] = ... assignments are not made on a slice of ret.
results = ret.iloc[0:0].copy()

In [None]:
# Copy the date index of FF into a regular 'date1' column (the index itself is unchanged)
FF['date1'] = FF.index

In [None]:
# Risk-free-rate table: the RF series plus its date column.
# .copy() makes rfr an independent frame, so the columns added to it in the
# following cells are not assigned onto a slice of FF.
rfr = FF[['RF', 'date1']].copy()

In [None]:
# Copy the date index of ret into a regular 'date1' column (the index itself is unchanged)
ret['date1'] = ret.index

In [None]:
# Add a string version of the date column to both frames; it becomes the join key
ret['joindate'] = ret['date1'].astype(str)
rfr['joindate'] = rfr['date1'].astype(str)

In [None]:
# Put the return join key in the right format: keep characters 0-3 and 5-6 of
# the date text (year and month when the text looks like 'YYYY-MM-DD' —
# presumably the format of ret's dates; verify against ret.csv).
# The key is rebuilt from 'date1' in one pass instead of rewriting the last
# column cell by cell, so it no longer depends on column position and
# re-running the cell gives the same result.
ret['joindate'] = [d[:4] + d[5:7] for d in ret['date1'].astype(str)]

In [None]:
# Turn RF from a percentage into a gross factor: 1 + RF/100
rfr['RF'] = 1 + rfr['RF'] / 100

In [None]:
# Quarterly risk-free factor: product of the current and the two preceding
# monthly RF factors. The first two rows have no complete 3-month window and
# stay at 0, as before.
# The column is built as floats in one go, so no float product is written into
# an integer-initialised column, and RF is addressed by name rather than by
# column position.
monthly_rf = rfr['RF']
rfr['rf'] = [
    monthly_rf.iloc[i-2:i+1].prod() if i >= 2 else 0.0
    for i in range(len(rfr))
]

In [None]:
# Use the joindate key as the index of both frames so they can be joined on it
rfr = rfr.set_index('joindate')
ret = ret.set_index('joindate')

In [None]:
# Join the risk-free columns onto ret via the shared joindate index;
# column names that exist in both frames get an 'f' suffix on the rfr side
ret = ret.join(rfr, lsuffix='', rsuffix='f')

In [None]:
# Go back to the date index and discard the helper columns brought in by the join
ret = ret.set_index('date1').drop(columns=['date1f', 'RF'])

In [None]:
# Annualised return per column: compound all rows, take the 1/years root
# (years = len(ret)/4 - 0.25) and subtract 1
total_ret = ret.prod(axis=0).pow(1/((len(ret)/4)-0.25)).sub(1)

In [None]:
# Keep the annualised 'rf' entry of total_ret.
# Note: this rebinds the name rfr from the risk-free DataFrame to a single value.
rfr = total_ret['rf']

In [None]:
# Store the annualised returns as a row of results, leaving out the last entry of total_ret
results.loc['Annualised Return'] = total_ret.iloc[:-1]

In [None]:
# Annualised standard deviation: per-period standard deviation times 2,
# stored as a results row without the last entry of the series
ret_std = 2 * ret.std(axis=0)
results.loc['STD of Retrun'] = ret_std.head(-1)

In [None]:
# Mean portfolio size per column, stored as a results row.
# siz is read without an index column, so it may still carry a non-numeric
# column; numeric_only=True keeps such a column from breaking the mean.
mean_siz = siz.mean(axis=0, numeric_only=True)
results.loc['Mean # of Equities'] = mean_siz

In [None]:
# Keep an independent copy of ret at this stage; the style-tilt section further down starts from it
ret_for_st = ret.copy()

# Sharpe ratio

In [None]:
# Excess returns: subtract 1 from every column, then subtract the
# (likewise reduced) risk-free column from each column
net_ret = ret - 1
rfr1 = net_ret['rf']
excess_ret = net_ret.sub(rfr1, axis=0)

# Sharpe ratio: twice the mean excess return over its standard deviation
sharpe = (excess_ret.mean(axis=0) * 2) / excess_ret.std(axis=0)
print(sharpe)

In [None]:
# Store the Sharpe ratios as a row of results
results.loc['Sharpe Ratio'] = sharpe

In [None]:
# t-statistic of the Sharpe ratio: ratio times sqrt(years), years = len(ret)/4 - 0.25
results.loc['Sharpe t-test'] = sharpe.mul(((len(ret)/4)-0.25)**0.5)

In [None]:
# p-value of the Sharpe t-statistic from the survival function of Student's t.
# Uses the `stats` module imported in the first cell instead of importing
# scipy.stats again in the middle of the notebook.
deg_freedom = ((len(ret)/4)-0.25)-1

results.loc['Sharpe P-val'] = stats.t.sf(results.loc['Sharpe t-test'].tolist(), df=deg_freedom)

# Tracking error

In [None]:
# Relative returns: every column of ret minus the benchmark column.
# The benchmark is copied first and subtracted from all columns in one
# vectorised step. The previous column-by-column loop overwrote the benchmark
# column of the same frame the reference series was taken from, which
# (depending on the pandas version) can alter the reference mid-loop; it also
# printed every column name as leftover debug output.
bench = ret['benchmark'].copy()
relative_ret = ret.sub(bench, axis=0)

In [None]:
# The risk-free column is not needed for the relative-return statistics
relative_ret = relative_ret.drop(columns='rf')

In [None]:
# Tracking error: standard deviation of each column of relative returns,
# stored as a row of results
# NOTE(review): unlike 'STD of Retrun', this value is not multiplied by 2 — confirm that is intended
t_error = relative_ret.std(axis=0)
results.loc['Tracking error'] = t_error

# Information ratio

In [None]:
# Mean relative return per column, scaled by 2
annualised_rel_ret = relative_ret.mean(axis=0).mul(2)

In [None]:
# Information ratio: scaled mean relative return divided by the tracking error
info_ratio = annualised_rel_ret/t_error

In [None]:
# Display the information ratios
info_ratio

In [None]:
# Store the information ratio and its t-statistic
# (ratio times sqrt(years), years = len(ret)/4 - 0.25)
results.loc['Info Ratio'] = info_ratio
results.loc['Info Ratio t-stat'] = info_ratio.mul((len(ret)/4-0.25)**0.5)

In [None]:
# Degrees of freedom: years in the sample minus one
deg_freedom = ((len(ret)/4)-0.25)-1
# p-value of the information-ratio t-statistic from Student's t survival function
info_t_stats = results.loc['Info Ratio t-stat'].tolist()
results.loc['Info Ratio p-val'] = stats.t.sf(info_t_stats, df=deg_freedom)

# Target shuffle

In [None]:
# Load the target-shuffle values and annualise them the same way as total_ret:
# take the 1/years root (years = len(ret)/4 - 0.25), then subtract 1
TS = pd.read_csv('ts_final.csv').pow(1/(len(ret)/4-0.25)).sub(1)

In [None]:
# Append a placeholder row (all zeros) as the last row of results; the next cell fills it in by position
results.loc['Target Shuffle Percentile'] = 0

In [None]:
# Percentile rank of each annualised return within its target-shuffle column,
# written into the last row of results.
# Columns of TS, entries of total_ret and columns of results are matched by
# position. total_ret is read with .iloc because integer keys on a
# string-labelled Series are no longer treated as positions by recent pandas.
# The percentile is computed once and reused for printing and storing.
for i, ts_name in enumerate(TS.columns):
    percentile = stats.percentileofscore(TS.iloc[:, i], total_ret.iloc[i])
    print(ts_name)
    print(percentile)
    results.iloc[-1, i] = percentile

In [None]:
# Columns to report, in output order
report_columns = [
    'benchmark',
    'FF',
    'assoc_rules',
    'chi',
    'LMM',
    'odds',
    'chi_or_LLM',
    'chi_and_LLM',
    'chi_or_odds',
    'chi_and_odds',
    'LLM_or_odds',
    'LLM_and_odds',
    'chi_or_odds_or_LLM',
    'chi_and_odds_and_LLM',
]
results = results.loc[:, report_columns]

# Write the results table to Excel, keeping the row labels
results.to_excel('main_results_table.xlsx', index=True)

# Factor Attribution

In [None]:
# Subtract 1 from every column of ret (rebinds ret, so re-running this cell subtracts again)
ret = ret -1

In [None]:
# Excess returns: subtract the risk-free column from every column of ret
# (the 'rf' column itself ends up as zeros)
RFR = ret['rf']

for c in ret.columns:
    print(c)
ret = ret.sub(RFR, axis=0)

In [None]:
# Add a string join key to both frames: ret's index and FF's date1 column, both as text
ret['joindate'] = ret.index.astype(str)
FF['joindate'] = FF['date1'].astype(str)

In [None]:
# Put the join key in the right format: keep characters 0-3 and 5-6 of the
# date text (year and month when the text looks like 'YYYY-MM-DD' —
# presumably the format of ret's index; verify against ret.csv).
# The key is rebuilt from the index in one pass instead of rewriting the last
# column cell by cell, so it no longer depends on column position and
# re-running the cell gives the same result.
ret['joindate'] = [d[:4] + d[5:7] for d in ret.index.astype(str)]

In [None]:
# Use the joindate key as the index of both frames so they can be joined on it
FF = FF.set_index('joindate')
ret = ret.set_index('joindate')

In [None]:
# Join the five factor columns onto the excess returns via the joindate index
# NOTE(review): RF was divided by 100 earlier, but these factor columns are joined
# exactly as read — confirm they are on the same scale as the returns
ret = ret.join(FF[['Mkt-RF', 'SMB', 'HML', 'RMW','CMA']], lsuffix='', rsuffix='f')

In [None]:
# Factor attribution: regress each strategy's excess return on the five factor
# columns plus a constant (reported as 'Alpha'). Uses `sm` (statsmodels.api)
# from the import cell at the top of the notebook.
factor_cols = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
column_names = factor_cols + ['Alpha']
# One row each for coefficients, t-stats and p-values per strategy
fa_results = pd.DataFrame(columns=column_names)

# Constant regressor; placed last so its coefficient lands in the 'Alpha' column
ret['intercept'] = 1

cols = ['assoc_rules', 'chi', 'LMM', 'odds', 'chi_or_LLM', 'chi_and_LLM',
       'chi_or_odds', 'chi_and_odds', 'LLM_or_odds', 'LLM_and_odds',
       'chi_or_odds_or_LLM', 'chi_and_odds_and_LLM', 'FF']
for col in cols:
    model = sm.OLS(ret[col], ret[factor_cols + ['intercept']]).fit()
    # The fitted statistics are label-indexed Series in regressor order;
    # .tolist() takes them positionally without integer-key lookups on a
    # string-labelled Series.
    fa_results.loc[col] = model.params.tolist()
    fa_results.loc[col+' t-stat'] = model.tvalues.tolist()
    fa_results.loc[col+' P-value'] = model.pvalues.tolist()

# Style Tilt 

In [None]:
# Excess returns for the style-tilt regressions.
# ret_for_st still holds gross factors (1 + r), and the next cell subtracts 1
# from every column. Previously the risk-free factor was subtracted from all
# columns here, including 'rf' itself, so after the later "- 1" the strategies
# became r - rf - 1 and 'rf' became a constant -1 rather than the risk-free rate.
# Now the strategies are stored as (1 + r) - (1 + rf) + 1 and 'rf' is left
# untouched, so the later "- 1" yields r - rf for the strategies and the net
# risk-free rate for 'rf' — the same convention as the factor-attribution section.
RFR = ret_for_st['rf'].copy()

strategy_cols = ret_for_st.columns.drop('rf')
ret_for_st[strategy_cols] = ret_for_st[strategy_cols].sub(RFR, axis=0) + 1

In [None]:
# Subtract 1 from every column of a copy of ret_for_st
ret2 = ret_for_st.copy()-1

In [None]:
# Load the style-tilt series from Excel, indexed by the 'Date' column
tilts = pd.read_excel('Style_Tilts.xlsx', index_col='Date')

In [None]:
# Build a month+year ('%m%Y') text key on both frames to join them on;
# ret2's index is parsed to datetimes first
tilts['joindate'] = tilts.index.strftime('%m%Y')
ret2['joindate'] = pd.to_datetime(ret2.index).strftime('%m%Y')

In [None]:
# Use the joindate key as the index of both frames so they can be joined on it
tilts = tilts.set_index('joindate')
ret2 = ret2.set_index('joindate')

In [None]:
# Join the style-tilt columns onto ret2 via the joindate index;
# column names that exist in both frames get an 'f' suffix on the tilts side
ret2 = ret2.join(tilts, lsuffix='', rsuffix='f')

In [None]:
# Express each style series relative to the EM series, then make EM itself
# relative to the risk-free column. EM is updated last because the style
# columns are measured against its original values.
for style in ['Small Cap', 'Value', 'Quality', 'Min Vol', 'Momentum', 'Dividend Yield']:
    ret2[style] = ret2[style] - ret2['EM']
ret2['EM'] = ret2['EM'] - ret2['rf']



In [None]:
# Style-tilt attribution: regress each strategy's return on the EM and
# style-tilt columns plus a constant (reported as 'Alpha').
style_cols = ['EM', 'Small Cap', 'Value', 'Quality', 'Min Vol', 'Momentum', 'Dividend Yield']
column_names = style_cols + ['Alpha']
# One row each for coefficients, t-stats and p-values per strategy
st_results = pd.DataFrame(columns=column_names)

column_names2 = ['R^2', 'Residuals']
# One row per strategy: R-squared and mean absolute residual
RS_results = pd.DataFrame(columns=column_names2)

# Constant regressor; placed last so its coefficient lands in the 'Alpha' column
ret2['intercept'] = 1

cols = ['assoc_rules', 'chi', 'LMM', 'odds', 'chi_or_LLM', 'chi_and_LLM',
       'chi_or_odds', 'chi_and_odds', 'LLM_or_odds', 'LLM_and_odds',
       'chi_or_odds_or_LLM', 'chi_and_odds_and_LLM', 'FF']
for col in cols:
    model = sm.OLS(ret2[col], ret2[style_cols + ['intercept']]).fit()
    # The fitted statistics are label-indexed Series in regressor order;
    # .tolist() takes them positionally without integer-key lookups on a
    # string-labelled Series.
    st_results.loc[col] = model.params.tolist()
    st_results.loc[col+' t-stat'] = model.tvalues.tolist()
    st_results.loc[col+' P-value'] = model.pvalues.tolist()
    RS_results.loc[col] = [model.rsquared, abs(model.resid).mean()]

In [None]:
# Write the R^2 / residual table to Excel, keeping the row labels
RS_results.to_excel('RS_results.xlsx',index=True)

In [None]:
# Write the style-tilt and factor-attribution tables to Excel, keeping the row labels
st_results.to_excel('style_tilts_results_table.xlsx',index=True)

fa_results.to_excel('factor_attribution_results_table.xlsx',index=True)