# Imports

In [82]:
from pathlib import Path

import numpy as np
import pandas as pd

import statsmodels.api as sm
from statsmodels.formula.api import ols

from more_itertools import powerset

from constants import DataSplit, Model, METRICS, DATASET_SYMBOLS

# Parameters

In [3]:
data_dir = r'./out'
concat_results = True
alpha = 0.05

# Data Loading

In [22]:
path = Path(data_dir)

pattern = rf'{"**/*" if concat_results else ""}results.csv'

data = pd.concat([pd.read_csv(filename, index_col=0) for filename in path.glob(pattern)], ignore_index=True)
display(data)

Unnamed: 0,DecisionTree,RandomForest,SVM,KNN,LogisticRegression,RandomBaseline,ConstantBaseline,PreviousBaseline,ConsensusBaseline,forex,bond,index_futures,commodities_futures,SPY,Random,test,accuracy,weighted f1,macro f1,roc-auc
0,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,0.833458,0.832359,0.830981,0.933731
1,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,0.514970,0.508860,0.498796,0.491255
2,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,0.784738,0.777865,0.774654,0.888079
3,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,0.547904,0.420897,0.385600,0.499613
4,True,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,0.700646,0.700834,0.697850,0.767178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9175,False,False,False,False,False,False,False,False,True,True,False,True,True,True,False,True,0.495475,0.501318,0.475140,
9176,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,0.506523,0.510346,0.493364,
9177,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,0.503145,0.511602,0.474699,
9178,False,False,False,False,False,False,False,False,True,True,True,True,True,True,False,False,0.506523,0.510346,0.493364,


# Asset presence analysis (model-averaged, baseline models and training data excluded)

In [71]:
df = data.loc[data[DataSplit.TEST] & ~data[Model.RANDOM_BASELINE] & ~data[Model.CONSTANT_BASELINE] & ~data[Model.PREVIOUS_BASELINE] & ~data[Model.CONSENSUS_BASELINE]]
df = df.drop(columns=[DataSplit.TEST])

pivot = [
    df.loc[df['Random']]
    .pivot_table(values=[metric for metric in METRICS.keys()],
                 index='Random',
                 aggfunc=[np.mean, np.std]),
    df[~df[[asset_type for asset_type in DATASET_SYMBOLS.keys()] + ['Random']].any(axis=1)]
    .pivot_table(values=[metric for metric in METRICS.keys()],
                 index='SPY',
                 aggfunc=[np.mean, np.std])
]

for asset_type in DATASET_SYMBOLS.keys():
    pivot.append(df.loc[~df['Random']].pivot_table(values=[metric for metric in METRICS.keys()], index=asset_type, aggfunc=[np.mean, np.std]))

pivot = pd.concat(pivot, keys=[tab.index.name for tab in pivot], names=['asset type','presence']).rename(index={'SPY':'SPY-Only', 'Random':'Random Data'})

display(pivot)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,mean,std,std,std
Unnamed: 0_level_1,Unnamed: 1_level_1,accuracy,macro f1,weighted f1,accuracy,macro f1,weighted f1
asset type,presence,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Random Data,True,0.512109,0.435936,0.446795,0.036709,0.059279,0.058084
SPY-Only,True,0.530632,0.467813,0.485363,0.022562,0.046459,0.035177
forex,False,0.538441,0.4841,0.503451,0.027006,0.050217,0.039871
forex,True,0.531317,0.476615,0.496262,0.02952,0.045346,0.035467
bond,False,0.532666,0.473,0.493073,0.026694,0.051098,0.039499
bond,True,0.537092,0.487714,0.506639,0.030062,0.043435,0.034946
index_futures,False,0.53165,0.475185,0.49499,0.027238,0.048321,0.037101
index_futures,True,0.538109,0.48553,0.504723,0.029383,0.047091,0.038074
commodities_futures,False,0.530491,0.476512,0.493806,0.024761,0.047428,0.037657
commodities_futures,True,0.539268,0.484203,0.505906,0.031217,0.048239,0.037178


# Asset combinations (model-averaged, baselines and training data excluded)

In [88]:
df = data.loc[data[DataSplit.TEST] & ~data['Random'] & ~data[Model.RANDOM_BASELINE] & ~data[Model.CONSTANT_BASELINE] & ~data[Model.PREVIOUS_BASELINE] & ~data[Model.CONSENSUS_BASELINE]]
df = df.drop(columns=[DataSplit.TEST, 'Random'])

df['forex'] = df['forex'].map({True:'F',False:''})
df['bond'] = df['bond'].map({True:'B',False:''})
df['index_futures'] = df['index_futures'].map({True:'I',False:''})
df['commodities_futures'] = df['commodities_futures'].map({True:'C',False:''})

df['asset combination'] = df['forex'] + df['bond'] + df['index_futures'] + df['commodities_futures']
df['asset combination'] = df['asset combination'].replace({'':'SPY-Only'})

pivot = df.pivot_table(values=[metric for metric in METRICS.keys()],
                       index='asset combination',
                       aggfunc=[np.mean, np.std])

# [print(c) for c in powerset('FBIC')]

display(pivot)

()
('F',)
('B',)
('I',)
('C',)
('F', 'B')
('F', 'I')
('F', 'C')
('B', 'I')
('B', 'C')
('I', 'C')
('F', 'B', 'I')
('F', 'B', 'C')
('F', 'I', 'C')
('B', 'I', 'C')
('F', 'B', 'I', 'C')


Unnamed: 0_level_0,mean,mean,mean,std,std,std
Unnamed: 0_level_1,accuracy,macro f1,weighted f1,accuracy,macro f1,weighted f1
asset combination,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
B,0.535652,0.487162,0.504061,0.022217,0.045115,0.034273
BC,0.548724,0.50401,0.522066,0.028653,0.040939,0.031528
BI,0.532202,0.474412,0.493262,0.023183,0.054507,0.043632
BIC,0.556017,0.510818,0.531867,0.029819,0.04214,0.033646
C,0.53092,0.462975,0.488646,0.029101,0.052304,0.038637
F,0.518958,0.463498,0.479864,0.021717,0.054509,0.042623
FB,0.528792,0.480746,0.497976,0.025686,0.041274,0.032195
FBC,0.523838,0.475251,0.494445,0.031762,0.034816,0.025472
FBI,0.523919,0.472981,0.490622,0.029013,0.0349,0.026366
FBIC,0.547595,0.496336,0.518813,0.031079,0.033287,0.023287


# Filtering

In [8]:
model = Model.SUPPORT_VECTOR_MACHINE

df = data.loc[data[DataSplit.TEST] & ~data['Random'] & data[model]]
df = df.drop(columns=[m.value for m in Model] + [DataSplit.TEST, 'SPY', 'Random'])
df = df.replace({True: 1, False: -1})

display(df)

Unnamed: 0,forex,bond,index_futures,commodities_futures,accuracy,weighted f1,macro f1,roc-auc
71,-1,1,-1,-1,0.568116,0.440925,0.401579,0.530074
73,-1,-1,-1,-1,0.558882,0.422175,0.385719,0.524007
75,-1,-1,-1,1,0.574713,0.419498,0.364964,0.512102
77,-1,1,-1,1,0.587537,0.485675,0.446751,0.540445
79,1,1,-1,-1,0.556522,0.442620,0.405233,0.536424
...,...,...,...,...,...,...,...,...
8967,1,-1,-1,1,0.572797,0.418609,0.364190,0.520255
8969,-1,-1,1,1,0.570470,0.436997,0.390281,0.547733
8971,1,-1,1,-1,0.556044,0.429584,0.394532,0.554514
8973,1,-1,1,1,0.572707,0.441662,0.395540,0.553636


# Regression fitting, identify notable factors & interactions

In [11]:
glm = ols('accuracy ~ forex * bond * index_futures * commodities_futures', data=df).fit()
display(glm.params)

aov = sm.stats.anova_lm(glm, typ=1)
display(aov)

Intercept                                       0.569525
forex                                          -0.000800
bond                                            0.003103
forex:bond                                     -0.000078
index_futures                                   0.002427
forex:index_futures                             0.003672
bond:index_futures                              0.001405
forex:bond:index_futures                        0.002997
commodities_futures                             0.008664
forex:commodities_futures                       0.000443
bond:commodities_futures                        0.001314
forex:bond:commodities_futures                 -0.000266
index_futures:commodities_futures               0.001248
forex:index_futures:commodities_futures         0.001099
bond:index_futures:commodities_futures          0.002253
forex:bond:index_futures:commodities_futures    0.000829
dtype: float64

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
forex,1.0,0.000307,0.000307,39.703116,6.87851e-10
bond,1.0,0.00462,0.00462,597.062431,2.2535139999999997e-85
forex:bond,1.0,3e-06,3e-06,0.379743,0.5380429
index_futures,1.0,0.002826,0.002826,365.246222,1.7495210000000002e-60
forex:index_futures,1.0,0.006472,0.006472,836.382287,6.783498000000001e-106
bond:index_futures,1.0,0.000948,0.000948,122.464757,2.019047e-25
forex:bond:index_futures,1.0,0.004311,0.004311,557.132736,1.676244e-81
commodities_futures,1.0,0.036028,0.036028,4655.658722,4.7559900000000005e-244
forex:commodities_futures,1.0,9.4e-05,9.4e-05,12.166359,0.0005329534
bond:commodities_futures,1.0,0.000829,0.000829,107.129204,9.947440000000001e-23


In [6]:
effects = glm.params.drop(labels='Intercept')
effects *= effects.index.map(lambda str: -2*(-1)**str.count(':'))

print('Factor effects:')
display(effects)

SS = (effects*8)**2/16
percentages = SS/SS.sum()

display(percentages.sort_values())

Factor effects:


forex                                           0.000374
bond                                           -0.001605
forex:bond                                      0.002325
index_futures                                  -0.003568
forex:index_futures                             0.001868
bond:index_futures                              0.000944
forex:bond:index_futures                       -0.004376
commodities_futures                             0.000423
forex:commodities_futures                       0.003471
bond:commodities_futures                        0.004266
forex:bond:commodities_futures                  0.004210
index_futures:commodities_futures              -0.001224
forex:index_futures:commodities_futures         0.000183
bond:index_futures:commodities_futures         -0.000968
forex:bond:index_futures:commodities_futures   -0.001288
dtype: float64

forex:index_futures:commodities_futures         0.000345
forex                                           0.001444
commodities_futures                             0.001850
bond:index_futures                              0.009212
bond:index_futures:commodities_futures          0.009700
index_futures:commodities_futures               0.015502
forex:bond:index_futures:commodities_futures    0.017160
bond                                            0.026647
forex:index_futures                             0.036090
forex:bond                                      0.055915
forex:commodities_futures                       0.124655
index_futures                                   0.131710
forex:bond:commodities_futures                  0.183366
bond:commodities_futures                        0.188273
forex:bond:index_futures                        0.198130
dtype: float64