# Constructing our Minimum-Covariance Portfolios

We will be constructing our portfolios by selecting the assets which have the lowest correlations between each other. We will rebalance semi-annually based on these selections, and at each rebalance, ensure that our portfolio is split among the two geographies 50/50 

In [11]:
import numpy as np
import pandas as pd

In [12]:
# Remove 1st line since they are pct changes, therefore have
# NaNs in the first row
rets = pd.read_csv("returns.csv")[1:]
rets.head()

Unnamed: 0,Date,EWJ,EWT,MCHI,EWY,EWZ,INDA,ACWV,IDV,EWC,...,EFNL,UAE,KWT,CNY=X,BRL=X,ZAR=X,RUB=X,EUR=X,JPY=X,MXN=X
1,2011-01-04,0.000804,-0.007407,,0.002263,0.004396,,,-0.009259,-0.010412,...,,,,-0.00176,0.019189,0.006326,0.0,0.000134,0.006743,-0.000384
2,2011-01-05,-0.007231,-0.016584,,0.000376,-0.011616,,,-0.002596,0.002023,...,,,,0.002584,0.012754,0.003853,0.0,0.002671,0.004506,-0.004249
3,2011-01-06,0.0,-0.002108,,-0.00696,-0.010049,,,-0.009891,-0.004847,...,,,,-0.002956,0.004498,0.00867,0.0,0.012522,0.01403,0.003119
4,2011-01-07,0.003777,-0.015632,,0.005493,-0.009119,,,-0.005258,0.000406,...,,,,0.005929,0.006507,0.012983,0.0,0.013551,0.001334,0.000573
5,2011-01-10,0.0,0.002146,,-0.014883,-0.010071,,,0.000529,-0.003651,...,,,,0.000151,-0.029895,-0.000295,0.0,0.00662,-0.003241,-0.001063


In [13]:
# Reindex to date
rets.index = pd.to_datetime(rets['Date'])
del rets['Date']
rets.head()

Unnamed: 0_level_0,EWJ,EWT,MCHI,EWY,EWZ,INDA,ACWV,IDV,EWC,EWU,...,EFNL,UAE,KWT,CNY=X,BRL=X,ZAR=X,RUB=X,EUR=X,JPY=X,MXN=X
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-04,0.000804,-0.007407,,0.002263,0.004396,,,-0.009259,-0.010412,0.010462,...,,,,-0.00176,0.019189,0.006326,0.0,0.000134,0.006743,-0.000384
2011-01-05,-0.007231,-0.016584,,0.000376,-0.011616,,,-0.002596,0.002023,-0.002157,...,,,,0.002584,0.012754,0.003853,0.0,0.002671,0.004506,-0.004249
2011-01-06,0.0,-0.002108,,-0.00696,-0.010049,,,-0.009891,-0.004847,-0.008647,...,,,,-0.002956,0.004498,0.00867,0.0,0.012522,0.01403,0.003119
2011-01-07,0.003777,-0.015632,,0.005493,-0.009119,,,-0.005258,0.000406,-0.003925,...,,,,0.005929,0.006507,0.012983,0.0,0.013551,0.001334,0.000573
2011-01-10,0.0,0.002146,,-0.014883,-0.010071,,,0.000529,-0.003651,0.001751,...,,,,0.000151,-0.029895,-0.000295,0.0,0.00662,-0.003241,-0.001063


In [14]:
dates_to_split = pd.date_range(rets.index[0], rets.index[-1], freq='6M')
dates_to_split

DatetimeIndex(['2011-01-31', '2011-07-31', '2012-01-31', '2012-07-31',
               '2013-01-31', '2013-07-31', '2014-01-31', '2014-07-31',
               '2015-01-31', '2015-07-31', '2016-01-31', '2016-07-31',
               '2017-01-31', '2017-07-31', '2018-01-31', '2018-07-31',
               '2019-01-31', '2019-07-31', '2020-01-31', '2020-07-31',
               '2021-01-31'],
              dtype='datetime64[ns]', freq='6M')

In [15]:
# Split on these dates
semiannual = {}

for i in range(len(dates_to_split)-1):
    #print(dates_to_split[i+1])
    semiannual[i] = rets[dates_to_split[i]:dates_to_split[i+1]]

In [16]:
# Sanity check
print(semiannual[0].index[0],semiannual[0].index[-1])

2011-01-31 00:00:00 2011-07-29 00:00:00


In [17]:
# Sanity check
print(semiannual[1].index[0],semiannual[1].index[-1])

2011-08-01 00:00:00 2012-01-31 00:00:00


In [18]:
# Sanity check
print(semiannual[19].index[0],semiannual[19].index[-1])

2020-07-31 00:00:00 2021-01-29 00:00:00


In [48]:
# For each group of 6mo, find the assets with lowest avg correlation
corrs = semiannual[10].corr().abs().mean(axis=1)
corrs

EWJ      0.521344
EWT      0.629399
MCHI     0.635343
EWY      0.675211
EWZ      0.533868
           ...   
ZAR=X    0.132302
RUB=X    0.095289
EUR=X    0.186922
JPY=X    0.081194
MXN=X    0.569602
Length: 63, dtype: float64

In [49]:
np.argsort(corrs)

EWJ      59
EWT      55
MCHI     57
EWY      56
EWZ      54
         ..
ZAR=X    12
RUB=X    27
EUR=X    31
JPY=X    21
MXN=X     7
Length: 63, dtype: int64

In [52]:
corrs.sort_values(axis=0)[:15]

JPY=X    0.081194
BRL=X    0.094671
RUB=X    0.095289
ZAR=X    0.132302
CNY=X    0.134425
EUR=X    0.186922
KSA      0.234277
QAT      0.365469
UAE      0.375328
EPU      0.425429
ICOL     0.468269
TUR      0.507790
FM       0.510881
JXI      0.517473
EWJ      0.521344
dtype: float64

In [54]:
# Loop through each semiannual period

top15 = {}

for i in range(1,len(semiannual)):
    corrs = semiannual[i].corr().abs().mean(axis=1)
    top15[i] = corrs.sort_values(axis=0)[:15]
    print(i)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [55]:
top15

{1: CNY=X    0.078865
 JPY=X    0.116321
 RUB=X    0.116667
 ZAR=X    0.135677
 BRL=X    0.136997
 EUR=X    0.210791
 KXI      0.529112
 ACWV     0.567478
 MCHI     0.624926
 MXN=X    0.634076
 EIRL     0.655932
 EPHE     0.658137
 EIDO     0.664700
 TUR      0.692404
 EIS      0.693257
 dtype: float64, 2: BRL=X    0.084623
 EUR=X    0.112626
 JPY=X    0.117188
 CNY=X    0.117628
 RUB=X    0.145799
 ZAR=X    0.147198
 EPHE     0.518746
 EIRL     0.528939
 INDA     0.537874
 ENOR     0.562681
 EIS      0.571743
 EIDO     0.583107
 EWP      0.585431
 MXN=X    0.592983
 THD      0.594155
 dtype: float64, 3: JPY=X    0.070302
 CNY=X    0.075771
 BRL=X    0.075915
 ZAR=X    0.088212
 EUR=X    0.101070
 RUB=X    0.106086
 FM       0.293089
 INDA     0.333545
 TUR      0.375286
 EDEN     0.399229
 EIRL     0.421758
 EFNL     0.432039
 EPHE     0.433832
 EIS      0.436821
 EZA      0.446063
 dtype: float64, 4: BRL=X    0.067611
 CNY=X    0.076011
 EUR=X    0.123868
 ZAR=X    0.139976
 ICOL    