In [1]:
from itertools import combinations
import json
import os

import matplotlib.pyplot as plt
import numpy as np

from data import COUNTRIES, get_all_data_and_labels
from utils import make_comparable

In [2]:
# Path (relative to the working directory) of the JSON file that caches the
# pairwise correlations: it is read if it exists, otherwise written after the
# correlations have been computed.
CORRS_FILE = 'correlations.json'

In [3]:
# all_data and all_labels are used in parallel below: all_labels[i] is the name
# of the indicator held in all_data[i], and all_data[i][country] is that
# indicator's series for one country.
# NOTE(review): the exact container/series types come from data.py — confirm there.
all_data, all_labels = get_all_data_and_labels()

In [4]:
# Reference listing: the indicator labels, then the country codes, one per line.
print(all_labels, COUNTRIES, sep='\n')

['bond_rets_local_fx', 'equity_prices', 'curr_acct_nom_usd', 'curr_acct_pct_gdp', 'fx_trd_wts_nom', 'fx_trd_wts_real', 'fx_vs_usd', 'gdp_nom', 'gdp_real', 'short_rates', 'long_rates', 'core_cpi_sa', 'm1_usd', 'm2_usd', 'm3_usd']
['USA', 'AUS', 'JPN', 'CAN', 'CHE', 'GBR', 'ESP', 'FRA', 'ITA', 'DEU']


In [5]:
# Load or compute correlations.
#
# Resulting structure (identical whether loaded from the cache or computed):
#   corrs[label_a][label_b] -> {country: correlation, ...,
#                               'min': ..., 'max': ..., 'mean': ..., 'median': ...}
# Every unordered pair of indicators is stored under both key orders.
if os.path.exists(CORRS_FILE):
    # Context manager so the file handle is closed even if parsing fails.
    with open(CORRS_FILE, 'r') as corrs_fp:
        corrs = json.load(corrs_fp)

else:
    corrs = {}

    for i, j in combinations(range(len(all_data)), r=2):
        label_i, label_j = all_labels[i], all_labels[j]
        print(label_i, label_j)

        # Per-country correlation, restricted to countries present in both indicators.
        inner_corrs = {}
        for country in COUNTRIES:
            if country not in all_data[i] or country not in all_data[j]:
                continue

            series1, series2 = make_comparable(all_data[i][country], all_data[j][country])
            # Store plain Python floats so freshly computed values have the
            # same type as values reloaded from the JSON cache.
            inner_corrs[country] = float(np.corrcoef(series1, series2)[0, 1])
            print('\t', country, inner_corrs[country])

        country_corrs_arr = np.array(list(inner_corrs.values()), dtype=float)
        if country_corrs_arr.size:
            summary = {
                'min': float(country_corrs_arr.min()),
                'max': float(country_corrs_arr.max()),
                'mean': float(country_corrs_arr.mean()),
                'median': float(np.median(country_corrs_arr)),
            }
        else:
            # No country has data for both indicators. Reductions over an empty
            # array would raise, so record NaN statistics instead of aborting
            # the whole computation.
            print('\t(no country has data for both series)')
            summary = dict.fromkeys(('min', 'max', 'mean', 'median'), float('nan'))
        inner_corrs.update(summary)

        # Store symmetrically. The reverse entry is a copy so the two directions
        # are independent objects, exactly as they are after a reload from JSON.
        corrs.setdefault(label_i, {})[label_j] = inner_corrs
        corrs.setdefault(label_j, {})[label_i] = dict(inner_corrs)

        print(f"min: {summary['min']:.5}, max: {summary['max']:.5}, mean: {summary['mean']:.5}, median: {summary['median']:.5}\n")

    # Context manager guarantees the cache is flushed and closed before later cells run.
    with open(CORRS_FILE, 'w') as corrs_fp:
        json.dump(corrs, corrs_fp, indent=4)

In [6]:
def print_combos_info(combos):
    """Print the stored correlation entries for each indicator pair.

    Args:
        combos: iterable of ``(i, j)`` index pairs into the notebook-level
            ``all_labels`` list.

    For every pair, a header line ``<label_i> and <label_j>`` is printed,
    followed by one tab-prefixed line per key/value found in
    ``corrs[label_i][label_j]``.
    """
    for first_idx, second_idx in combos:
        first_label = all_labels[first_idx]
        second_label = all_labels[second_idx]
        print(first_label, 'and', second_label)

        pair_stats = corrs[first_label][second_label]
        for stat_name in pair_stats:
            print('\t', stat_name, pair_stats[stat_name])

In [9]:
def _spread_then_strength(pair):
    """Sort key for an (i, j) index pair: (max - min, |median|, |mean|)."""
    pair_stats = corrs[all_labels[pair[0]]][all_labels[pair[1]]]
    spread = pair_stats['max'] - pair_stats['min']
    return (spread, abs(pair_stats['median']), abs(pair_stats['mean']))


# Rank every indicator pair, ascending, by how much its correlation varies
# across countries; ties fall back to |median| and then |mean|.
combos = combinations(range(len(all_data)), r=2)
sorted_combos = sorted(combos, key=_spread_then_strength)

In [10]:
# sorted_combos is ascending, so walk it backwards: pairs with the largest
# max - min spread across countries are printed first.
print_combos_info(sorted_combos[::-1])

curr_acct_nom_usd and m2_usd
	 USA -0.6055232278008814
	 JPN 0.15249886809914273
	 CAN -0.6495946187345321
	 CHE 0.7952866508597232
	 GBR -0.873030957799283
	 ESP 0.08017436993117251
	 FRA -0.7760288342397174
	 ITA 0.03789927742774679
	 DEU 0.9442571822937192
	 min -0.873030957799283
	 max 0.9442571822937192
	 mean -0.09934014332921215
	 median 0.03789927742774679
curr_acct_nom_usd and gdp_nom
	 USA -0.7279296118714181
	 AUS -0.6741948399920237
	 JPN 0.028012738863653068
	 CAN -0.5307531955747884
	 CHE 0.810357045974633
	 GBR -0.8464104009217649
	 ESP -0.20632099309045457
	 FRA -0.8048868582116739
	 ITA -0.3070352801674264
	 DEU 0.9465272853006935
	 min -0.8464104009217649
	 max 0.9465272853006935
	 mean -0.2312634109690571
	 median -0.4188942378711074
curr_acct_nom_usd and m3_usd
	 AUS -0.6105994088296934
	 JPN -0.27520708926404197
	 CAN -0.6202923012996131
	 CHE 0.7898534373248711
	 GBR -0.8266253819015345
	 ESP -0.07293383997180218
	 FRA -0.8093242522265404
	 ITA -0.1304661210138732

	 CHE -0.7855062963093552
	 GBR -0.023436194724282104
	 ESP -0.6884765064387012
	 FRA -0.45569131413611075
	 ITA 0.07428627728762152
	 DEU -0.3932819459838225
	 min -0.8298022090252618
	 max 0.13416733953494284
	 mean -0.3739889207176651
	 median -0.39815943666401643
gdp_nom and m1_usd
	 USA 0.9217899878946862
	 AUS 0.9621698970210477
	 JPN 0.5029836289229308
	 CAN 0.9552189187219321
	 CHE 0.9619243091859125
	 GBR 0.9584463643848686
	 ESP 0.058327836314178864
	 FRA 0.8893859174647937
	 ITA 0.8555324656416353
	 DEU 0.8934315538386949
	 min 0.058327836314178864
	 max 0.9621698970210477
	 mean 0.7959210879390681
	 median 0.9076107708666905
fx_vs_usd and m3_usd
	 AUS -0.23230943095623138
	 JPN -0.7624298081023293
	 CAN -0.38449615728090675
	 CHE -0.8142567417931699
	 GBR -0.19832474878881173
	 ESP -0.7628848963895968
	 FRA -0.5130798812416307
	 ITA 0.07367708930917574
	 DEU -0.5980469251650254
	 min -0.8142567417931699
	 max 0.07367708930917574
	 mean -0.4657946111565029
	 median -0.513079