In [None]:
"""
Description: Script to optimize Duca's currency mix.
Author: Jeroen van Dijk & Victor de Graaff
Date: 04-11-2020
Maintainer: Jeroen van Dijk & Victor de Graaff
Email: jeroen.vandijk@d-data.nl & victor.degraaff@d-data.nl
Status: Dev
"""

In [2]:
# Load common imports
%run ./CommonImports.ipynb

In [3]:
# Load common functions and currencies lists:
# - all_currencies
# - obsolete_currencies
# - p13_currencies
# - f_currencies
# - ff_currencies
# - currencies_per_continent
%run ./Utilities.ipynb

In [4]:
# Load loss functions
# - calculate_loss_function_around_one(weights)
# - calculate_loss_function_vs_t_minus_one(weights)
# - calculate_loss_function_vs_t_minus_one_with_balancing(weights)
# - calculate_loss_function_vs_t_minus_one_for_period(weights, max_date)
%run ./LossFunctions.ipynb

In [13]:
# Create dataframe with pre-defined base currency
# NOTE(review): `df` is rebound by later cells (the KRW spot check and the
# correlation loop), so this USD-based frame is not necessarily the one that
# reaches split_data further down.
df = create_original_df("USD")

In [16]:
# Inspect the currency -> volume-share mapping used to weight the loss terms.
# `trading_volumes` is not assigned in any visible cell; presumably it comes
# from one of the %run notebooks above — TODO confirm.
trading_volumes

{'USD': 0.4415,
 'EUR': 0.16149999999999998,
 'JPY': 0.084,
 'GBP': 0.064,
 'AUD': 0.034,
 'CAD': 0.025,
 'CHF': 0.025,
 'CNY': 0.0215,
 'HKD': 0.0175,
 'NZD': 0.0105,
 'SEK': 0.01,
 'KRW': 0.01,
 'SGD': 0.009000000000000001,
 'NOK': 0.009000000000000001,
 'MXN': 0.0085,
 'INR': 0.0085,
 'RUB': 0.0055000000000000005,
 'ZAR': 0.0055000000000000005,
 'TRY': 0.0055000000000000005,
 'BRL': 0.0055000000000000005,
 'TWD': 0.0045000000000000005,
 'DKK': 0.003,
 'PLN': 0.003,
 'THB': 0.0025,
 'IDR': 0.002,
 'HUF': 0.002,
 'CZK': 0.002,
 'ILS': 0.0015,
 'CLP': 0.0015,
 'PHP': 0.0015,
 'AED': 0.001,
 'COP': 0.001,
 'SAR': 0.001,
 'MYR': 0.0005,
 'RON': 0.0005,
 'HRK': 0.0005,
 'other': 0.0105}

In [28]:
# Spot check: Pearson correlation between the CHF and CNY series when KRW is
# used as the base currency.
# Imported locally so this cell does not depend on the later correlation cell
# (which also imports pearsonr) having been executed first.
from scipy.stats import pearsonr

# NOTE: this rebinds the notebook-level `df` to the KRW-based frame.
df = create_original_df("KRW")

pearsonr(df["CHF"], df["CNY"])[0]


0.9658745992067209

In [31]:
from scipy.stats import pearsonr

# Every pair of distinct currencies, listed once (c1 sorts after c2).
currency_pairs = [(c1, c2) for c1 in all_currencies for c2 in all_currencies if c1 > c2]
highly_correlated_currencies = set()
highly_unstable_currencies = set()

result = []

for base_currency in all_currencies:
    print(f"Evaluating correlations using base currency {base_currency}")
    # NOTE: rebinds the notebook-level `df`; after the loop it holds the frame
    # of the last base currency in all_currencies.
    df = create_original_df(base_currency)

    # A base currency against which EUR and USD are strongly correlated is
    # flagged as unstable and is not used to judge other pairs.
    eur_usd_correlation = pearsonr(df["EUR"], df["USD"])[0]
    if eur_usd_correlation > .8:
        highly_unstable_currencies.add(base_currency)
        continue

    for c1, c2 in currency_pairs:
        # Pairs involving an already-dropped currency are not re-examined.
        if not highly_correlated_currencies.isdisjoint((c1, c2)):
            continue

        corr = pearsonr(df[c1], df[c2])[0]

        # Written as "not >" on purpose: a NaN correlation must be skipped,
        # exactly as it fails the ">" comparison.
        if not abs(corr) > .98:
            continue

        # Of a highly correlated pair, the currency with the smaller trading
        # volume is the one to drop; equal volumes leave both in place.
        volume_1, volume_2 = trading_volumes[c1], trading_volumes[c2]
        if volume_1 > volume_2:
            kept, dropped = c1, c2
        elif volume_2 > volume_1:
            kept, dropped = c2, c1
        else:
            continue

        # Only act when the base currency itself outweighs the dropped one.
        if trading_volumes[base_currency] > trading_volumes[dropped]:
            print(f"According to {base_currency} {kept} and {dropped} are highly corrolated:", corr)
            highly_correlated_currencies.add(dropped)

    print(highly_unstable_currencies, highly_correlated_currencies)

highly_unstable_currencies, highly_correlated_currencies

# Highly correlated currencies:
# EUR/DKK: 0.9999406158934495 (seen from USD)
# EUR/HRK: 0.9958057992714155 (seen from USD)
# CHF/THB: 0.9523914232152878 (seen from EUR)
# CNY/PHP: 0.9656167273433154 (seen from EUR)
# HUF/RON: 0.9676987470452582 (seen from EUR)
# USD/HKD: 0.9996748314568983 (seen from EUR)

Evaluating correlations using base currency MYR
set() set()
Evaluating correlations using base currency ZAR
Evaluating correlations using base currency KRW
According to KRW EUR and DKK are highly corrolated: 0.9999187094019177
According to KRW EUR and HRK are highly corrolated: 0.9935931119800973
{'ZAR'} {'DKK', 'HRK'}
Evaluating correlations using base currency CNY
According to CNY HUF and RON are highly corrolated: 0.9849968523226149
According to CNY USD and HKD are highly corrolated: 0.9998049067795064
{'ZAR'} {'HKD', 'RON', 'DKK', 'HRK'}
Evaluating correlations using base currency PLN
{'ZAR'} {'HKD', 'RON', 'DKK', 'HRK'}
Evaluating correlations using base currency DKK
{'ZAR'} {'HKD', 'RON', 'DKK', 'HRK'}
Evaluating correlations using base currency AUD
{'ZAR'} {'HKD', 'RON', 'DKK', 'HRK'}
Evaluating correlations using base currency JPY
{'ZAR'} {'HKD', 'RON', 'DKK', 'HRK'}
Evaluating correlations using base currency HUF
{'ZAR'} {'HKD', 'RON', 'DKK', 'HRK'}
Evaluating correlations usi

({'CZK', 'RUB', 'TRY', 'ZAR'}, {'DKK', 'HKD', 'HRK', 'PHP', 'RON', 'RUB'})

In [36]:
# Determine splits for dev- (train and test) and validation set
train_start = date(2005, 4, 1)
test_start = date(2012, 1, 1)
validation_start = date(2015, 1, 1)
validation_end = date(2020, 10, 1)

# Create dev (train and test) and validation set
# NOTE(review): `df` is whichever frame was bound last by the cells above (the
# correlation loop reassigns it once per base currency), so the base currency
# of the split data depends on execution order — confirm this is intended.
train, test, dev, val = split_data(df, train_start, test_start, validation_start, validation_end)

In [37]:
# Build one table per base currency: the train-period columns of all
# currencies divided row-wise by the base currency's column, then inverted.
exchange_table = {}
train_rates = train[all_currencies]
for base_currency in all_currencies:
    relative_to_base = train_rates.divide(train[base_currency], axis=0)
    exchange_table[base_currency] = 1 / relative_to_base

# Preview USD exchange table
exchange_table["USD"]

Unnamed: 0_level_0,MYR,ZAR,KRW,CNY,PLN,DKK,AUD,JPY,HUF,EUR,THB,RUB,NOK,NZD,IDR,HKD,PHP,GBP,CAD,SEK,RON,TRY,USD,CZK,CHF,SGD,HRK
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2005-04-01,0.26316,0.16175,0.00099,0.12082,0.31703,0.17394,0.77123,0.00932,0.00524,1.29590,0.02554,0.03588,0.15813,0.71051,0.00011,0.12823,0.01827,1.88728,0.82353,0.14134,0.35967,0.73963,1.00000,0.04315,0.83461,0.60418,0.17460
2005-04-02,0.26316,0.16153,0.00099,0.12082,0.31580,0.17360,0.77012,0.00930,0.00523,1.29337,0.02546,0.03587,0.15780,0.70945,0.00011,0.12822,0.01826,1.88327,0.82270,0.14100,0.35897,0.73758,1.00000,0.04307,0.83284,0.60293,0.17424
2005-04-03,0.26317,0.16132,0.00099,0.12082,0.31458,0.17326,0.76901,0.00928,0.00522,1.29083,0.02537,0.03586,0.15748,0.70838,0.00011,0.12822,0.01825,1.87926,0.82187,0.14065,0.35827,0.73553,1.00000,0.04299,0.83106,0.60168,0.17387
2005-04-04,0.26317,0.16110,0.00099,0.12082,0.31336,0.17292,0.76790,0.00926,0.00521,1.28830,0.02529,0.03585,0.15715,0.70731,0.00011,0.12821,0.01824,1.87525,0.82104,0.14031,0.35756,0.73349,1.00000,0.04291,0.82929,0.60044,0.17351
2005-04-05,0.26316,0.16014,0.00098,0.12082,0.31019,0.17195,0.76441,0.00921,0.00518,1.28100,0.02522,0.03579,0.15691,0.70431,0.00011,0.12821,0.01824,1.87253,0.81681,0.13960,0.35554,0.73137,1.00000,0.04271,0.82427,0.60059,0.17266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011-12-27,0.31511,0.12258,0.00087,0.15816,0.29675,0.17582,1.01609,0.01284,0.00428,1.30690,0.03181,0.03197,0.16767,0.77395,0.00011,0.12859,0.02285,1.56797,0.98101,0.14564,0.30464,0.52632,1.00000,0.05068,1.07035,0.77144,0.17385
2011-12-28,0.31596,0.12334,0.00087,0.15827,0.29749,0.17583,1.01989,0.01288,0.00426,1.30740,0.03171,0.03159,0.16764,0.77766,0.00011,0.12858,0.02274,1.56725,0.98530,0.14578,0.30502,0.52633,1.00000,0.05064,1.07199,0.77197,0.17367
2011-12-29,0.31451,0.12265,0.00087,0.15822,0.29022,0.17337,1.00703,0.01286,0.00415,1.28890,0.03147,0.03103,0.16580,0.76652,0.00011,0.12866,0.02281,1.54175,0.97725,0.14415,0.29957,0.52288,1.00000,0.04975,1.05769,0.76697,0.17100
2011-12-30,0.31516,0.12343,0.00086,0.15859,0.29024,0.17405,1.01698,0.01291,0.00411,1.29390,0.03157,0.03098,0.16687,0.77308,0.00011,0.12873,0.02280,1.54902,0.97911,0.14519,0.29929,0.52959,1.00000,0.05018,1.06441,0.76931,0.17167


In [61]:
# Loss function 1
def calculate_loss_function_around_one(weights, selected_currencies, reference_currencies):
    """Volume-weighted mean squared log-deviation of the basket value from 1.

    Reads the notebook-level `reference_date_exhange_rate`, `exchange_table`
    and `trading_volumes`.

    Args:
        weights: one weight per selected currency; rescaled here to sum to 1.
            A numpy array is matched to `selected_currencies` by position, a
            pandas Series by its labels.
        selected_currencies: currency codes that make up the basket.
        reference_currencies: currency codes from whose point of view the
            basket value is evaluated.

    Returns:
        Sum over the reference currencies of
        trading_volumes[currency] * mean(log(normalized basket value) ** 2).
    """
    normalized_weights = weights / weights.sum()
    # Amount held per selected currency, fixed via the reference-date rates.
    holdings = pd.Series(normalized_weights, index=selected_currencies) * reference_date_exhange_rate

    def weighted_deviation(base_currency):
        # Basket value per date seen from base_currency, rescaled by that
        # currency's own reference-date rate.
        basket_value = (holdings * exchange_table[base_currency]).sum(axis=1)
        relative_value = basket_value / reference_date_exhange_rate[base_currency]

        # TODO: other possibilities for loss functions:
        # - Mean Absolute Error MAE
        # - Root Mean Squared Error RMSE
        # - Time-series related loss functions (research)

        log_deviation = np.log(relative_value)
        return trading_volumes[base_currency] * (log_deviation**2).mean()

    # Accumulate in iteration order so the floating-point sum is reproducible.
    total_loss = 0
    for reference_currency in reference_currencies:
        total_loss += weighted_deviation(reference_currency)

    return total_loss

In [62]:
def run_optimization(selected_currencies, 
                     max_iter=100, 
                     loss_function=calculate_loss_function_around_one,
                     reference_currencies=all_currencies):
    """Optimize the basket weights for the given currencies.

    Starts from equal weights, bounds every weight to [0, 1] and hands the
    problem to `minimize` (presumably scipy.optimize.minimize, brought in by
    the common imports — TODO confirm).

    Args:
        selected_currencies: currency codes in the basket.
        max_iter: passed to the optimizer as its "maxiter" option.
        loss_function: called as loss_function(weights, selected_currencies,
            reference_currencies).
        reference_currencies: forwarded unchanged to the loss function.

    Returns:
        pandas Series of the optimized weights rescaled to sum to 1, labelled
        by currency and sorted from largest to smallest.
    """
    n_currencies = len(selected_currencies)
    equal_weights = np.ones(n_currencies) / n_currencies
    weight_bounds = [(0, 1)] * n_currencies
    optimizer_options = {"disp": True, "maxiter": max_iter}

    result = minimize(fun=loss_function,
                      x0=equal_weights,
                      args=(selected_currencies, reference_currencies),
                      bounds=weight_bounds,
                      options=optimizer_options)

    # The optimizer does not enforce a unit sum, so rescale before reporting.
    normalized_weights = result.x / result.x.sum()
    # Labels are taken from the notebook-level reference-date rates.
    currency_index = reference_date_exhange_rate.loc[selected_currencies].index

    return pd.Series(normalized_weights, index=currency_index).sort_values(ascending=False)

In [76]:
def try_currencies(currencies_to_try, currencies, reference_currencies):
    """Optimize and score the basket `currencies + currencies_to_try`.

    Args:
        currencies_to_try: currency codes to add to the basket.
        currencies: currency codes already in the basket.
        reference_currencies: forwarded to the optimizer and the loss function.

    Returns:
        (new_mix, new_score): the optimized weights and their loss, or
        (None, 1) without optimizing when any currency to try is already in
        `currencies`.
    """
    if any(currency in currencies for currency in currencies_to_try):
        return None, 1

    extended_selection = currencies + currencies_to_try

    # Optional debug output:
    # print(f"Evaluating {currencies} + {currencies_to_try}")
    new_mix = run_optimization(selected_currencies=extended_selection,
                               max_iter=100,
                               loss_function=calculate_loss_function_around_one,
                               reference_currencies=reference_currencies)

    # new_mix is a labelled Series, so the loss function matches its weights
    # to the currencies by label.
    new_score = calculate_loss_function_around_one(new_mix, extended_selection, reference_currencies)

    return new_mix, new_score

In [67]:
# Inspect the candidate list.
# NOTE(review): `candidate_currencies` is assigned in the greedy-selection cell
# further down; run top to bottom on a fresh kernel this would raise NameError
# unless one of the %run notebooks also defines it.
candidate_currencies

['AUD',
 'CAD',
 'CHF',
 'CNY',
 'CZK',
 'EUR',
 'GBP',
 'HUF',
 'IDR',
 'JPY',
 'KRW',
 'MYR',
 'NOK',
 'NZD',
 'PLN',
 'SEK',
 'SGD',
 'THB',
 'TRY',
 'USD',
 'ZAR']

In [72]:
# Greedy forward selection: starting from an empty basket, repeatedly add the
# single candidate currency whose optimized mix gives the lowest loss, as long
# as that loss is below improvement_factor times the previous round's best.
improvement_factor = .995

# 1 doubles as "no score yet"; try_currencies also returns 1 for candidates
# that are already in the basket.
best_score = 1
last_score = 1
selected_currencies_so_far = []

candidate_currencies = [c for c in all_currencies if c not in highly_correlated_currencies and
                                                     c not in highly_unstable_currencies]
candidate_currencies.sort()

# Rates on the first train date; read as a global by the loss function and
# by run_optimization.
reference_date_exhange_rate = train[candidate_currencies].iloc[0]

# Stop as soon as a full pass over the candidates adds nothing. Checking this
# directly (rather than `best_score == 1 or best_candidate is not None`) also
# terminates when no candidate ever beats the initial score, a case in which
# the previous condition looped forever.
while True:
    print(f"Attempting to improve set, starting from: {selected_currencies_so_far}")
    best_candidate = None

    for candidate_currency in candidate_currencies:
        new_mix, new_score = try_currencies([candidate_currency],
                                            selected_currencies_so_far,
                                            candidate_currencies)

        if new_score < improvement_factor * last_score:
            if new_score < best_score:
                best_score = new_score
                best_candidate = candidate_currency

                print(f"New best score found: {new_score}")
                print(new_mix)
            elif last_score < 1:
                print(f"Better than last score, but not better than best: {new_score}")
                print(new_mix)

    last_score = best_score

    if best_candidate is None:
        break

    selected_currencies_so_far.append(best_candidate)

print(f"Done. Best set: {selected_currencies_so_far}")

Attempting to improve set, starting from: []
Evaluating [] + ['AUD']
New best score found: 0.020513913774726975
AUD   1.00000
dtype: float64
Evaluating [] + ['CAD']
New best score found: 0.016787617385285697
CAD   1.00000
dtype: float64
Evaluating [] + ['CHF']
Evaluating [] + ['CNY']
Evaluating [] + ['EUR']
New best score found: 0.008772374917859902
EUR   1.00000
dtype: float64
Evaluating [] + ['GBP']
Evaluating [] + ['HUF']
Evaluating [] + ['IDR']
Evaluating [] + ['JPY']
Evaluating [] + ['KRW']
Evaluating [] + ['MYR']
Evaluating [] + ['NOK']
Evaluating [] + ['NZD']
Evaluating [] + ['PLN']
Evaluating [] + ['SEK']
Evaluating [] + ['SGD']
Evaluating [] + ['THB']
Evaluating [] + ['USD']
New best score found: 0.007873209372139964
USD   1.00000
dtype: float64
Attempting to improve set, starting from: ['USD']
Evaluating ['USD'] + ['AUD']
New best score found: 0.006604604818370605
USD   0.78715
AUD   0.21285
dtype: float64
Evaluating ['USD'] + ['CAD']
Better than last score, but not better th

In [None]:
# Run 27-11-2020: .995: ['USD', 'AUD', 'EUR']
# Run 27-11-2020: .999: ['USD', 'AUD', 'EUR', 'CHF', 'SEK']
# Run 27-11-2020: .9995: ['USD', 'AUD', 'EUR', 'CHF', 'SEK', 'JPY']

# TODO: use every currency as a starting point, and see where you end up

In [None]:
# Repeat the greedy forward selection once per starting currency, to see which
# basket each starting point ends up with.
candidate_currencies = [c for c in all_currencies if c not in highly_correlated_currencies and
                                                     c not in highly_unstable_currencies]
candidate_currencies.sort()

# Rates on the first train date; read as a global by the loss function and
# by run_optimization.
reference_date_exhange_rate = train[candidate_currencies].iloc[0]

for currency in candidate_currencies:
    if currency in ['AUD', 'USD', 'EUR', 'CHF', 'SEK']:
        # We know the result for these already
        continue

    selected_currencies_so_far = [currency]
    # 1 doubles as "no score yet"; try_currencies also returns 1 for
    # candidates that are already in the basket.
    best_score = 1
    last_score = 1

    # Stop as soon as a full pass over the candidates adds nothing. Checking
    # this directly (rather than `best_score == 1 or best_candidate is not
    # None`) also terminates when no candidate ever beats the initial score,
    # a case in which the previous condition looped forever.
    while True:
        print(f"Attempting to improve set, starting from: {selected_currencies_so_far}")
        best_candidate = None

        for candidate_currency in candidate_currencies:
            new_mix, new_score = try_currencies([candidate_currency],
                                                selected_currencies_so_far,
                                                candidate_currencies)

            # A candidate must beat the previous round by the .999 factor and
            # also be the best seen in the current round.
            if new_score < .999 * last_score and new_score < best_score:
                best_score = new_score
                best_candidate = candidate_currency

        last_score = best_score

        if best_candidate is None:
            break

        selected_currencies_so_far.append(best_candidate)

    print(f"Best set: {selected_currencies_so_far} ({best_score})")

Attempting to improve set, starting from: ['CAD']
Attempting to improve set, starting from: ['CAD', 'USD']
Attempting to improve set, starting from: ['CAD', 'USD', 'EUR']
Attempting to improve set, starting from: ['CAD', 'USD', 'EUR', 'AUD']
