In [None]:
"""
Description: Perform (backward) selection in currency selection for final Duca basket.
Author: Jeroen van Dijk
Date: 11-11-2020
Maintainer: Jeroen van Dijk
Email: jeroen.vandijk@d-data.nl
Status: Dev
"""

In [2]:
# Load common imports
%run ./CommonImports.ipynb

In [3]:
# Load common functions and currencies lists:
# - all_currencies
# - obsolete_currencies
# - p13_currencies
# - f_currencies
# - ff_currencies
# - currencies_per_continent
%run ./Utilities.ipynb

In [4]:
# Create dataframe with pre-defined base currency
df = create_original_df("EUR")

In [5]:
# Determine splits for dev- (train and test) and validation set
train_start = date(2005, 4, 1)
test_start = date(2012, 1, 1)
validation_start = date(2015, 1, 1)
validation_end = date(2020, 10, 1)

# Create dev (train and test) and validation set
train, test, dev, val = split_data(df, train_start, test_start, validation_start, validation_end)

In [6]:
results = pd.DataFrame()

# Calculate loss function score per currency
for base_currency in all_currencies:
    for currency in all_currencies:
        if currency == base_currency:
            continue
            
        rates = dev[base_currency] / dev[currency]
        normalized_rates = rates / rates.iloc[0]
        
        deviation = np.log(normalized_rates)
        loss_function_score = (deviation**2).mean()

        results = results.append({"currency": currency,
                                  "base_currency": base_currency,
                                  "loss_function_score": loss_function_score},
                                 ignore_index=True)
    
results = results.groupby("currency").median()
results.sort_values("loss_function_score", ascending=True)

Unnamed: 0_level_0,loss_function_score
currency,Unnamed: 1_level_1
CAD,0.015
MYR,0.016
NOK,0.017
HRK,0.02
SEK,0.02
CZK,0.021
EUR,0.021
DKK,0.021
USD,0.021
HKD,0.022


In [7]:
exchange_table = dict()

for base_currency in all_currencies:
    exchange_table[base_currency] = 1/train[all_currencies].divide(train[base_currency], axis=0)

exchange_table["USD"]

Unnamed: 0_level_0,KRW,THB,EUR,HRK,NZD,PHP,JPY,USD,HKD,AUD,RON,SEK,TRY,CNY,MYR,CHF,ZAR,CZK,SGD,NOK,GBP,PLN,DKK,RUB,HUF,IDR,CAD
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2005-04-01,9.923e-04,0.026,1.296,0.175,0.711,0.018,0.009,1.0,0.128,0.771,0.360,0.141,0.740,0.121,0.263,0.835,0.162,0.043,0.604,0.158,1.887,0.317,0.174,0.036,0.005,1.056e-04,0.824
2005-04-02,9.904e-04,0.025,1.293,0.174,0.709,0.018,0.009,1.0,0.128,0.770,0.359,0.141,0.738,0.121,0.263,0.833,0.162,0.043,0.603,0.158,1.883,0.316,0.174,0.036,0.005,1.055e-04,0.823
2005-04-03,9.886e-04,0.025,1.291,0.174,0.708,0.018,0.009,1.0,0.128,0.769,0.358,0.141,0.736,0.121,0.263,0.831,0.161,0.043,0.602,0.157,1.879,0.315,0.173,0.036,0.005,1.055e-04,0.822
2005-04-04,9.867e-04,0.025,1.288,0.174,0.707,0.018,0.009,1.0,0.128,0.768,0.358,0.140,0.733,0.121,0.263,0.829,0.161,0.043,0.600,0.157,1.875,0.313,0.173,0.036,0.005,1.054e-04,0.821
2005-04-05,9.828e-04,0.025,1.281,0.173,0.704,0.018,0.009,1.0,0.128,0.764,0.356,0.140,0.731,0.121,0.263,0.824,0.160,0.043,0.601,0.157,1.873,0.310,0.172,0.036,0.005,1.053e-04,0.817
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011-12-27,8.652e-04,0.032,1.307,0.174,0.774,0.023,0.013,1.0,0.129,1.016,0.305,0.146,0.526,0.158,0.315,1.070,0.123,0.051,0.771,0.168,1.568,0.297,0.176,0.032,0.004,1.086e-04,0.981
2011-12-28,8.651e-04,0.032,1.307,0.174,0.778,0.023,0.013,1.0,0.129,1.020,0.305,0.146,0.526,0.158,0.316,1.072,0.123,0.051,0.772,0.168,1.567,0.297,0.176,0.032,0.004,1.087e-04,0.985
2011-12-29,8.658e-04,0.031,1.289,0.171,0.767,0.023,0.013,1.0,0.129,1.007,0.300,0.144,0.523,0.158,0.315,1.058,0.123,0.050,0.767,0.166,1.542,0.290,0.173,0.031,0.004,1.087e-04,0.977
2011-12-30,8.634e-04,0.032,1.294,0.172,0.773,0.023,0.013,1.0,0.129,1.017,0.299,0.145,0.530,0.159,0.315,1.064,0.123,0.050,0.769,0.167,1.549,0.290,0.174,0.031,0.004,1.103e-04,0.979


In [8]:
reference_date_exhange_rate = train[all_currencies].iloc[0]

def calculate_loss_function_around_one(weights):
    weights = weights / weights.sum()
    amounts = weights * reference_date_exhange_rate

    # TODO: penalize large coefficients?
    # JD: we do not face the problem of large coefficients (yet)
    loss_function_score = 0

    for base_currency in currencies:
        amounts_through_time = amounts * exchange_table[base_currency]
        normalized_amounts_through_time = amounts_through_time.sum(axis=1) / reference_date_exhange_rate[base_currency]
        
        # TODO: other possibilities for loss functions:
        # - Mean Absolute Error MAE
        # - Root Mean Squared Error RMSE
        # - Time-series related loss functions (research)
        
        deviation = np.log(normalized_amounts_through_time)
        loss_function_score += (deviation**2).mean()
    
    return loss_function_score

In [9]:
def calculate_loss_function_vs_t_minus_one(weights):
    weights = weights / weights.sum()
    amounts = weights * reference_date_exhange_rate.loc[p13_currencies]

    loss_function_score = 0

    for base_currency in p13_currencies:
        amounts_through_time = (amounts * exchange_table[base_currency]).sum(axis=1)
        loss_function_score += (np.abs(amounts_through_time - amounts_through_time.shift(1))/amounts_through_time).mean()

    return loss_function_score

starting_point = np.ones(13)/13

result = minimize(calculate_loss_function_vs_t_minus_one, starting_point, options={"disp": True, "maxiter": 100})
duca_mix = pd.Series(result.x/result.x.sum(), index=reference_date_exhange_rate.loc[p13_currencies].index)
print(calculate_loss_function_vs_t_minus_one(duca_mix))
duca_mix.sort_values(ascending=False)

Optimization terminated successfully.
         Current function value: 0.035071
         Iterations: 68
         Function evaluations: 1050
         Gradient evaluations: 75
0.03507095558894311


EUR    0.164
SGD    0.131
GBP    0.102
CNY    0.084
NOK    0.082
SEK    0.075
AUD    0.069
JPY    0.055
CAD    0.053
NZD    0.052
CHF    0.046
USD    0.045
ZAR    0.043
dtype: float64

In [14]:
# Everything contains all currencies now
this_day_by_month = {}
previous_day_by_month = {}
diff_by_month = defaultdict(dict)

for base_currency, table in exchange_table.items():
    if base_currency not in all_currencies:
        continue
        
    exchange_table[base_currency]["month"] = exchange_table[base_currency].index.map(lambda x: int(x.strftime("%Y%m")))

    this_day = exchange_table[base_currency]
    previous_day = pd.DataFrame(exchange_table[base_currency].shift(1), 
                                index=exchange_table[base_currency].index)
    previous_day["month"] = previous_day.index.map(lambda x: int(x.strftime("%Y%m")))
    
    this_day_by_month[base_currency] = {month: values[all_currencies] for month, values in this_day.groupby("month")}
    previous_day_by_month[base_currency] = {month: values[all_currencies] for month, values in previous_day.groupby("month")}
    
    for month in this_day_by_month[base_currency].keys():
        diff_by_month[base_currency][month] = np.abs((this_day_by_month[base_currency][month] - previous_day_by_month[base_currency][month]) / 
                                                      previous_day_by_month[base_currency][month])
    
months = list(exchange_table["EUR"]["month"].unique())

In [15]:
bounds = [(0,1) for _ in range(len(p13_currencies))]

In [16]:
def calculate_loss_function_vs_t_minus_one_with_balancing(weights):
    print("evaluate")
    
    # Ensure we never deviate from 100% total, and only use positive numbers
    weights = np.abs(weights) / np.abs(weights).sum()  
    
    # Calculate the weights after rebalancing for each month
    amounts_by_month = {month: weights / this_day_by_month["EUR"][month].iloc[0][p13_currencies] for month in months}
    
    # Calculate the loss function per base currency, per month
    return sum([sum([(amounts_by_month[month] * diff_by_month[base_currency][month]).sum(axis=1).sum() for month in months]) 
                for base_currency in p13_currencies])

starting_point = np.ones(13)/13
result = minimize(calculate_loss_function_vs_t_minus_one_with_balancing, starting_point, bounds=bounds, options={"disp": True, 
                                                                                                                 "maxiter": 2,
                                                                                                                 })
duca_mix = pd.Series(result.x/result.x.sum(), index=reference_date_exhange_rate.loc[p13_currencies].index)
# print(calculate_loss_function_vs_t_minus_one_with_balancing(duca_mix))
duca_mix.sort_values(ascending=False)

evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate
evaluate


NZD    1.080e-01
AUD    1.080e-01
SGD    1.080e-01
CAD    1.080e-01
CHF    1.080e-01
GBP    1.080e-01
EUR    1.080e-01
USD    1.080e-01
NOK    5.248e-02
CNY    4.302e-02
SEK    4.068e-02
ZAR    1.199e-17
JPY    0.000e+00
dtype: float64

In [17]:
def calculate_loss_function_vs_t_minus_one_for_period(weights, max_date):
    weights = weights / weights.sum()
    amounts = weights * reference_date_exhange_rate.loc[p13_currencies]

    loss_function_score= 0

    for base_currency in p13_currencies:
        amounts_through_time = (amounts * exchange_table[base_currency].loc[:max_date]).sum(axis=1)
        loss_function_score += (np.abs(amounts_through_time - amounts_through_time.shift(1))/amounts_through_time).mean()

    return loss_function_score

In [18]:
def calculate_loss_function_vs_t_minus_one_with_balancing(weights):
    
    # Ensure we never deviate from 100% total, and only use positive numbers
    weights = np.abs(weights) / np.abs(weights).sum()  
    
    # Calculate the weights after rebalancing for each month
    amounts_by_month = {month: weights / this_day_by_month["EUR"][month].iloc[0][all_currencies] for month in months}
    
    # Calculate the loss function per base currency, per month
    return sum([sum([(amounts_by_month[month] * diff_by_month[base_currency][month]).sum(axis=1).sum() for month in months]) 
                for base_currency in all_currencies])

In [19]:
def recalculate_weights(weights):
    """Helper function to recalculate weigths (baseline principle).
   
    Parameters
    ----------
    weights : array
        The initial weights given to all currencies."""
    weights[weights>0] = 1
    weights = weights/weights.sum()
    return weights

In [20]:
def evaluate_exclusion_of_currency(weights, 
                                   currency_set):
    """Perform backward selection of currencies.
   
    Parameters
    ----------
    weights : array
        The initial weights given to all currencies.
    currency_set : set
        The set containing all currencies of interest."""
    # Initialize string giving best currency to exclude
    best_to_exclude = "None"
    # Calculate initial loss function value
    starting_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(weights)
    
    # So far, this is the best solution
    best_loss_function_value = starting_loss_function_value
    print(f"starting loss function value: {best_loss_function_value}")
    print("")

    # Each time we eliminate one currency
    for excluded_currency in currency_set:
        print(f"evaluating {excluded_currency}")

        # We create the new weights,
        new_weights = weights.copy()
        new_weights[new_weights.index==excluded_currency] = 0
        new_weights = recalculate_weights(new_weights)
        
        # and calculate the new loss function value
        new_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(new_weights)
        print(f"new loss function value: {new_loss_function_value}")

        # Check whether the new loss function value is lower than the (so far) best one, and
        # the improvement needs to be > .5%
        if new_loss_function_value <= best_loss_function_value and new_loss_function_value < .995*starting_loss_function_value:
            print(f"better solution found by excluding {excluded_currency}")
            print(f"loss function decreased by {(best_loss_function_value-new_loss_function_value)/best_loss_function_value*100:.2f}% compared to current best solution")
            # If so, save these results
            best_loss_function_value = new_loss_function_value
            best_to_exclude = excluded_currency
        print("")
        
    if best_to_exclude == "None":
        print("No currency needed to be removed.")
    else:
        print(f"{best_to_exclude} is removed from currency set")
    print("")
    print(f"--------- new iteration ---------")
    print("")
    
    # Exclude the currency
    currency_set.remove(best_to_exclude)
    return starting_loss_function_value, best_loss_function_value, best_to_exclude, currency_set

In [None]:
weights = pd.Series(np.ones(len(all_currencies))/len(all_currencies), index=reference_date_exhange_rate.loc[all_currencies].index)
currency_set = set(all_currencies)
max_excluded_currencies = 27

for i in range(0, max_excluded_currencies):
    _, _, best_to_exclude, currencies = evaluate_exclusion_of_currency(weights, currency_set)
    if best_to_exclude == "None":
        continue
    weights[weights.index==best_to_exclude] = 0
    weights = recalculate_weights(weights)

In [22]:
def evaluate_inclusion_of_currency(weights, 
                                   currency_set):
    """Perform forward selection of currencies.
   
    Parameters
    ----------
    weights : array
        The initial weights given to all currencies.
    currency_set : set
        The set containing all currencies of interest."""

    # Initialize string giving best currency to exclude
    best_to_include = "None"
    # Calculate initial loss function value
    starting_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(weights)

    # So far, this is the best solution
    best_loss_function_value = starting_loss_function_value
    print(f"starting loss function value: {best_loss_function_value}")

    # Each time we include one currency
    for included_currency in currency_set:
        print(f"\nincluding {included_currency}")

        # We create the new weights,
        new_weights = weights.copy()
        new_weights[new_weights.index==included_currency] = 1
        new_weights = recalculate_weights(new_weights)

        # and calculate the new loss function value
        new_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(new_weights)
        print(f"new loss function value: {new_loss_function_value}")
        
        # TODO: Check boundaries
        # Check whether the new loss function value is lower than the (so far) best one, and
        # the improvement needs to be > .5%
        if new_loss_function_value <= 1.05*best_loss_function_value and new_loss_function_value < 1.05*starting_loss_function_value:
            print(f"\nbetter solution found by including {included_currency}")
            print(f"loss function decreased by {(best_loss_function_value-new_loss_function_value)/best_loss_function_value*100:.2f}% compared to current best solution\n")
            # If so, save these results
            best_loss_function_value = new_loss_function_value
            best_to_include = included_currency
        print("")

    if best_to_include == "None":
        print("No currency needed to be included.")
    else:
        print(f"{best_to_include} is added to currency set\n")
        # Exclude the currency
        currency_set.remove(best_to_include)
    return starting_loss_function_value, best_loss_function_value, best_to_include, currency_set

In [None]:
# Create weights series and specificy first currency to include
weights = pd.Series(0, index=reference_date_exhange_rate.loc[all_currencies].index)
first_currency = "GBP"
weights[first_currency] = 1
currency_set = set(all_currencies)
currency_set.remove(first_currency)

# Set maximum number of currencies to include
max_currencies_to_include = 14

for i in range(0, max_currencies_to_include):
    _, _, best_to_include, currencies = evaluate_inclusion_of_currency(weights, currency_set)
    if best_to_include == "None":
        break
    weights[weights.index==best_to_include] = 0
    weights = recalculate_weights(weights)

In [26]:
def evaluate_inclusion_of_currency_difference_weights(weights, 
                                                      currency_set):
    """Perform forward selection of currencies.
   
    Parameters
    ----------
    weights : array
        The initial weights given to all currencies.
    currency_set : set
        The set containing all currencies of interest."""

    # Initialize string giving best currency to exclude
    best_to_include = "None"
    # Initialize string giving best currency weights
    best_weights = weights.copy()
    # Calculate initial loss function value
    starting_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(weights)

    # So far, this is the best solution
    best_loss_function_value = starting_loss_function_value
    print(f"starting loss function value: {best_loss_function_value}")

    # Each time we include one currency
    for included_currency in currency_set:
        print(f"\nincluding {included_currency}\n")
        
        new_weights = weights.copy()
        no_currencies = (new_weights>0).sum()+1
        possible_weights_included_currency = np.arange(0, 1/no_currencies, 1/no_currencies/10) + 1/no_currencies/10
        
        for possible_weight in possible_weights_included_currency:
            new_weights[new_weights>0] = (1-possible_weight)/(no_currencies-1)
            new_weights[included_currency] = possible_weight
        
            # We create the new weights,
#             new_weights = weights.copy()
#             new_weights[new_weights.index==included_currency] = 1
#             new_weights = recalculate_weights(new_weights)

            # and calculate the new loss function value
            new_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(new_weights)
            print(f"weight {included_currency}: {possible_weight}")
            print(f"other weight(s): {(1-possible_weight)/(no_currencies-1)}")
            print(f"new loss function value: {new_loss_function_value}\n")

            # TODO: Check boundaries
            # Check whether the new loss function value is lower than the (so far) best one, and
            # the improvement needs to be > .5%
            if new_loss_function_value <= 1.05*best_loss_function_value and new_loss_function_value < 1.05*starting_loss_function_value:
                print(f"\nbetter solution found by including {included_currency}")
                print(f"loss function decreased by {(best_loss_function_value-new_loss_function_value)/best_loss_function_value*100:.2f}% compared to current best solution\n")
                # If so, save these results
                best_weights = new_weights
                print(best_weights)
                best_loss_function_value = new_loss_function_value
                best_to_include = included_currency

    if best_to_include == "None":
        print("No currency needed to be included.")
    else:
        print(f"{best_to_include} is added to currency set\n")
        # Exclude the currency
        currency_set.remove(best_to_include)
    return starting_loss_function_value, best_loss_function_value, best_to_include, best_weights, currency_set

In [28]:
# Create weights series and specificy first currency to include
weights = pd.Series(0, index=reference_date_exhange_rate.loc[all_currencies].index)
first_currency = "GBP"
weights[first_currency] = 1
currency_set = set(["EUR", "CAD", "AUD", "GBP"])
currency_set.remove(first_currency)

# Set maximum number of currencies to include
max_currencies_to_include = 14

for i in range(0, max_currencies_to_include):
    _, _, best_to_include, best_weights, currencies = evaluate_inclusion_of_currency_difference_weights(weights, currency_set)
    if best_to_include == "None":
        break
#     weights[weights.index==best_to_include] = 0
#     weights = recalculate_weights(weights)
    weights = best_weights

starting loss function value: 187.9976428714264

including AUD

weight AUD: 0.05
other weight(s): 0.95
new loss function value: 199.9617248744148

weight AUD: 0.1
other weight(s): 0.9
new loss function value: 211.92580687740318

weight AUD: 0.15000000000000002
other weight(s): 0.85
new loss function value: 223.8898888803915

weight AUD: 0.2
other weight(s): 0.8
new loss function value: 235.85397088337987

weight AUD: 0.25
other weight(s): 0.75
new loss function value: 247.81805288636824

weight AUD: 0.3
other weight(s): 0.7
new loss function value: 259.78213488935666

weight AUD: 0.35000000000000003
other weight(s): 0.6499999999999999
new loss function value: 271.74621689234505

weight AUD: 0.4
other weight(s): 0.6
new loss function value: 283.7102988953334

weight AUD: 0.45
other weight(s): 0.55
new loss function value: 295.67438089832183

weight AUD: 0.5
other weight(s): 0.5
new loss function value: 307.6384629013101


including CAD

weight CAD: 0.05
other weight(s): 0.95
new loss fu

In [None]:
assert get_libra_mix(reference_date_exhange_rate).sum() == 1
assert get_sdr_mix(reference_date_exhange_rate).sum() == 1