In [1]:
"""
Description: Perform (backward and forward) selection in currency selection for final Duca basket.
Author: Jeroen van Dijk
Date: 11-11-2020
Maintainer: Jeroen van Dijk
Email: jeroen.vandijk@d-data.nl
Status: Dev
"""

'\nDescription: Perform (backward and forward) selection in currency selection for final Duca basket.\nAuthor: Jeroen van Dijk\nDate: 11-11-2020\nMaintainer: Jeroen van Dijk\nEmail: jeroen.vandijk@d-data.nl\nStatus: Dev\n'

In [2]:
# Load common imports
%run ./CommonImports.ipynb

In [4]:
# Load common functions and currencies lists:
# - all_currencies
# - obsolete_currencies
# - p13_currencies
# - f_currencies
# - ff_currencies
# - currencies_per_continent
%run ./Utilities.ipynb

In [5]:
# Load loss functions
# - calculate_loss_function_around_one(weights)
# - calculate_loss_function_vs_t_minus_one(weights)
# - calculate_loss_function_vs_t_minus_one_with_balancing(weights)
# - calculate_loss_function_vs_t_minus_one_for_period(weights, max_date)
%run ./LossFunctions.ipynb

In [6]:
# Create dataframe with pre-defined base currency.
# `create_original_df` is not defined in this notebook; it comes from one of the
# notebooks loaded with %run above, and "EUR" is passed as the base currency.
# NOTE(review): presumably all rates are quoted against this base - confirm in the helper.
df = create_original_df("EUR")

In [7]:
# Boundary dates for the dev- (train and test) and validation set
train_start, test_start = date(2005, 4, 1), date(2012, 1, 1)
validation_start, validation_end = date(2015, 1, 1), date(2020, 10, 1)

# Split the data at these boundaries into train, test, dev (train and test) and validation
split_boundaries = (train_start, test_start, validation_start, validation_end)
train, test, dev, val = split_data(df, *split_boundaries)

In [8]:
# Calculate loss function score per currency, measured against every possible
# base currency on the dev set.
# The scores are collected in a plain list and converted to a DataFrame once:
# growing a DataFrame row by row is quadratic, and DataFrame.append no longer
# exists in pandas >= 2.0.
score_records = []

for base_currency in all_currencies:
    for currency in all_currencies:
        if currency == base_currency:
            continue

        # Exchange rate of the pair, rebased so the first observation equals 1
        rates = dev[base_currency] / dev[currency]
        normalized_rates = rates / rates.iloc[0]

        # Mean squared log-deviation from that starting level
        deviation = np.log(normalized_rates)
        loss_function_score = (deviation**2).mean()

        score_records.append({"currency": currency,
                              "base_currency": base_currency,
                              "loss_function_score": loss_function_score})

pairwise_results = pd.DataFrame(score_records,
                                columns=["currency", "base_currency", "loss_function_score"])

# Median score per currency over all base currencies. The score column is
# selected explicitly so the text column "base_currency" never reaches median().
results = pairwise_results.groupby("currency")[["loss_function_score"]].median()
results.sort_values("loss_function_score", ascending=True)

Unnamed: 0_level_0,loss_function_score
currency,Unnamed: 1_level_1
CAD,0.0146
MYR,0.01597
NOK,0.01745
HRK,0.02022
SEK,0.02037
CZK,0.02069
EUR,0.02119
DKK,0.02121
USD,0.02143
HKD,0.02162


In [9]:
# Training-period rates restricted to the currencies under consideration
train_rates = train[all_currencies]

# For every base currency: the element-wise inverse of each currency column
# divided by the base currency column.
exchange_table = {
    base_currency: 1 / train_rates.divide(train[base_currency], axis=0)
    for base_currency in all_currencies
}

# Rates on the first row of the training set, used as reference date
reference_date_exhange_rate = train_rates.iloc[0]

In [10]:
# Everything contains all currencies now
this_day_by_month = {}
previous_day_by_month = {}
diff_by_month = defaultdict(dict)

for base_currency, table in exchange_table.items():
    if base_currency not in all_currencies:
        continue
        
    exchange_table[base_currency]["month"] = exchange_table[base_currency].index.map(lambda x: int(x.strftime("%Y%m")))

    this_day = exchange_table[base_currency]
    previous_day = pd.DataFrame(exchange_table[base_currency].shift(1), 
                                index=exchange_table[base_currency].index)
    previous_day["month"] = previous_day.index.map(lambda x: int(x.strftime("%Y%m")))
    
    this_day_by_month[base_currency] = {month: values[all_currencies] for month, values in this_day.groupby("month")}
    previous_day_by_month[base_currency] = {month: values[all_currencies] for month, values in previous_day.groupby("month")}
    
    for month in this_day_by_month[base_currency].keys():
        diff_by_month[base_currency][month] = np.abs((this_day_by_month[base_currency][month] - previous_day_by_month[base_currency][month]) / 
                                                      previous_day_by_month[base_currency][month])
    
months = list(exchange_table["EUR"]["month"].unique())

In [11]:
def recalculate_weights(weights):
    """Helper function to recalculate weights (baseline principle).

    Every entry with a positive weight is set to 1, after which all entries are
    divided by their sum, so the positive entries end up with equal weights.

    Parameters
    ----------
    weights : pandas.Series or numpy.ndarray
        The current weights given to all currencies. Not modified.

    Returns
    -------
    pandas.Series or numpy.ndarray
        A new object of the same kind holding the recalculated weights. When the
        entries sum to 0 after the replacement, the division yields NaN.
    """
    # Work on a copy: boolean-mask assignment writes in place and would otherwise
    # overwrite the weights object owned by the caller.
    equal_weights = weights.copy()
    equal_weights[equal_weights > 0] = 1
    return equal_weights / equal_weights.sum()

In [12]:
def evaluate_exclusion_of_currency(weights,
                                   currency_set):
    """Perform one step of backward selection of currencies.

    Each currency in ``currency_set`` is removed in turn, the remaining weights
    are recalculated with ``recalculate_weights`` and the loss function is
    evaluated. The removal with the lowest loss is kept, provided it is more
    than 0.5% below the starting loss.

    Parameters
    ----------
    weights : pandas.Series
        The current weights given to all currencies, indexed by currency.
    currency_set : set
        The set containing all currencies that may still be excluded. The
        selected currency (if any) is removed from this set in place.

    Returns
    -------
    tuple
        ``(starting_loss_function_value, best_loss_function_value,
        best_to_exclude, currency_set)``. ``best_to_exclude`` is the string
        ``"None"`` when no exclusion met the improvement threshold; in that case
        ``currency_set`` is returned unchanged.
    """
    # Initialize string giving best currency to exclude
    best_to_exclude = "None"
    # Calculate initial loss function value
    starting_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(weights)

    # So far, this is the best solution
    best_loss_function_value = starting_loss_function_value
    print(f"starting loss function value: {best_loss_function_value}")
    print("")

    # Each time we eliminate one currency
    for excluded_currency in currency_set:
        print(f"evaluating {excluded_currency}")

        # We create the new weights,
        new_weights = weights.copy()
        new_weights[new_weights.index==excluded_currency] = 0
        new_weights = recalculate_weights(new_weights)

        # and calculate the new loss function value
        new_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(new_weights)
        print(f"new loss function value: {new_loss_function_value}")

        # Check whether the new loss function value is lower than the (so far) best one, and
        # the improvement needs to be > .5%
        if new_loss_function_value <= best_loss_function_value and new_loss_function_value < .995*starting_loss_function_value:
            print(f"better solution found by excluding {excluded_currency}")
            print(f"loss function decreased by {(starting_loss_function_value-new_loss_function_value)/starting_loss_function_value*100:.2f}% compared to starting value")
            # If so, save these results
            best_loss_function_value = new_loss_function_value
            best_to_exclude = excluded_currency
        print("")

    if best_to_exclude == "None":
        print("No currency needed to be removed.")
    else:
        print(f"\033[1m{best_to_exclude} is removed from currency set\n\033[0m")
        # Exclude the currency. This only happens when one was selected:
        # set.remove raises KeyError for the "None" placeholder.
        currency_set.remove(best_to_exclude)
    return starting_loss_function_value, best_loss_function_value, best_to_exclude, currency_set

In [None]:
# Backward selection: start from an equally weighted basket of all currencies
weights = pd.Series(np.ones(len(all_currencies))/len(all_currencies), index=reference_date_exhange_rate.loc[all_currencies].index)
currency_set = set(all_currencies)
max_excluded_currencies = 23

# Create list to save results
backward_selection_steps = []

for i in range(0, max_excluded_currencies):
    starting_loss_function_value, best_loss_function_value, best_to_exclude, currencies = evaluate_exclusion_of_currency(weights, currency_set)
    if best_to_exclude == "None":
        # Weights and candidates are unchanged, so every further round would
        # repeat exactly the same evaluation: stop here.
        break
    weights[weights.index==best_to_exclude] = 0
    weights = recalculate_weights(weights)

    backward_selection_steps.append((best_to_exclude, starting_loss_function_value, best_loss_function_value))

# currency_set is updated in place by evaluate_exclusion_of_currency
print(f"\n\nFinal currency selection: {set(currency_set)}")

# Passing the column names to the constructor also works when no currency was removed
backward_selection_result = pd.DataFrame(backward_selection_steps,
                                         columns=["Removed currency", "Previous loss function value", "New loss function value"])

In [None]:
# Display the backward selection steps: removed currency, previous and new loss function value
backward_selection_result

In [13]:
def evaluate_inclusion_of_currency(weights, currency_set):
    """Perform one step of forward selection of currencies.

    Every currency in ``currency_set`` is added in turn to the current basket,
    the weights are recalculated with ``recalculate_weights`` and the loss
    function is evaluated. The addition with the lowest loss is kept, provided
    it is more than 0.5% below the starting loss.

    Parameters
    ----------
    weights : pandas.Series
        The current weights given to all currencies, indexed by currency.
    currency_set : set
        The set containing all currencies that may still be included. The
        selected currency (if any) is removed from this set in place.

    Returns
    -------
    tuple
        ``(starting loss, best loss, best currency to include, currency_set)``;
        the currency is the string ``"None"`` when no inclusion qualified.
    """
    # Loss of the current basket; an empty basket gets a large placeholder value
    basket_is_filled = weights.sum() > 0
    initial_loss = calculate_loss_function_around_one(weights) if basket_is_filled else 999999

    # Nothing selected yet, so the starting point is the best known solution
    lowest_loss = initial_loss
    chosen_currency = "None"
    print(f"starting loss function value: {lowest_loss}")

    for candidate in currency_set:
        print(f"\nincluding {candidate}")

        # Switch the candidate on and recalculate the weights
        trial_weights = weights.copy()
        trial_weights[trial_weights.index == candidate] = 1
        trial_weights = recalculate_weights(trial_weights)

        trial_loss = calculate_loss_function_around_one(trial_weights)
        print(f"new loss function value: {trial_loss}")

        # Accept only when at least as good as the best so far and more than
        # .5% better than the starting value
        beats_best = trial_loss <= lowest_loss
        beats_threshold = trial_loss < 0.995 * initial_loss
        if beats_best and beats_threshold:
            decrease_pct = (initial_loss - trial_loss) / initial_loss * 100
            print(f"\nnew best solution found by including {candidate}")
            print(f"loss function decreased by {decrease_pct:.2f}% compared to starting value")
            lowest_loss = trial_loss
            chosen_currency = candidate
        print("")

    if chosen_currency != "None":
        print(f"\033[1m{chosen_currency} is added to currency set\n\033[0m")
        # The currency is no longer a candidate for later steps
        currency_set.remove(chosen_currency)
    else:
        print("No currency needed to be included.")
    return initial_loss, lowest_loss, chosen_currency, currency_set

In [None]:
# Forward selection starts from a basket that holds only the first currency
# TODO: Change starting currency
first_currency = "SEK"
currency_index = reference_date_exhange_rate.loc[all_currencies].index
weights = pd.Series(0, index=currency_index)
weights[first_currency] = 1

# All other currencies are candidates for inclusion
currency_set = set(all_currencies)
currency_set.remove(first_currency)

# Result rows; the starting currency has no loss function values
forward_selection_result = [(first_currency, "-", "-")]

# Set maximum number of currencies to include
max_currencies_to_include = 10

for i in range(max_currencies_to_include):
    selection_step = evaluate_inclusion_of_currency(weights, currency_set)
    starting_loss_function_value, best_loss_function_value, best_to_include, currencies = selection_step
    if best_to_include == "None":
        break
    # Switch the selected currency on and recalculate the weights
    weights[weights.index==best_to_include] = 1
    weights = recalculate_weights(weights)
    forward_selection_result.append((best_to_include, starting_loss_function_value, best_loss_function_value))

selected_currencies = set(all_currencies) - set(currencies)
print(f"\n\nFinal currency selection: {selected_currencies}")

forward_selection_result = pd.DataFrame(
    forward_selection_result,
    columns=["Added currency", "Previous loss function value", "New loss function value"],
)

In [None]:
# Unique currencies from the "Added currency" column, as a plain list
added_currency_column = forward_selection_result["Added currency"]
forward_selection_currencies = added_currency_column.unique().tolist()
forward_selection_result

In [None]:
def evaluate_inclusion_of_currency_differentiate_weights(weights,
                                                         currency_set):
    """Perform one step of forward selection with a differentiated weight.

    Every currency in ``currency_set`` is tried with ten different weights,
    running from 1/(10*n) up to 1/n, where n is the number of currencies in the
    basket after the inclusion. The currencies already in the basket share the
    remaining weight equally. The combination with the lowest loss is kept,
    provided it is more than 0.5% below the starting loss.

    Parameters
    ----------
    weights : pandas.Series
        The current weights given to all currencies, indexed by currency. At
        least one entry has to be positive. Not modified.
    currency_set : set
        The set containing all currencies that may still be included. The
        selected currency (if any) is removed from this set in place.

    Returns
    -------
    tuple
        ``(starting_loss_function_value, best_loss_function_value,
        best_to_include, best_weights, currency_set)``. ``best_to_include`` is
        the string ``"None"`` and ``best_weights`` a copy of ``weights`` when
        no inclusion met the improvement threshold.
    """

    # Initialize string giving best currency to include
    best_to_include = "None"
    # Initialize the best weights with the current weights
    best_weights = weights.copy()
    # Calculate initial loss function value
    starting_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(weights)

    # So far, this is the best solution
    best_loss_function_value = starting_loss_function_value
    print(f"starting loss function value: {best_loss_function_value}")

    # Number of currencies in the basket once one more has been included
    no_currencies = (weights>0).sum()+1
    # Ten candidate weights. linspace fixes the number of points; arange with a
    # float step can return one element more or less because of rounding.
    possible_weights_included_currency = np.linspace(1/no_currencies/10, 1/no_currencies, 10)

    # Each time we include one currency
    for included_currency in currency_set:
        print(f"\nincluding {included_currency}\n")

        for possible_weight in possible_weights_included_currency:
            other_weight = (1-possible_weight)/(no_currencies-1)

            # Build every candidate from the untouched input weights. astype
            # returns a new float Series, so fractional weights can be stored
            # and earlier candidates are never overwritten.
            new_weights = weights.astype(float)
            new_weights[new_weights>0] = other_weight
            new_weights[included_currency] = possible_weight

            # and calculate the new loss function value
            new_loss_function_value = calculate_loss_function_vs_t_minus_one_with_balancing(new_weights)
            print(f"weight {included_currency}: {possible_weight}")
            print(f"other weight(s): {other_weight}")
            print(f"new loss function value: {new_loss_function_value}\n")

            # Check whether the new loss function value is lower than the (so far) best one, and
            # the improvement needs to be > .5%
            if new_loss_function_value <= best_loss_function_value and new_loss_function_value < 0.995*starting_loss_function_value:
                print(f"\nbetter solution found by including {included_currency}")
                print(f"loss function decreased by {(best_loss_function_value-new_loss_function_value)/best_loss_function_value*100:.2f}% compared to current best solution\n")
                # If so, save these results. new_weights is a fresh object per
                # candidate, so the stored best weights stay as they are here.
                best_weights = new_weights
                print(best_weights)
                best_loss_function_value = new_loss_function_value
                best_to_include = included_currency

    if best_to_include == "None":
        print("No currency needed to be included.")
    else:
        # Reset the bold escape sequence, as the other selection functions do
        print(f"\033[1m{best_to_include} is added to currency set\n\033[0m")
        # The currency is no longer a candidate for later steps
        currency_set.remove(best_to_include)
    return starting_loss_function_value, best_loss_function_value, best_to_include, best_weights, currency_set

In [None]:
# Create weights series and specify first currency to include.
# The series is created as float because fractional weights are stored in it later.
weights = pd.Series(0.0, index=reference_date_exhange_rate.loc[all_currencies].index)
# TODO: Change starting currency
first_currency = "SEK"
weights[first_currency] = 1
currency_set = set(all_currencies)
currency_set.remove(first_currency)

# Create list to save results; the starting currency has no loss function values
forward_selection_weights_steps = [(first_currency, "-", "-")]

# Set maximum number of currencies to include
max_currencies_to_include = 14

for i in range(0, max_currencies_to_include):
    # Keep the loss values of this step: they are recorded below. Discarding
    # them would record whatever an earlier cell left in these variables.
    (starting_loss_function_value,
     best_loss_function_value,
     best_to_include,
     best_weights,
     currencies) = evaluate_inclusion_of_currency_differentiate_weights(weights, currency_set)
    if best_to_include == "None":
        break
    weights = best_weights
    forward_selection_weights_steps.append((best_to_include, starting_loss_function_value, best_loss_function_value))

forward_selection_weights_result = pd.DataFrame(forward_selection_weights_steps,
                                                columns=["Added currency", "Previous loss function value", "New loss function value"])

In [None]:
# The weights of each reference mix have to add up to 1.
# NOTE(review): the mixes are presumably floating point weights - confirm in the
# helper notebook. Their sum is therefore compared with a tolerance, because an
# exact == 1 can fail on rounding alone.
assert np.isclose(get_libra_mix(reference_date_exhange_rate).sum(), 1), "Libra mix weights do not sum to 1"
assert np.isclose(get_sdr_mix(reference_date_exhange_rate).sum(), 1), "SDR mix weights do not sum to 1"