# Application of Quantum Annealing in Identifying Financial Portfolio Correlations

Authors:

## Environment Setup

In [1]:
!pip install pandas_datareader
!pip install yfinance



In [2]:
import numpy as np
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
import statsmodels.api as sm
from abc import ABC, abstractmethod

## Hyperparameters

(*) Houcine, Benlaria, et al. "The relationship between crude oil prices, EUR/USD exchange rate and gold prices." International Journal of Energy Economics and Policy 10.5 (2020): 234-242.

In [3]:
# Time of concern
concerned_year = '2019'
concerned_month = '06'
start_date = f'{concerned_year}-01-01'
end_date = f'{concerned_year}-12-31'

# Proportionality coefficients between data pairs; referenced from (*)
euer_gp_pc = 0.51
cop_gp_pc = 0.77
cop_euer_pc = 0.78

# Labels
euer_label = 'EUR_to_USD_Exchange_Rate'
cop_label = 'Crude_Oil_Price'
gp_label = 'Gold_Price'

# Large number to prevent results yielding contradicting scenarios
large_num = 10**4

## Data Processing

In [4]:
def clean_data(df, label='item'):
  """
  Clean and preprocess time series data.

  Parameters:
  - df (DataFrame): Input DataFrame containing time series data.
  - label (str, optional): Label to assign to the column containing the time series data. Default is 'item'.

  Returns:
  - DataFrame: Cleaned and preprocessed DataFrame with the following modifications:
      - Renames the index axis to 'Date'.
      - Renames the first column to the specified `label`.
      - Drops rows with missing values.
      - Resamples the data to monthly frequency and calculates the mean.
      - Converts the index values to strings and retains only the month portion.
  """
  df = (df.rename_axis('Date')
      .rename(columns={df.columns[0]: label})
      .dropna()
      .resample('M')
      .mean())
  df.index = df.index.astype(str).str[len('YYYY-'):len('YYYY-MM')]
  return df

In [5]:
def regression_prediction(x, y, X, Y, X_label):
  """
  Perform regression prediction based on the given parameters.

  Args:
  - x: The x value.
  - y: The y value.
  - X: The predictor variable.
  - Y: The response variable.
  - X_label: The label of the predictor variable.

  Returns:
  - The regression prediction.
  """
  X = sm.add_constant(X)
  model = sm.OLS(Y, X)
  results = model.fit()
  print(results.params[X_label], results.params['const'])
  return y - (x * results.params[X_label]) - results.params['const']

In [6]:
def join_labels_sorted(labels):
  return '_'.join(sorted(labels))

In [7]:
# Get data
euer_data_raw = pdr.get_data_fred('DEXUSEU', start_date, end_date)
cop_data_raw = pdr.get_data_fred('DCOILWTICO', start_date, end_date)
gp_data_raw = pd.DataFrame(
    yf.Ticker('GLD')
    .history(start=start_date, end=end_date)
    ['Close'] # take closing price
    )

# Clean data
euer_data = clean_data(euer_data_raw, euer_label)
cop_data = clean_data(cop_data_raw, cop_label)
gp_data = clean_data(gp_data_raw, gp_label)

In [8]:
# Current values of concern
val_dict = {
  euer_label: euer_data.at[concerned_month, euer_label],
  cop_label: cop_data.at[concerned_month, cop_label],
  gp_label: gp_data.at[concerned_month, gp_label],
}

# Mean values
mean_dict = {
  euer_label: euer_data[euer_label].mean(),
  cop_label: cop_data[cop_label].mean(),
  gp_label: gp_data[gp_label].mean(),
}


# Regression model between data pairs
reg_pred_dict = {
  join_labels_sorted([euer_label, gp_label]): regression_prediction(val_dict[euer_label], val_dict[gp_label], euer_data[euer_label], gp_data[gp_label], euer_label),
  join_labels_sorted([cop_label, gp_label]): regression_prediction(val_dict[cop_label], val_dict[gp_label], cop_data[cop_label], gp_data[gp_label], cop_label),
  join_labels_sorted([cop_label, euer_label]): regression_prediction(val_dict[cop_label], val_dict[euer_label], cop_data[cop_label], euer_data[euer_label], cop_label)
}


# Proportionality coefficients between data pairs
pc_dict = {
  join_labels_sorted([euer_label, gp_label]): euer_gp_pc,
  join_labels_sorted([cop_label, gp_label]): cop_gp_pc,
  join_labels_sorted([cop_label, euer_label]): cop_euer_pc
}

-564.2630482556335 763.178988448503
-0.5798785414572458 164.46579637238585
-0.0008779112611327919 1.1696349047774341


In [9]:
to_print = {
  'val': val_dict,
  'mean': mean_dict,
  'reg_pred': reg_pred_dict,
  'pc': pc_dict
}

for item_name, item_to_print in to_print.items():
  for category, data in item_to_print.items():
    print(f'{item_name}_{category}: {data:.4f}')
  print()

val_EUR_to_USD_Exchange_Rate: 1.1295
val_Crude_Oil_Price: 54.6575
val_Gold_Price: 128.4715

mean_EUR_to_USD_Exchange_Rate: 1.1196
mean_Crude_Oil_Price: 56.9807
mean_Gold_Price: 131.4239

reg_pred_EUR_to_USD_Exchange_Rate_Gold_Price: 2.6361
reg_pred_Crude_Oil_Price_Gold_Price: -4.2996
reg_pred_Crude_Oil_Price_EUR_to_USD_Exchange_Rate: 0.0079

pc_EUR_to_USD_Exchange_Rate_Gold_Price: 0.5100
pc_Crude_Oil_Price_Gold_Price: 0.7700
pc_Crude_Oil_Price_EUR_to_USD_Exchange_Rate: 0.7800



## Formulate QUBO Optimization Function

In [10]:
def _get_label(id):
  """
  Get the label based on the given id.

  Args:
  - id: An integer representing the label ID.

  Returns:
  - The corresponding label.

  Raises:
  - ValueError: If the provided id is invalid.
  """
  if id == 0:
    return euer_label

  elif id == 1:
    return cop_label

  elif id == 2:
    return gp_label

  raise ValueError(f'Invalid id: {id}')


def _xi_xi(val, mean, sign=False):
  if sign:
    return val - mean
  return mean - val


def _xi_xj(pc, reg_pred, sign=False):
  if sign:
    return pc * reg_pred
  return -1 * pc * reg_pred


def build_Q_matrix():
  """
  Build the Q matrix.

  Returns:
  - The Q matrix.
  """
  matrix_size = 6
  matrix = [[0]*matrix_size for i in range(matrix_size)]
  for i in range(matrix_size):
    for j in range(matrix_size):

      label1 = _get_label(int(i / 2))
      label2 = _get_label(int(j / 2))

      if i == j:
        matrix[i][j] = _xi_xi(val_dict[label1], mean_dict[label1], i%2)
      elif (i+j) % 2:
        matrix[i][j] = large_num
      else:
        label = join_labels_sorted([label1, label2])
        matrix[i][j] = _xi_xj(pc_dict[label], reg_pred_dict[label], i%2)

  return np.array(matrix)

In [11]:
# Build quadratic coefficient matrix
Q = build_Q_matrix()

## Solve Solution

In [12]:
min_val, best_X_list = None, None
all_X_lists = [euer + cop + gp for euer in [[0,1], [1,0]] for cop in [[0,1], [1,0]] for gp in [[0,1], [1,0]]]

for X_list in all_X_lists:
  X = np.array(X_list)
  val = np.dot(np.dot(np.transpose(X), Q), X)
  if min_val == None or val > min_val:
    min_val = val
    best_X_list = X_list

print(f'(x_0, x_1, x_2, x_3, x_4, _x5) = {best_X_list}')

(x_0, x_1, x_2, x_3, x_4, _x5) = [0, 1, 1, 0, 1, 0]
