# Oil, Gold, NASDAQ, and USD-index dataset

- Oil: Brent daily in USD from https://www.eia.gov/dnav/pet/hist/RBRTEd.htm
- Gold: LBMA gold price, daily 10:30 AM fixing, in USD, from https://fred.stlouisfed.org/series/GOLDAMGBD228NLBM
- NASDAQ: adjusted closing price (daily) from https://finance.yahoo.com/quote/%5EIXIC/history?p=%5EIXIC
- USD index: trade-weighted U.S. dollar index (broad) from https://fred.stlouisfed.org/series/TWEXB

In [None]:
# ale
# Put the parent directory on the import path (needed when mogptk is not installed as a package)
import sys
sys.path.insert(0, '../')

import mogptk
import gpflow

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import datetime

%reload_ext autoreload
%autoreload 2

import mogptk
import numpy as np

%reload_ext autoreload
%autoreload 2

In [None]:
# Source files, channel names and value columns, listed in matching order.
f_paths = [
    'data/gonu/brent-daily.csv',
    'data/gonu/lmba-gold-usd-am-daily.csv',
    'data/gonu/nasdaq.csv',
    'data/gonu/TWEXB.csv',
]
cols = ['Oil', 'Gold', 'NASDAQ', 'USD']
value_cols = ['Price', 'Price', 'Adj Close', 'Price']

# Load each CSV with 'Date' as the input column (formatted by
# mogptk.FormatDate) and the listed value column as the output.
data = [
    mogptk.LoadCSV(path,
                   'Date',
                   value_col,
                   name=label,
                   format={'Date': mogptk.FormatDate})
    for path, value_col, label in zip(f_paths, value_cols, cols)
]
oil, gold, nasdaq, usd = data

# Per-channel removal plan: (date range passed to remove_range, or None to
# skip it; fraction passed to remove_randomly).
removal_plan = [
    (('2018-10-05', None), 0.3),
    (('2016-11-15', '2017-01-01'), 0.6),
    (None, 0.6),
    (('2016-03-15', '2016-06-01'), 0.6),
]

# Seed NumPy's global RNG once before any random removal.
np.random.seed(1)
for i, channel in enumerate(data):
    # Same window, '7d' aggregation and log + detrend transforms for all channels.
    channel.filter('2015-01-01', '2018-12-31')
    channel.aggregate('7d')

    channel.transform(mogptk.TransformLog)
    channel.transform(mogptk.TransformDetrend)

    # Range removal (when configured) always precedes the random removal.
    gap, pct = removal_plan[i]
    if gap is not None:
        channel.remove_range(*gap)
    channel.remove_randomly(pct=pct)

    channel.plot()

# Optional diagnostics, disabled:
# for i, channel in enumerate(data):
    # channel.plot_spectrum(per='day')
    # print('BNSE:', channel.get_bnse_estimation())
    # print('LombScargle:', channel.get_ls_estimation())

# Optional explicit prediction ranges, disabled:
# oil.set_pred_range('2015-07-01', '2018-07-01', n=1000)
# gold.set_pred_range('2015-07-01', '2018-07-01', n=1000)
# nasdaq.set_pred_range('2015-07-01', '2018-07-01', n=1000)
# usd.set_pred_range('2015-07-01', '2018-07-01', n=1000)

# Test sets: the entries of each channel where `mask` is False (presumably the
# removed observations -- confirm against mogptk), with outputs mapped back
# through mogptk.data._detransform.
x_test, y_test = [], []
for series in data:
    held_out = ~series.mask
    x_test.append(series.X[held_out])
    y_test.append(
        mogptk.data._detransform(series.transformations, series.X, series.Y)[held_out]
    )

## MOSM

In [None]:
%%time
n_trials = 5

# Trained models, plus one error table per metric (rows: trials, columns: channels).
mosm_list = []
mosm_mae, mosm_mape, mosm_rmse = (np.zeros((n_trials, len(cols))) for _ in range(3))

for n in range(n_trials):
    # Fresh Q=4 MOSM model, initialised with the 'BNSE' method.
    model_mosm = mogptk.MOSM(data, Q=4)
    model_mosm.init_params('BNSE')

    print('Starting trial', n)
    model_mosm.train(method='L-BFGS-B', maxiter=5000, tol=1e-50)

    print('='*50)

    mosm_list.append(model_mosm)

    # Errors per channel: columns 0, 1 and 2 of error[0] are stored as
    # MAE, MAPE and RMSE respectively.
    error = mogptk.test_errors(model_mosm, x_test=x_test, y_test=y_test)
    per_channel = np.array(error[0])
    for column, store in enumerate((mosm_mae, mosm_mape, mosm_rmse)):
        store[n, :] = per_channel[:, column]
    print('Finished trial', n)

In [None]:
# Per-channel mean and standard deviation over trials of each MOSM error metric.
pd.DataFrame(
    np.column_stack([
        mosm_mae.mean(0), mosm_mape.mean(0), mosm_rmse.mean(0),
        mosm_mae.std(0), mosm_mape.std(0), mosm_rmse.std(0),
    ]),
    columns=['MAE', 'MAPE', 'RMSE', 'MAE std', 'MAPE% std', 'RMSE std'],
)

In [None]:
# Select the trial with the lowest MAPE averaged over channels.  This
# assignment has to run: best_model is read by the plot call below and by the
# correlation, Student-t and pickle cells further down, which would otherwise
# fail with a NameError.
best_model = mosm_list[np.argmin(mosm_mape.mean(1))]
ylims = [(20, 100), (1000, 1500), (4000, 8600), (110, 135)]
f, axarr = mogptk.plot_prediction(best_model,
                       grid=(2, 2),
                       names=cols,
                       title='',
                       ylims=ylims,
                       figsize=(12, 4));

# (first, last) y tick per channel; three evenly spaced ticks are drawn.
yticks = [(30, 90), (1100, 1400), (4500, 8000), (114, 130)]

for i in range(len(cols)):
    axarr[i].set_title(cols[i], fontsize=13)
    axarr[i].set_yticks(np.linspace(yticks[i][0], yticks[i][1], 3))

    # Take the channel from the model that is actually plotted (not from the
    # last trial), matching the equivalent Student-t plotting cell.
    channel = best_model.data[i]

    # Nine evenly spaced dates across the data window; tick positions are the
    # dates converted by the channel's first formatter.
    xlabels = pd.date_range('2015-01-01', '2018-12-31', periods=9).date
    xticks = [channel.formatters[0]._parse(str(label)) for label in xlabels]
    
    # Label only every other tick.  Labels at positions 4, 6 and 8 are moved
    # forward one day (in place, through the slice view) after the tick
    # positions were computed -- presumably to show rounder dates; confirm.
    new_xlabels = ['']*9
    xlabels[::2][2:] += datetime.timedelta(days=1)
    new_xlabels[::2] = xlabels[::2]
    
    axarr[i].set_xticks(xticks)
    axarr[i].set_xticklabels(new_xlabels)

plt.tight_layout()
    
# plt.savefig('output/mosm_gonu.pdf', bbox_inches='tight')

# Separate Q=3 MOSM fit with default initialisation.
# NOTE(review): init_params() without a method and plot/print/info/predict/
# plot_data look like a different mogptk API surface than the cells above --
# confirm these methods exist in the installed version.
mosm = mogptk.MOSM(data, Q=3)
mosm.init_params()

mosm.train(tol=1e-6, maxiter=2000)
mosm.plot()
mosm.print()
mosm.info()

mosm.predict()
mosm.plot_data()

In [None]:
# Correlation plot of the selected MOSM model, labelled with the channel names.
f, ax, corr_matrix = best_model.plot_correlations()
ax.set_title('GONU', pad=20)
for set_ticks, set_labels in ((ax.set_yticks, ax.set_yticklabels),
                              (ax.set_xticks, ax.set_xticklabels)):
    set_ticks(np.arange(4))
    set_labels(cols)
plt.savefig('output/corr_gonu.pdf', bbox_inches='tight')

In [None]:
# Third value returned by plot_correlations() for every trained MOSM model,
# in trial order (each call also draws its plot).
corr_list = [
    corr
    for _, _, corr in (model.plot_correlations() for model in mosm_list)
]

In [None]:
# Stack the per-trial correlation matrices into one array and show its shape.
corr_array = np.asarray(corr_list)
np.shape(corr_array)

In [None]:
# Correlation averaged over trials, drawn on a colour scale symmetric around zero.
mean_corr = corr_array.mean(0)
color_range = np.abs(mean_corr).max()

f, ax = plt.subplots()
im = ax.matshow(mean_corr, cmap='coolwarm', vmin=-color_range, vmax=color_range)
f.colorbar(im)

# Write each value (one decimal) in the centre of its cell.
for (i, j), z in np.ndenumerate(mean_corr):
    ax.text(j, i, '{:0.1f}'.format(z), ha='center', va='center',
            bbox={'boxstyle': 'round', 'facecolor': 'white',
                  'alpha': 0.5, 'edgecolor': '0.9'})

for set_ticks, set_labels in ((ax.set_yticks, ax.set_yticklabels),
                              (ax.set_xticks, ax.set_xticklabels)):
    set_ticks(np.arange(4))
    set_labels(cols)
plt.savefig('output/corr_gonu_mean.pdf', bbox_inches='tight')

## MOSM with Student-t likelihood

In [None]:
# Likelihood class (not an instance) handed to train() in the Student-t trials below.
like = gpflow.likelihoods.StudentT

In [None]:
%%time
n_trials = 1
like_params = dict(scale=1e-3, df=3)

# Trained models, plus one error table per metric (rows: trials, columns: channels).
mosm_student_list = []
mosm_student_mae, mosm_student_mape, mosm_student_rmse = (
    np.zeros((n_trials, len(cols))) for _ in range(3)
)

for n in range(n_trials):
    mosm_student = mogptk.MOSM(data, Q=4)
    # Start from the parameters of the selected Gaussian-likelihood model
    # instead of a BNSE initialisation.
    # NOTE(review): params is assigned by reference, not copied -- confirm
    # that training cannot modify best_model.params.
    mosm_student.params = best_model.params

    print('Starting trial', n)
    # Disabled alternative:
    # mosm_student.build(likelihood=like, like_params=like_params, variational=False)
    mosm_student.train(
        method='L-BFGS-B',
        maxiter=5000,
        tol=1e-50,
        likelihood=like,
        like_params=like_params,
        variational=True,
    )

    print('='*50)

    mosm_student_list.append(mosm_student)

    # Errors per channel: columns 0, 1 and 2 of error[0] are stored as
    # MAE, MAPE and RMSE respectively.
    error = mogptk.test_errors(mosm_student, x_test=x_test, y_test=y_test)
    per_channel = np.array(error[0])
    for column, store in enumerate(
            (mosm_student_mae, mosm_student_mape, mosm_student_rmse)):
        store[n, :] = per_channel[:, column]
    print('Finished trial', n)

In [None]:
# Per-channel mean and standard deviation over trials of each Student-t MOSM error metric.
pd.DataFrame(
    np.column_stack([
        mosm_student_mae.mean(0), mosm_student_mape.mean(0), mosm_student_rmse.mean(0),
        mosm_student_mae.std(0), mosm_student_mape.std(0), mosm_student_rmse.std(0),
    ]),
    columns=['MAE', 'MAPE', 'RMSE', 'MAE std', 'MAPE% std', 'RMSE std'],
)

In [None]:
# Overall mean MAPE (over all trials and channels): Gaussian-likelihood MOSM vs. Student-t MOSM.
mosm_mape.mean(), mosm_student_mape.mean()

In [None]:
# Inspect the `scale` attribute of the likelihood of the last Student-t model trained.
mosm_student.likelihood.scale

In [None]:
# Student-t trial with the lowest MAPE averaged over channels.
best_student = mosm_student_list[np.argmin(mosm_student_mape.mean(1))]
ylims = [(20, 100), (1000, 1500), (4000, 8600), (110, 135)]
f, axarr = mogptk.plot_prediction(
    best_student,
    grid=(2, 2),
    names=cols,
    title='',
    ylims=ylims,
    figsize=(12, 4),
)

# (first, last) y tick per channel; three evenly spaced ticks are drawn.
yticks = [(30, 90), (1100, 1400), (4500, 8000), (114, 130)]

for i, (panel_title, (tick_lo, tick_hi)) in enumerate(zip(cols, yticks)):
    panel = axarr[i]
    panel.set_title(panel_title, fontsize=13)
    panel.set_yticks(np.linspace(tick_lo, tick_hi, 3))

    channel = best_student.data[i]

    # Nine evenly spaced dates across the data window; tick positions are the
    # dates converted by the channel's first formatter.
    xlabels = pd.date_range('2015-01-01', '2018-12-31', periods=9).date
    xticks = [channel.formatters[0]._parse(str(label)) for label in xlabels]

    # Label every other tick; the shown labels at positions 4, 6 and 8 are
    # moved forward one day in place, after the tick positions were computed.
    shown = xlabels[::2]
    shown[2:] += datetime.timedelta(days=1)
    new_xlabels = [''] * 9
    new_xlabels[::2] = shown

    panel.set_xticks(xticks)
    panel.set_xticklabels(new_xlabels)

plt.tight_layout()

In [None]:
import pickle

# Persist the parameters of the selected Gaussian-likelihood MOSM model,
# using the highest pickle protocol available.
params_to_save = best_model.params
with open('output/best_mosm_gauss_likeli.p', 'wb') as fp:
    pickle.dump(params_to_save, fp, pickle.HIGHEST_PROTOCOL)

## CSM

In [None]:
%%time
n_trials = 5

# One error table per metric (rows: trials, columns: channels).
csm_mae, csm_mape, csm_rmse = (np.zeros((n_trials, len(cols))) for _ in range(3))

for n in range(n_trials):
    # Fresh Q=3 CSM model, initialised with the 'BNSE' method.
    model_csm = mogptk.CSM(data, Q=3)
    model_csm.init_params('BNSE')

    print('Starting trial', n)
    model_csm.train(method='L-BFGS-B', maxiter=8000, tol=1e-15)
    print('Finished trial', n)
    print('='*50)

    # Errors per channel: columns 0, 1 and 2 of error[0] are stored as
    # MAE, MAPE and RMSE respectively.
    error = mogptk.test_errors(model_csm, x_test=x_test, y_test=y_test)
    per_channel = np.array(error[0])
    for column, store in enumerate((csm_mae, csm_mape, csm_rmse)):
        store[n, :] = per_channel[:, column]

In [None]:
# Per-channel mean and standard deviation over trials of each CSM error metric.
pd.DataFrame(
    np.column_stack([
        csm_mae.mean(0), csm_mape.mean(0), csm_rmse.mean(0),
        csm_mae.std(0), csm_mape.std(0), csm_rmse.std(0),
    ]),
    columns=['MAE', 'MAPE', 'RMSE', 'MAE std', 'MAPE% std', 'RMSE std'],
)

In [None]:
# Prediction plot for the CSM model of the last trial; `ylims` is the list
# defined in an earlier plotting cell.
csm_plot_options = dict(
    grid=(2, 2),
    names=cols,
    title='',
    ylims=ylims,
    figsize=(12, 4),
)
mogptk.plot_prediction(model_csm, **csm_plot_options)

# Separate Q=3 CSM fit with default initialisation.
csm = mogptk.CSM(data, Q=3)
csm.init_params()
csm.train(maxiter=2000, tol=1e-6)
csm.print()

csm.predict()
csm.plot_data()

## SM-LMC

In [None]:
%%time
n_trials = 5

# One error table per metric (rows: trials, columns: channels).
smlmc_mae, smlmc_mape, smlmc_rmse = (np.zeros((n_trials, len(cols))) for _ in range(3))

for n in range(n_trials):
    print('Trial ', n)
    # Fresh Q=3 SM-LMC model, initialised with the 'BNSE' method.
    model_smlmc = mogptk.SM_LMC(data, Q=3)
    model_smlmc.init_params('BNSE')
    print('='*50)

    model_smlmc.train(method='L-BFGS-B', maxiter=8000, tol=1e-50)

    # Errors per channel: columns 0, 1 and 2 of error[0] are stored as
    # MAE, MAPE and RMSE respectively.
    error = mogptk.test_errors(model_smlmc, x_test=x_test, y_test=y_test)
    per_channel = np.array(error[0])
    for column, store in enumerate((smlmc_mae, smlmc_mape, smlmc_rmse)):
        store[n, :] = per_channel[:, column]

In [None]:
# Per-channel mean and standard deviation over trials of each SM-LMC error metric.
pd.DataFrame(
    np.column_stack([
        smlmc_mae.mean(0), smlmc_mape.mean(0), smlmc_rmse.mean(0),
        smlmc_mae.std(0), smlmc_mape.std(0), smlmc_rmse.std(0),
    ]),
    columns=['MAE', 'MAPE%', 'RMSE', 'MAE std', 'MAPE% std', 'RMSE std'],
)

In [None]:
# Prediction plot for the SM-LMC model of the last trial; `ylims` is the list
# defined in an earlier plotting cell.
smlmc_plot_options = dict(
    grid=(2, 2),
    names=cols,
    title='',
    ylims=ylims,
    figsize=(12, 4),
)
mogptk.plot_prediction(model_smlmc, **smlmc_plot_options)

# Separate Q=3 SM-LMC fit with default initialisation.
smlmc = mogptk.SM_LMC(data, Q=3)
smlmc.init_params()
smlmc.train(maxiter=2000, tol=1e-6)
smlmc.print()

smlmc.predict()
smlmc.plot_data()

## Results

In [None]:
# Overall mean MAPE (all trials and channels) for MOSM, CSM and SM-LMC, in that order.
print(*(scores.mean() for scores in (mosm_mape, csm_mape, smlmc_mape)))

In [None]:
# Overall MAPE standard deviation (all trials and channels) for MOSM, CSM and SM-LMC, in that order.
print(*(scores.std() for scores in (mosm_mape, csm_mape, smlmc_mape)))