In [None]:
# execute if not installed
import sys
import os 
sys.path.insert(0, '../')

import mogptk

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

%reload_ext autoreload
%autoreload 2

In [None]:
# Plot configuration shared by every figure in the notebook:
# seaborn 'paper' context with fonts scaled by 1.3, the 'ticks' style,
# and a 10 x 5 default matplotlib figure size.
sns.set_context('paper', font_scale=1.3)
sns.set_style('ticks')
plt.rcParams.update({'figure.figsize': (10, 5)})

In [None]:
# Load every stock file found in the data directory into its own DataFrame.
data_path = 'data/finance_datasets/stock_data/'
df_list = []  # one DataFrame per file, in the same order as `cols`
cols = []     # file names; later cells pass them on as channel labels

# os.listdir returns entries in arbitrary order. Sort them so that positional
# channel indices used further down (data[0], data[3], ...) refer to the same
# file on every run and on every machine.
for fname in sorted(os.listdir(data_path)):
    fpath = os.path.join(data_path, fname)
    if not os.path.isfile(fpath):
        # skip sub-directories (e.g. notebook checkpoint folders)
        continue
    df = pd.read_csv(fpath)
    # parse the 'Date' column so it can be compared against datetime bounds
    df['Date'] = pd.to_datetime(df['Date'])
    cols.append(fname)
    df_list.append(df)

In [None]:
# Preview the first rows of the first loaded frame.
df_list[0].head()

Restrict every series to the period from 2001-05-29 to 2001-12-25 (inclusive).

In [None]:
# Keep only the analysis window and the two columns used downstream, then add
# a numeric 'Day' column: days elapsed since the first retained row of each
# frame.
# NOTE(review): the day origin is per frame; this assumes every file has a row
# on the same first date inside the window -- confirm.
window_start = np.datetime64('2001-05-29')
window_end = np.datetime64('2001-12-25')

for i, df in enumerate(df_list):
    in_window = (df['Date'] >= window_start) & (df['Date'] <= window_end)
    # One .loc selection plus an explicit copy: adding the 'Day' column below
    # then writes to an independent frame instead of a chained-indexing result
    # (which is what triggers pandas' SettingWithCopyWarning).
    windowed = df.loc[in_window, ['Date', 'Adj Close']].copy()
    if windowed.empty:
        # fail with a readable message instead of an IndexError from iloc[0]
        raise ValueError(
            f'{cols[i]}: no rows between {window_start} and {window_end}'
        )
    # windowed.set_index('Date', inplace=True)
    windowed['Day'] = (windowed['Date'] - windowed['Date'].iloc[0]).dt.days
    df_list[i] = windowed
    print(df_list[i].shape)

In [None]:
# Wrap each windowed frame in a mogptk.Data channel: elapsed days as input X,
# adjusted close price as output Y.
# Keep this as a plain `for` loop with the variable named `df`: the name stays
# bound to the last frame after the loop, and later cells may read it.
data = []
for df in df_list:
    days, prices = df['Day'].values, df['Adj Close'].values
    data.append(mogptk.Data(X=days, Y=prices))

In [None]:
# Hold out parts of each channel; the removed observations are collected at
# the end of the cell as the test set.

# randomly sample from the data
# for channel in data:
#     channel.remove_randomly(pct=0.3)

# drop chunks to simulate sensor failure
# (remove_range(start, end): None presumably leaves that side open -- confirm
# against the mogptk.Data API)

# Channel 0: cut from the last day of its *own* frame, rather than from
# whichever frame an earlier loop happened to leave bound to `df`.
data[0].remove_range(df_list[0]['Day'].iloc[-1], None)
# data[1].remove_range(90, 120)
# data[2].remove_range(90, 120)
data[3].remove_range(90, 120)
data[4].remove_range(None, 30)
data[5].remove_range(None, 30)
data[6].remove_range(50, 60)
data[7].remove_range(50, 65)
# data[8].remove_range(90, 120)

# every channel except the first also loses everything from day 180 onwards
for channel in data[1:]:
    channel.remove_range(180, None)

# add pred values

# use data class
# `~mask` presumably selects the removed (held-out) observations -- confirm
# the mask convention of mogptk.Data.
x_test = [channel.X[~channel.mask] for channel in data]
y_test = [
    frame['Adj Close'].values[~channel.mask]
    for frame, channel in zip(df_list, data)
]

## Likelihood

In [None]:
# Likelihood settings handed to each model's train() call further down.
# None / empty dict presumably selects the library default -- TODO confirm.
like, like_params = None, {}

#  MOSM

In [None]:
# Train MOSM `n_trials` times, record the per-channel test errors of every
# trial, and display their mean and standard deviation across trials.
n_trials = 1

n_channels = len(df_list)
mosm_mae = np.zeros((n_trials, n_channels))
mosm_mape = np.zeros((n_trials, n_channels))
mosm_rmse = np.zeros((n_trials, n_channels))

for n in range(n_trials):
    # a fresh model per trial so trials do not share fitted parameters
    model_mosm = mogptk.MOSM(data, Q=5)
    model_mosm.init_params('BNSE')

    print('Starting trial', n)
    # NOTE(review): with tol=1e-50 the optimiser presumably never stops on
    # tolerance and runs until maxiter -- confirm this is intended.
    model_mosm.train(method='L-BFGS-B',
                     maxiter=2000,
                     tol=1e-50,
                     likelihood=like,
                     variational=True,
                     like_params=like_params)
    print('Finished trial', n)
    print('='*50)

    # errors per channel; convert once and slice the three metric columns
    # (0 -> MAE, 1 -> MAPE, 2 -> RMSE, as stored below)
    error = mogptk.test_errors(model_mosm, x_test=x_test, y_test=y_test)
    channel_errors = np.array(error[0])

    mosm_mae[n, :] = channel_errors[:, 0]
    mosm_mape[n, :] = channel_errors[:, 1]
    mosm_rmse[n, :] = channel_errors[:, 2]

# Summary table, one row per channel. The 'MAPE%' label matches the
# 'MAPE% std' column and the SM-LMC summary table.
pd.DataFrame(np.c_[mosm_mae.mean(0), mosm_mape.mean(0), mosm_rmse.mean(0),
                   mosm_mae.std(0), mosm_mape.std(0), mosm_rmse.std(0)],
             columns=['MAE', 'MAPE%', 'RMSE', 'MAE std', 'MAPE% std', 'RMSE std'])

In [None]:
# Plot the trained MOSM model's predictions, labelling channels with the
# source file names; the trailing ';' suppresses the cell's output repr.
mogptk.plot_prediction(
    model_mosm,
    grid=(5, 2),
    names=cols,
    title='Stocks MOSM',
);

# CSM

In [None]:
# Train CSM `n_trials` times, record the per-channel test errors of every
# trial, and display their mean and standard deviation across trials.
n_trials = 1

n_channels = len(df_list)
csm_mae = np.zeros((n_trials, n_channels))
csm_mape = np.zeros((n_trials, n_channels))
csm_rmse = np.zeros((n_trials, n_channels))

for n in range(n_trials):
    # a fresh model per trial so trials do not share fitted parameters
    model_csm = mogptk.CSM(data, Q=5)
    model_csm.init_params('BNSE')

    print('Starting trial', n)
    # NOTE(review): with tol=1e-50 the optimiser presumably never stops on
    # tolerance and runs until maxiter -- confirm this is intended.
    model_csm.train(method='L-BFGS-B',
                    maxiter=2000,
                    tol=1e-50,
                    likelihood=like,
                    variational=True,
                    like_params=like_params)
    print('Finished trial', n)
    print('='*50)

    # errors per channel; convert once and slice the three metric columns
    # (0 -> MAE, 1 -> MAPE, 2 -> RMSE, as stored below)
    error = mogptk.test_errors(model_csm, x_test=x_test, y_test=y_test)
    channel_errors = np.array(error[0])

    csm_mae[n, :] = channel_errors[:, 0]
    csm_mape[n, :] = channel_errors[:, 1]
    csm_rmse[n, :] = channel_errors[:, 2]

# Summary table, one row per channel. The 'MAPE%' label matches the
# 'MAPE% std' column and the SM-LMC summary table.
pd.DataFrame(np.c_[csm_mae.mean(0), csm_mape.mean(0), csm_rmse.mean(0),
                   csm_mae.std(0), csm_mape.std(0), csm_rmse.std(0)],
             columns=['MAE', 'MAPE%', 'RMSE', 'MAE std', 'MAPE% std', 'RMSE std'])

In [None]:
# Plot the trained CSM model's predictions, labelling channels with the
# source file names; the trailing ';' suppresses the cell's output repr.
mogptk.plot_prediction(
    model_csm,
    grid=(5, 2),
    names=cols,
    title='Stocks CSM',
);

# SM-LMC

In [None]:
# Train SM-LMC `n_trials` times, keep the per-channel test errors of every
# trial, and display their mean and standard deviation across trials.
n_trials = 1

n_channels = len(df_list)
smlmc_mae, smlmc_mape, smlmc_rmse = (
    np.zeros((n_trials, n_channels)) for _ in range(3)
)

for n in range(n_trials):
    print('Trial ', n)
    # a fresh model per trial so trials do not share fitted parameters
    model_smlmc = mogptk.SM_LMC(data, Q=5)
    model_smlmc.init_params('BNSE')

    train_options = dict(
        method='L-BFGS-B',
        maxiter=2000,
        tol=1e-50,
        likelihood=like,
        variational=True,
        like_params=like_params,
    )
    model_smlmc.train(**train_options)

    # per-channel errors; metric columns are 0 -> MAE, 1 -> MAPE, 2 -> RMSE
    error = mogptk.test_errors(model_smlmc, x_test=x_test, y_test=y_test)
    channel_errors = np.array(error[0])

    smlmc_mae[n, :] = channel_errors[:, 0]
    smlmc_mape[n, :] = channel_errors[:, 1]
    smlmc_rmse[n, :] = channel_errors[:, 2]

# Summary table, one row per channel: means first, then standard deviations.
summary_columns = ['MAE', 'MAPE%', 'RMSE', 'MAE std', 'MAPE% std', 'RMSE std']
pd.DataFrame(
    np.column_stack([
        smlmc_mae.mean(0), smlmc_mape.mean(0), smlmc_rmse.mean(0),
        smlmc_mae.std(0), smlmc_mape.std(0), smlmc_rmse.std(0),
    ]),
    columns=summary_columns,
)

In [None]:
# Plot the trained SM-LMC model's predictions, labelling channels with the
# source file names; the trailing ';' suppresses the cell's output repr.
mogptk.plot_prediction(
    model_smlmc,
    grid=(5, 2),
    names=cols,
    title='Stocks SM-LMC',
);