In [None]:
#######################################################################
#######################################################################
######################### Importing Packages ##########################
#######################################################################
#######################################################################

# plotting packages
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# numpy and pandas
import numpy as np
from numpy.random import rand
from numpy import ix_
import pandas as pd

# miscellany to make code neater
from typing import Callable
import math
from datetime import datetime, timedelta
import os
from collections import defaultdict

# some basic statistical/numerical scipy parts used to calculate scores
import scipy.stats
import scipy.integrate as integrate
import scipy.special as special

# Tensorflow and Keras parts which are used for data processing and model creation
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.layers import Dense, Activation, Dropout, LSTM, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras.models import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# tensorflow pdf calculator which is used to calculate losses efficiently
import tensorflow_probability as tfp
tfd = tfp.distributions

# Fix the global TensorFlow and NumPy seeds so that stochastic initialisation
# gives the same results each time the notebook is run from a fresh kernel
tf.random.set_seed(11)
np.random.seed(1337)

# Data Importing

In [None]:
# Project root; every other location is derived from it by plain string
# concatenation, so each path keeps its trailing "/".
main_path = "C:/Warwick Final Year/RAE/"
raw_path = f"{main_path}Data/"
processed_path = f"{main_path}Processed Data/"
graphs_path = f"{main_path}Graphs/"
arma_garch_graphs_path = f"{graphs_path}ARMA_GARCH_Graphs/"
checkpoints_path = f"{main_path}Code Python/Model Checkpoints/"

# ARMA_GARCH[series][name] holds the DataFrame read from
# "<processed_path><series>/ARMA_GARCH/<name>.csv"
ARMA_GARCH = defaultdict(dict)

for which_series in ['DAX', 'NASDAQ', 'Nikkei']:
    series_dir = processed_path + which_series + '/ARMA_GARCH'
    for filename in os.listdir(series_dir):
        # anything that is not a csv file is ignored
        if not filename.endswith('.csv'):
            continue
        # the dictionary key is the file name with its '.csv' suffix removed
        fit_name = filename[:-len('.csv')]
        ARMA_GARCH[which_series][fit_name] = pd.read_csv(series_dir + '/' + filename)

In [None]:
#######################################################################
#######################################################################
###################### Data Importing/Cleaning ########################
#######################################################################
#######################################################################

# Read the raw Nikkei, NASDAQ and DAX csv files; log returns are computed later
# by Data_Processor. raw_path already ends with "/", so the file names are
# appended without a leading separator (the Nikkei line used to produce "Data//^N225.csv").
df_Nikkei_RAW = pd.read_csv(raw_path + "^N225.csv")
df_NASDAQ_RAW = pd.read_csv(raw_path + "^IXIC.csv")
df_DAX_RAW = pd.read_csv(raw_path + "^GDAXI.csv")

In [None]:
def Data_Processor(DATA: pd.DataFrame,
                   batch_size: int,
                   length_sample: int,
                   test_train_split: str) -> dict:
    
    """
    #################################################################################################################
    #################################################################################################################
    
    Turns a raw price table into log returns and into batched, lagged training and validation generators.
    Rows without an adjusted close are dropped, log returns are computed from the remaining adjusted closes,
    and the series is split by date: rows strictly before 'test_train_split' are used for training, rows on
    or after it for validation, so no observation appears in both sets.
    
    The input frame is not modified; all work is done on copies.
    
    #################################################################################################################
    #################################################################################################################
    
    @param DATA: pd.DataFrame, price data with a 'Date' column and an 'Adj Close' (or 'Adj_Close') column
    @param batch_size: int, batch size passed to the TimeseriesGenerator objects
    @param length_sample: int, number of lagged observations per sample (TimeseriesGenerator 'length')
    @param test_train_split: str, first date of the validation set, anything pd.Timestamp can parse,
                                  e.g. "2015-01-01" (yyyy-mm-dd)
    
    #################################################################################################################
    #################################################################################################################
    
    @return: dict, dictionary with 3 keys. "Data" is the full log return DataFrame indexed by Date
                                           (single column 'log_ret', first observation dropped)
                                           "Training" is the TimeseriesGenerator over the training log returns
                                           "Validation" is the TimeseriesGenerator over the validation log returns
    
    #################################################################################################################
    #################################################################################################################
    """
    
    # rename() returns a new frame, so the caller's column names are left untouched
    prices = DATA.rename(columns=lambda c: c.replace(' ', '_'))
    # .copy() gives an independent frame rather than a view of the filtered rows
    prices = prices.loc[prices['Adj_Close'].notnull(), ['Date', 'Adj_Close']].copy()

    # log return = log(P_t) - log(P_{t-1}); the first row has no predecessor and is dropped
    log_price = np.log(prices['Adj_Close'])
    returns = pd.DataFrame({'Date': pd.to_datetime(prices['Date']),
                            'log_ret': log_price.diff()}).iloc[1:]
    returns = returns.set_index('Date')

    # split into training and test sets; label slices (.loc[:d] / .loc[d:]) are inclusive
    # at both ends and would put a row dated exactly on the split date into both sets,
    # so boolean masks are used instead
    split_date = pd.Timestamp(test_train_split)
    train = returns.loc[returns.index < split_date]
    test  = returns.loc[returns.index >= split_date]

    # one feature per timestep: arrays of shape (n_observations, 1)
    DATA_train = train[['log_ret']].to_numpy()
    DATA_test  = test[['log_ret']].to_numpy()

    # the series is both the input and the target: each sample is 'length_sample' lags
    # and its target is the next observation
    time_series_generator = TimeseriesGenerator(DATA_train,
                                                DATA_train,
                                                length = length_sample,
                                                batch_size = batch_size)
    time_series_val_generator = TimeseriesGenerator(DATA_test,
                                                    DATA_test,
                                                    length = length_sample,
                                                    batch_size = batch_size)

    return {"Data": returns,
            "Training": time_series_generator,
            "Validation": time_series_val_generator}

In [None]:
# Settings shared by all three series
Batch_Size = 64
Length = 10
Test_Train_Split = '2015-01-01'

# Process the series in the order Nikkei, NASDAQ, DAX and collect the results by name
DATA = {series_name: Data_Processor(raw_frame, Batch_Size, Length, Test_Train_Split)
        for series_name, raw_frame in (('Nikkei', df_Nikkei_RAW),
                                       ('NASDAQ', df_NASDAQ_RAW),
                                       ('DAX', df_DAX_RAW))}

# Individual handles to the same result dictionaries
Nikkei = DATA['Nikkei']
NASDAQ = DATA['NASDAQ']
DAX = DATA['DAX']

In [None]:
# Normal case: for every ARMA-GARCH result table, plot the log returns from 2015-01-01 onwards
# together with the 'Norm_mu' line and the central 90% / 95% bands of a Normal(Norm_mu, Norm_std)
# distribution, and save one pdf per table.
for which_series in ['DAX','NASDAQ','Nikkei']:
    # log returns from 2015-01-01 onwards; identical for every table of a series, so sliced once
    returns = DATA[which_series]['Data'].loc['2015-01-01':]['log_ret']

    for k, fit in ARMA_GARCH[which_series].items():

        means = fit['Norm_mu']
        scale = fit['Norm_std']

        if len(means) != len(returns):
            raise ValueError(f"{which_series}/{k}: {len(means)} fitted rows "
                             f"but {len(returns)} log returns to plot against")

        # plot against the dates of the returns themselves; a daily calendar range would
        # place trading-day observations on weekends and drift away from the true dates
        x = returns.index

        fig, ax = plt.subplots()
        ax.plot(x, returns.to_numpy(), color='#1f77b4', zorder=1)

        ppf = scipy.stats.norm.ppf

        # average log density of the returns under the fitted distribution;
        # logpdf avoids log(0) when a density underflows
        SCORE = round(np.mean(scipy.stats.norm.logpdf(returns.to_numpy(),
                                                      loc = means.to_numpy(),
                                                      scale = scale.to_numpy())), 4)
        NAME = "Normal"

        # ppf(..., loc=means) is already centred on the mean, so the mean must not be added again
        s1 = ax.fill_between(x,
                             ppf(0.95, loc = means, scale = scale),
                             ppf(0.05, loc = means, scale = scale),
                             color = 'green', zorder = 4, alpha = 0.4)
        s2 = ax.fill_between(x,
                             ppf(0.975, loc = means, scale = scale),
                             ppf(0.025, loc = means, scale = scale),
                             color = 'grey', zorder = 3, alpha = 0.5)
        ax.plot(x, means.to_numpy(), color = 'black', zorder = 2)

        ax.set_title(NAME)

        ax.text(pd.to_datetime('2019-06-01'),
                -0.08,
                "SCORE=" + str(SCORE),
                horizontalalignment = 'center',
                fontweight = 'bold',
                color = 'red',
                fontsize = 'medium')

        ax.set_ylabel("Log Returns")
        # labels give the total probability left outside each band
        ax.legend(handles = [s1, s2], labels = ["10%", "5%"], loc = 'upper right')
        ax.set_ylim([-0.15, 0.15])
        fig.tight_layout()

        fig.savefig(arma_garch_graphs_path
                    + which_series
                    + '_'
                    + k
                    + '_'
                    + 'Normal'
                    + '.pdf')
        # close explicitly so figures do not accumulate across loop iterations
        plt.close(fig)


In [None]:
# next the Laplace: same figure as for the Normal case, using the 'Lap_mu' / 'Lap_std' columns
# and the central 90% / 95% bands of a Laplace distribution; one pdf is saved per table.
for which_series in ['DAX','NASDAQ','Nikkei']:
    # log returns from 2015-01-01 onwards; identical for every table of a series, so sliced once
    returns = DATA[which_series]['Data'].loc['2015-01-01':]['log_ret']

    for k, fit in ARMA_GARCH[which_series].items():

        means = fit['Lap_mu']
        # NOTE(review): a Laplace(loc, b) distribution has standard deviation b*sqrt(2), so if
        # 'Lap_std' holds a standard deviation the scale would be Lap_std/sqrt(2) rather than
        # Lap_std/2 - confirm against how the csv files were produced before changing this
        scale = fit['Lap_std']/2

        if len(means) != len(returns):
            raise ValueError(f"{which_series}/{k}: {len(means)} fitted rows "
                             f"but {len(returns)} log returns to plot against")

        # plot against the dates of the returns themselves; a daily calendar range would
        # place trading-day observations on weekends and drift away from the true dates
        x = returns.index

        fig, ax = plt.subplots()
        ax.plot(x, returns.to_numpy(), color = '#1f77b4', zorder = 1)

        ppf = scipy.stats.laplace.ppf

        # average log density of the returns under the fitted distribution;
        # logpdf avoids log(0) when a density underflows
        SCORE = round(np.mean(scipy.stats.laplace.logpdf(returns.to_numpy(),
                                                         loc = means.to_numpy(),
                                                         scale = scale.to_numpy())), 4)
        NAME = "Laplace"

        # ppf(..., loc=means) is already centred on the mean, so the mean must not be added again
        s1 = ax.fill_between(x,
                             ppf(0.95, loc = means, scale = scale),
                             ppf(0.05, loc = means, scale = scale),
                             color = 'green', zorder = 4, alpha = 0.4)
        s2 = ax.fill_between(x,
                             ppf(0.975, loc = means, scale = scale),
                             ppf(0.025, loc = means, scale = scale),
                             color = 'grey', zorder = 3, alpha = 0.5)
        ax.plot(x, means.to_numpy(), color = 'black', zorder = 2)

        ax.set_title(NAME)

        ax.text(pd.to_datetime('2019-06-01'),
                -0.08,
                "SCORE=" + str(SCORE),
                horizontalalignment = 'center',
                fontweight = 'bold',
                color = 'red',
                fontsize = 'medium')

        ax.set_ylabel("Log Returns")
        # labels give the total probability left outside each band
        ax.legend(handles = [s1, s2], labels = ["10%", "5%"], loc = 'upper right')
        ax.set_ylim([-0.15, 0.15])
        fig.tight_layout()

        fig.savefig(arma_garch_graphs_path
                    + which_series
                    + "_"
                    + k
                    + "_"
                    + 'Laplace'
                    + '.pdf')
        # close explicitly so figures do not accumulate across loop iterations
        plt.close(fig)

In [None]:
# finally the T: same figure as for the Normal case, using the 'T_mu' / 'T_std' / 'T_shape'
# columns and the central 90% / 95% bands of a Student-t distribution; one pdf is saved per table.
for which_series in ['DAX','NASDAQ','Nikkei']:
    # log returns from 2015-01-01 onwards; identical for every table of a series, so sliced once
    returns = DATA[which_series]['Data'].loc['2015-01-01':]['log_ret']

    for k, fit in ARMA_GARCH[which_series].items():

        shape = fit['T_shape']

        means = fit['T_mu']

        # a Student-t with 'shape' degrees of freedom and scale s has standard deviation
        # s*sqrt(shape/(shape-2)), so T_std is divided by that factor to obtain the scipy scale
        scale = np.divide(fit['T_std'], np.sqrt(np.divide(shape, (shape-2))))

        if len(means) != len(returns):
            raise ValueError(f"{which_series}/{k}: {len(means)} fitted rows "
                             f"but {len(returns)} log returns to plot against")

        # plot against the dates of the returns themselves; a daily calendar range would
        # place trading-day observations on weekends and drift away from the true dates
        x = returns.index

        fig, ax = plt.subplots()
        ax.plot(x, returns.to_numpy(), color = '#1f77b4', zorder = 1)

        ppf = scipy.stats.t.ppf

        # average log density of the returns under the fitted distribution;
        # logpdf avoids log(0) when a density underflows
        SCORE = round(np.mean(scipy.stats.t.logpdf(returns.to_numpy(),
                                                   loc = means.to_numpy(),
                                                   scale = scale.to_numpy(),
                                                   df = shape.to_numpy())), 4)
        NAME = "Student_T"

        # ppf(..., loc=means) is already centred on the mean, so the mean must not be added again
        s1 = ax.fill_between(x,
                             ppf(0.95, loc = means, scale = scale, df = shape),
                             ppf(0.05, loc = means, scale = scale, df = shape),
                             color = 'green', zorder = 4, alpha = 0.4)
        s2 = ax.fill_between(x,
                             ppf(0.975, loc = means, scale = scale, df = shape),
                             ppf(0.025, loc = means, scale = scale, df = shape),
                             color = 'grey', zorder = 3, alpha = 0.5)
        ax.plot(x, means.to_numpy(), color = 'black', zorder = 2)

        ax.set_title(NAME)

        ax.text(pd.to_datetime('2019-06-01'),
                -0.08,
                "SCORE=" + str(SCORE),
                horizontalalignment = 'center',
                fontweight = 'bold',
                color = 'red',
                fontsize = 'medium')

        ax.set_ylabel("Log Returns")
        # labels give the total probability left outside each band
        ax.legend(handles = [s1,s2], labels = ["10%","5%"], loc = 'upper right')
        ax.set_ylim([-0.15, 0.15])
        fig.tight_layout()

        fig.savefig(arma_garch_graphs_path
                    + which_series
                    + "_"
                    + k
                    + "_"
                    + 'T'
                    + '.pdf')
        # close explicitly so figures do not accumulate across loop iterations
        plt.close(fig)