# Setting up our Notebook

In [10]:
# Things to import

# Standard data, plotting, and mathematical tools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Statistical Tools
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

# PCA
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow import keras
from keras import utils
from keras.preprocessing.sequence import TimeseriesGenerator
import tensorflow as tf

# Grid Search
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.preprocessing import StandardScaler
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Importing the data

In [7]:
# Importing the data

dfs=['Non-Scaled TA Features 1H for BTC.csv']

# Classification Grid Search

## X, y, Train, Validation, and Test sets

In [67]:
# Defining a function to create PCA matrices

def PCA_creation_train_val_test(no_components, X_train, X_val, X_test):
    
    # Scaling the data with our X_train matrix
    scaler=StandardScaler()
    X_train=scaler.fit_transform(X_train)
    
    # Fitting the PCA to our X_train matrix
    pca=PCA(n_components=no_components)
    X_train=pca.fit_transform(X_train)

    # Scaling the X_val and X_test 
    X_val=scaler.transform(X_val)
    X_test=scaler.transform(X_test)

    # Transforming the X_val and X_test
    X_val=pca.transform(X_val)
    X_test=pca.transform(X_test)
    
    return X_train, X_val, X_test

In [207]:
# Train and test splitting and scaling
X=pd.read_csv(dfs[0])
X=X.dropna()
y=X['Label']
X=X.drop('Label', axis=1)
X=X.drop('Unnamed: 0', axis=1)
X=X.drop('Percent Change', axis=1)

# Split into train, val, and test
X_train, X_val, y_train, y_val = train_test_split(X, y,test_size=0.1, random_state=100, shuffle=False)
X_train, X_test, y_train, y_test=train_test_split(X_train, y_train, test_size=0.1, random_state=100, shuffle=False)


X_train, X_val, X_test=PCA_creation_train_val_test(20, X_train, X_val, X_test)

## Autoregressive Model Identification: The partial auto-correlation

The estimated partial auto-correlation function (PACF) can be used to identify the order of an autoregressive time series model. Values of $|\tau_h|$ greater or equal to $\frac{\Phi^{-1}(\alpha)}{\sqrt{T}}$, where $T$ is the number of observations and $\Phi(z)$ is the standard normal CDF, are significant lag $h$ partial autocorelations at the $\alpha$ confidence level.

We use the stattools package to estimate the PACF. The nlags parameter is the maximum number of lags used for PACF estimation.

We will then use this to determine the number of look-back steps for the most important feature, our first PCA component

In [11]:
# Finding the best number of steps for our dataset

pacf = sm.tsa.stattools.pacf(X_train[:,0], nlags=8)
T = len(X_train[:,0])

sig_test = lambda tau_h: np.abs(tau_h) > 2.58/np.sqrt(T)

for i in range(len(pacf)):
    if sig_test(pacf[i]) == False:
        n_steps = i - 1
        print('n_steps set to', n_steps)
        break

n_steps set to 3


## Train, Test, and Validation Lagged Sets

In [208]:
## Getting our training, validation, and testing sets

# Lagging function
def lagged_matrices(n_steps, X_matrix, y_matrix):
    X_lagged=[]
    y_lagged=[]
    y_matrix=y_matrix.values
    for i in range(n_steps, X_matrix.shape[0]):
        lag_set=X_matrix[i-n_steps:i]
        X_lagged.append(lag_set) # We are taking the last n_input to the present time periods as the 
        # X values.
        y_lagged.append(y_matrix[i])
    X_lagged=np.array(X_lagged) # They need to be arrays
    return X_lagged, y_lagged


# Classes, sets, and features
num_classes=2 
n_steps = 3 # How many time periods into the past we will look. Our avg_steps was a float, so we convert
# it to an integer
n_features = len(X.iloc[0]) # Number of PCA features

# Getting our lagged matrices
X_train, y_train=lagged_matrices(n_steps, X_train, y_train)
X_val, y_val=lagged_matrices(n_steps, X_val, y_val)
X_test, y_test=lagged_matrices(n_steps, X_test, y_test)

# One-hot Encoding our y vectors
y_train = utils.to_categorical(y_train, num_classes)
y_val = utils.to_categorical(y_val, num_classes)
y_test = utils.to_categorical(y_test, num_classes)

print(X_train.shape) # Confirming that our shape is (n_instances, n_steps, n_features)

(30939, 3, 20)


## Grid Search

In [190]:
def create_lstm_model(activ_function='relu', neurons=50, dropout_rate=0.1, num_layers=1, 
                     optimizer='adam',init_lr=1e-1, decay_steps=5000, decay_rates=0.1):
    # create model
    model = keras.Sequential()
    
    if num_layers==1:
        #Adding the First input hidden layer and the LSTM layer
        # return_sequences = True, means the output of every time step to be shared with hidden next layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=False)) # For the final later, we don't need to return the sequences
        model.add(keras.layers.Dropout(dropout_rate))

        
    if num_layers==2:
        #Adding the First input hidden layer and the LSTM layer
        # return_sequences = True, means the output of every time step to be shared with hidden next layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Second layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=False)) # For the final later, we don't need to return the sequences
        model.add(keras.layers.Dropout(dropout_rate))

        
    if num_layers==3:
        #Adding the First input hidden layer and the LSTM layer
        # return_sequences = True, means the output of every time step to be shared with hidden next layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Second layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Third layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=False)) # For the final later, we don't need to return the sequences
        model.add(keras.layers.Dropout(dropout_rate))
        
    if num_layers==4:
        #Adding the First input hidden layer and the LSTM layer
        # return_sequences = True, means the output of every time step to be shared with hidden next layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Second layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Third layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Fourth layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=False)) # For the final later, we don't need to return the sequences
        model.add(keras.layers.Dropout(dropout_rate))
        
    # Final layer with softmax for classification
    model.add(keras.layers.Dense(3, activation="softmax"))
        
    # Running through the optimizers
    if optimizer=='adam':
        # Learning Rate Schedule
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=init_lr,
        decay_steps=decay_steps,
        decay_rate=decay_rates)
    
        optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
        
    if optimizer=='RMSprop':
        # Learning Rate Schedule
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=init_lr,
        decay_steps=decay_steps,
        decay_rate=decay_rates)
    
        optimizer = keras.optimizers.RMSProp(learning_rate=lr_schedule)
        
    if optimizer=='SGD':
        # Learning Rate Schedule
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=init_lr,
        decay_steps=decay_steps,
        decay_rate=decay_rates)
    
        optimizer = keras.optimizers.SGD(learning_rate=lr_schedule)
        
    # Compile model
    model.compile( 
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        )    
    return model

In [191]:
# Creating and compiling the model with a summary

lstm = create_lstm_model()
lstm.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 50)                30400     
_________________________________________________________________
dropout (Dropout)            (None, 50)                0         
_________________________________________________________________
dense (Dense)                (None, 3)                 153       
Total params: 30,553
Trainable params: 30,553
Non-trainable params: 0
_________________________________________________________________


In [192]:
# Optimize the model with grid search

# Grid search parameters
n_epochs_cv = 20 # Number of epochs for our grid search
n_cv = 3 # Number of cross validations

# Create model to feed to our parameter grid search
model = KerasClassifier(build_fn=create_lstm_model, verbose=1)

# Define parameters and values for grid search to check in our model
param_grid = {
    # Model parameters
    'activ_function':['relu', 'tanh', 'sigmoid'],
    'neurons':[50,100,150,200,250,300],
    'dropout_rate':[0.1, 0.2, 0.3, 0.4, 0.5],
    'num_layers':[1, 2, 3],
    'optimizer':['adam', 'RMSProp', 'SGD'],
     # Optimizer parameters
    'init_lr':[1e-1,1e-2,1e-3,1e-4,1e-5],
    'decay_steps':range(1000,10000,1000),
    'decay_rates':[.5,.6,.7,.8,.9],
    # The number of epochs for each model
    'epochs': [n_epochs_cv],
}

# Creating the grid
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=n_cv)

# Fitting the grid
grid_result = grid.fit(X_train, y_train)

KeyboardInterrupt: 

In [None]:
lstm_model=create_lstm_model(activ_function=grid_result.best_params_['activ_function'], 
                                            neurons=grid_result.best_params_['neurons'], 
                                            dropout_rate=grid_result.best_params_['dropout_rate'],
                                            num_layers=grid_result.best_params_['num_layers'],
                                            optimizer=grid_result.best_params_['optimizer'],
                          init_lr=grid_result.best_params_['init_lr'],
                          decay_steps=grid_result.best_params_['decay_steps'],
                          decay_rates=grid_result.best_params_['decay_rates'])

# Fitting the model
history = lstm_model.fit(X_train, y_train, epochs=50, validation_data=(X_val,y_val), verbose=0)

## Saving and Reloading the Model

In [None]:
# Saving the model

cnn_model.save('Models/LSTM Class 1H BTC.h5')

In [None]:
# Reloading the model

CNN_model_BTC=model = keras.models.load_model('Models/LSTM Class 1H BTC.h5')

# Time-Series Prediction Percent Change Grid Search

## X and y Creation

In [8]:
# Creating feature and dep variable matrices
X=pd.read_csv(dfs[0])
y=X['Percent Change']
X=X.drop('Label', axis=1)
X=X.drop('Unnamed: 0', axis=1)
X=X.drop('Close', axis=1)

print('X shape', X.shape) # Confirming the shape of X

X shape (38201, 101)


## Stationarity

In this case, our prediction is not a class, but a regression, so we should check if our target series, y, is stationary using and Augmented Dickey-Fuller Test

### Augmented Dickey-Fuller Test

In [11]:
result = adfuller(y)
print('ADF Statistic for BTC Closing Values is '+str(result[0]))
print('p-value for BTC Closing Values is ' +str(result[1]))
print('\n')

ADF Statistic for BTC Closing Values is -27.765983580387037
p-value for BTC Closing Values is 0.0




From a p-value of 0, we fail to reject the null hypothesis and will treat the percent changes of BTC as stationary.

## Auto-Correlation

In [27]:
# Defining a function to create PCA matrices

def PCA_creation_train_val_test(no_components, X_train, X_val, X_test):
    
    # Scaling the data with our X_train matrix
    scaler=StandardScaler()
    X_train=scaler.fit_transform(X_train)
    
    # Fitting the PCA to our X_train matrix
    pca=PCA(n_components=no_components)
    X_train=pca.fit_transform(X_train)

    # Scaling the X_val and X_test 
    X_val=scaler.transform(X_val)
    X_test=scaler.transform(X_test)

    # Transforming the X_val and X_test
    X_val=pca.transform(X_val)
    X_test=pca.transform(X_test)
    
    return X_train, X_val, X_test

In [33]:
# Split into train, val, and test
X_train, X_val, y_train, y_val = train_test_split(X, y,test_size=0.1, random_state=100, shuffle=False)
X_train, X_test, y_train, y_test=train_test_split(X_train, y_train, test_size=0.1, random_state=100, shuffle=False)


X_train, X_val, X_test=PCA_creation_train_val_test(20, X_train, X_val, X_test)

In [29]:
# PACF for our Percent Change and first PCA Component

# PACF for our Percent Change
pacf = sm.tsa.stattools.pacf(y, nlags=30)
T = len(y)

sig_test = lambda tau_h: np.abs(tau_h) > 2.58/np.sqrt(T)

for i in range(len(pacf)):
    if sig_test(pacf[i]) == False:
        n_steps = i - 1
        print('n_steps for Percent Change set to', n_steps)
        break
        
# PACF for our first PCA Component
pacf = sm.tsa.stattools.pacf(X_train[:,0], nlags=8)
T = len(X.iloc[:,0])

sig_test = lambda tau_h: np.abs(tau_h) > 2.58/np.sqrt(T)

for i in range(len(pacf)):
    if sig_test(pacf[i]) == False:
        n_steps = i - 1
        print('n_steps for first PCA Component set to', n_steps)
        break

n_steps for Percent Change set to 2
n_steps for first PCA Component set to 3


So, to capture as much information as possible for our model, we will set the number of lags to 3 here, as well.

## Train, Test, and Validation Data

In [34]:
## Getting our training, validation, and testing sets

# Lagging function
def lagged_matrices(n_steps, X_matrix, y_matrix):
    X_lagged=[]
    y_lagged=[]
    y_matrix=y_matrix.values
    for i in range(n_steps, X_matrix.shape[0]):
        lag_set=X_matrix[i-n_steps:i]
        X_lagged.append(lag_set) # We are taking the last n_input to the present time periods as the 
        # X values.
        y_lagged.append(y_matrix[i])
    X_lagged=np.array(X_lagged) # They need to be arrays
    return X_lagged, y_lagged


# Classes, sets, and features
num_classes=2 
n_steps = 3 # How many time periods into the past we will look. Our avg_steps was a float, so we convert
# it to an integer
n_features = len(X.iloc[0]) # Number of PCA features

# Getting our lagged matrices
X_train, y_train=lagged_matrices(n_steps, X_train, y_train)
X_val, y_val=lagged_matrices(n_steps, X_val, y_val)
X_test, y_test=lagged_matrices(n_steps, X_test, y_test)

print(X_train.shape) # Confirming that our shape is (n_instances, n_steps, n_features)

(30939, 3, 20)


## Grid Search

In [36]:
def create_lstm_model(activ_function='relu', neurons=50, dropout_rate=0.1, num_layers=1, 
                     optimizer='adam',init_lr=1e-1, decay_steps=5000, decay_rates=0.1):
    # create model
    model = keras.Sequential()
    
    if num_layers==1:
        #Adding the First input hidden layer and the LSTM layer
        # return_sequences = True, means the output of every time step to be shared with hidden next layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=False)) # For the final later, we don't need to return the sequences
        model.add(keras.layers.Dropout(dropout_rate))

        
    if num_layers==2:
        #Adding the First input hidden layer and the LSTM layer
        # return_sequences = True, means the output of every time step to be shared with hidden next layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Second layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=False)) # For the final later, we don't need to return the sequences
        model.add(keras.layers.Dropout(dropout_rate))

        
    if num_layers==3:
        #Adding the First input hidden layer and the LSTM layer
        # return_sequences = True, means the output of every time step to be shared with hidden next layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Second layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Third layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=False)) # For the final later, we don't need to return the sequences
        model.add(keras.layers.Dropout(dropout_rate))
        
    if num_layers==4:
        #Adding the First input hidden layer and the LSTM layer
        # return_sequences = True, means the output of every time step to be shared with hidden next layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Second layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Third layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=True))
        model.add(keras.layers.Dropout(dropout_rate))
        # Fourth layer
        model.add(keras.layers.LSTM(units = neurons, activation = activ_function, input_shape = (n_steps, n_features), 
               return_sequences=False)) # For the final later, we don't need to return the sequences
        model.add(keras.layers.Dropout(dropout_rate))
        
    # Final layer with softmax for classification
    model.add(keras.layers.Dense(1))
        
    # Running through the optimizers
    if optimizer=='adam':
        # Learning Rate Schedule
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=init_lr,
        decay_steps=decay_steps,
        decay_rate=decay_rates)
    
        optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
        
    if optimizer=='RMSprop':
        # Learning Rate Schedule
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=init_lr,
        decay_steps=decay_steps,
        decay_rate=decay_rates)
    
        optimizer = keras.optimizers.RMSProp(learning_rate=lr_schedule)
        
    if optimizer=='SGD':
        # Learning Rate Schedule
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=init_lr,
        decay_steps=decay_steps,
        decay_rate=decay_rates)
    
        optimizer = keras.optimizers.SGD(learning_rate=lr_schedule)
        
    # Compile model
    model.compile( 
        optimizer=optimizer,
        loss='mean_squared_error',
        metrics=['accuracy'],
        )    
    return model

In [37]:
# Optimize the model with grid search

# Grid search parameters
n_epochs_cv = 20 # Number of epochs for our grid search
n_cv = 3 # Number of cross validations

# Create model to feed to our parameter grid search
model = KerasClassifier(build_fn=create_lstm_model, verbose=1)

# Define parameters and values for grid search to check in our model
param_grid = {
    # Model parameters
    'activ_function':['relu', 'tanh', 'sigmoid'],
    'neurons':[50,100,150,200,250,300],
    'dropout_rate':[0.1, 0.2, 0.3, 0.4, 0.5],
    'num_layers':[1, 2, 3],
    'optimizer':['adam', 'RMSProp', 'SGD'],
     # Optimizer parameters
    'init_lr':[1e-1,1e-2,1e-3,1e-4,1e-5],
    'decay_steps':range(1000,10000,1000),
    'decay_rates':[.5,.6,.7,.8,.9],
    # The number of epochs for each model
    'epochs': [n_epochs_cv],
}

# Creating the grid
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=n_cv)

# Fitting the grid
grid_result = grid.fit(X_train, y_train)

KeyboardInterrupt: 

In [50]:
lstm_model=create_lstm_model(activ_function=grid_result.best_params_['activ_function'], 
                                            neurons=grid_result.best_params_['neurons'], 
                                            dropout_rate=grid_result.best_params_['dropout_rate'],
                                            num_layers=grid_result.best_params_['num_layers'],
                                            optimizer=grid_result.best_params_['optimizer'],
                          init_lr=grid_result.best_params_['init_lr'],
                          decay_steps=grid_result.best_params_['decay_steps'],
                          decay_rates=grid_result.best_params_['decay_rates'])

# Fitting the model
history = lstm_model.fit(X_train, y_train, epochs=50, validation_data=(X_val,y_val), verbose=0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Saving and Reloading the Model

In [None]:
# Saving the model

lstm_model.save('Models/LSTM Reg 1H BTC.h5')

In [None]:
# Reloading the model

lstm_model_BTC=model = keras.models.load_model('Models/LSTM Reg 1H BTC.h5')

In [None]:
print('Done')