# 4.4 LSTM on Return Close Target

In this notebook we will attempt to learn on the decision labels 'D2', which have a 50/50 buy/sell split without hold positions indicated. We hope this will be an easier decision function to learn, rather than the 3 class classification of buy/hold/sell. 

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import numpy as np

In [2]:
y = np.load('./data/prepared/august25screenfixed/numpy_matrices/y_br.npy')

In [3]:
y.shape

(30, 4000)

In [4]:
y_train = y[:, :2000]
y_test = y[:, 2000:]

y_train.shape, y_test.shape

((30, 2000), (30, 2000))

In [5]:
y_train = y_train.reshape([60000, 1])
y_test = y_test.reshape([60000, 1])
y_train.shape, y_test.shape

((60000, 1), (60000, 1))

In [6]:
X_train = np.load('./data/prepared/august25screenfixed/numpy_matrices/X_train.npy')
X_test = np.load('./data/prepared/august25screenfixed/numpy_matrices/X_test.npy')

In [7]:
X_train.shape, X_test.shape

((60000, 1, 116, 60), (60000, 1, 116, 60))

In [8]:
np.unique(y_train, return_counts=True)

(array([-0.10514541, -0.06351351, -0.06047963, ...,  0.06489185,
         0.06741573,  0.12470588]),
 array([1, 1, 1, ..., 1, 1, 1]))

In [9]:
np.unique(y_test, return_counts=True)

(array([-0.07625272, -0.061875  , -0.06145251, ...,  0.09659091,
         0.09957627,  0.10841704]),
 array([1, 1, 1, ..., 1, 1, 1]))

In [11]:
# Undersample to balance classes for training set ?
b = np.where(y_train == 1)[0]
s = np.where(y_train == 2)[0]

bi = np.random.choice(b, size=21000, replace=False)
si = np.random.choice(s, size=21000, replace=False)

ind = np.concatenate([bi,si])
ind.shape

ValueError: 'a' cannot be empty unless no samples are taken

In [10]:
np.unique(y_train[ind], return_counts=True)

NameError: name 'ind' is not defined

In [None]:
X_train[ind].shape

In [None]:
y_train = y_train[ind]
X_train = X_train[ind]

In [11]:
X_train.shape, y_train.shape

((60000, 1, 116, 60), (60000, 1))

In [10]:
X_train = X_train[:,:,:,:10]
X_train.shape

(60000, 1, 116, 10)

In [11]:
X_train.shape = (60000, 116, 10)
X_train.shape

(60000, 116, 10)

In [None]:
# decrease test size for runtime and memory concerns
b = np.where(y_test == 1)[0]
s = np.where(y_test == 2)[0]

bi = np.random.choice(b, size=20000, replace=False)
si = np.random.choice(s, size=20000, replace=False)

indt = np.concatenate([bi,si])
indt.shape

y_test = y_test[indt]
X_test = X_test[indt]
y_test.shape, X_test.shape

In [12]:
X_test = X_test[:,:,:,:10]
X_test.shape = (60000, 116, 10)
X_test.shape

(60000, 116, 10)

In [None]:
np.unique(y_test, return_counts=True)[1]/y_test.shape[0]

In [None]:
np.unique(y_train, return_counts=True),np.unique(y_test, return_counts=True)

In [None]:
y_train[np.where(y_train == 2)] = 0
y_test[np.where(y_test == 2)] = 0

np.unique(y_train, return_counts=True),np.unique(y_test, return_counts=True)

#### LSTM Testing

In [13]:
Xr_tp = np.transpose(X_train, axes=(0,2,1))
Xr_tp.shape

(60000, 10, 116)

In [14]:
Xe_tp = np.transpose(X_test, axes=(0,2,1))
Xe_tp.shape

(60000, 10, 116)

In [24]:
model = keras.Sequential([
    keras.layers.LSTM(1),
    keras.layers.Dense(1)
    
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.MeanSquaredError(),
              metrics=['mean_squared_error'])

model.fit(Xr_tp, yr_b, epochs=5,
         validation_data=(Xe_tp, ye_b))

Train on 60000 samples, validate on 60000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
13600/60000 [=====>........................] - ETA: 8s - loss: 831.4316 - mean_squared_error: 831.4318

KeyboardInterrupt: 

In [23]:
yr_b = y_train*10**4
ye_b = y_test*10**4

In [15]:
yr_d = y_train.copy()
yr_d[np.where(yr_d > 0)] = 1
yr_d[np.where(yr_d < 0)] = 0
np.unique(yr_d, return_counts=True)

(array([0., 1.]), array([32831, 27169]))

In [16]:
ye_d = y_test.copy()
ye_d[np.where(ye_d > 0)] = 1
ye_d[np.where(ye_d < 0)] = 0
np.unique(ye_d, return_counts=True)

(array([0., 1.]), array([33401, 26599]))

In [17]:
model = keras.Sequential([
    keras.layers.LSTM(10),
    keras.layers.Dense(2, activation='softmax')
    
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(Xr_tp, yr_d, epochs=10,
         validation_data=(Xe_tp, ye_d))

Train on 60000 samples, validate on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f794426abd0>

In [18]:
np.unique(ye_d, return_counts=True)[1]/np.shape(ye_d)[0]

array([0.55668333, 0.44331667])

In [19]:
model.fit(Xr_tp, yr_d, epochs=10,
         validation_data=(Xe_tp, ye_d))

Train on 60000 samples, validate on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f793c099ad0>

In [20]:
model.fit(Xr_tp, yr_d, epochs=10,
         validation_data=(Xe_tp, ye_d))

Train on 60000 samples, validate on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f793c062d10>

In [21]:
model.fit(Xr_tp, yr_d, epochs=10,
         validation_data=(Xe_tp, ye_d))

Train on 60000 samples, validate on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f793c07bf10>

In [22]:
model.fit(Xr_tp, yr_d, epochs=10,
         validation_data=(Xe_tp, ye_d))

Train on 60000 samples, validate on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f793c0f5a50>

In [29]:
model.save_weights('./data/Models/1min91acc_010920')

In [28]:
model.save('./data/Models/1min91acc_010920') 

INFO:tensorflow:Assets written to: ./data/Models/1min91acc_010920/assets


In [26]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(Xr_tp, yr_d, epochs=10,
         validation_data=(Xe_tp, ye_d))

Train on 60000 samples, validate on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f7944217350>

In [27]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(Xr_tp, yr_d, epochs=10,
         validation_data=(Xe_tp, ye_d))

Train on 60000 samples, validate on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f7934140250>