# 4.3.1 Densely Connected on D2 Labels - 10 timesteps

In this notebook we will attempt to learn on the decision labels 'D2', which have a 50/50 buy/sell split without hold positions indicated. We hope this will be an easier decision function to learn, rather than the 3 class classification of buy/hold/sell. 

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import numpy as np

In [2]:
y = np.load('./data/prepared/august25screenfixed/numpy_matrices/yD2_60.npy')

In [3]:
y.shape

(30, 4000)

In [4]:
y_train = y[:, :2000]
y_test = y[:, 2000:]

y_train.shape, y_test.shape

((30, 2000), (30, 2000))

In [5]:
y_train = y_train.reshape([60000, 1])
y_test = y_test.reshape([60000, 1])
y_train.shape, y_test.shape

((60000, 1), (60000, 1))

In [6]:
X_train = np.load('./data/prepared/august25screenfixed/numpy_matrices/X_train.npy')
X_test = np.load('./data/prepared/august25screenfixed/numpy_matrices/X_test.npy')

In [7]:
X_train.shape, X_test.shape

((60000, 1, 116, 60), (60000, 1, 116, 60))

In [8]:
np.unique(y_train, return_counts=True)

(array([1., 2.]), array([38755, 21245]))

In [9]:
np.unique(y_test, return_counts=True)

(array([1., 2.]), array([33285, 26715]))

In [10]:
# Undersample to balance classes for training set ?
b = np.where(y_train == 1)[0]
s = np.where(y_train == 2)[0]

bi = np.random.choice(b, size=21000, replace=False)
si = np.random.choice(s, size=21000, replace=False)

ind = np.concatenate([bi,si])
ind.shape

(42000,)

In [11]:
np.unique(y_train[ind], return_counts=True)

(array([1., 2.]), array([21000, 21000]))

In [12]:
X_train[ind].shape

(42000, 1, 116, 60)

In [13]:
y_train = y_train[ind]
X_train = X_train[ind]

In [14]:
X_train.shape, y_train.shape

((42000, 1, 116, 60), (42000, 1))

In [15]:
X_train = X_train[:,:,:,:10]
X_train.shape

(42000, 1, 116, 10)

In [16]:
X_train.shape = (42000, 116, 10)
X_train.shape

(42000, 116, 10)

In [17]:
# decrease test size for runtime and memory concerns
b = np.where(y_test == 1)[0]
s = np.where(y_test == 2)[0]

bi = np.random.choice(b, size=20000, replace=False)
si = np.random.choice(s, size=20000, replace=False)

indt = np.concatenate([bi,si])
indt.shape

y_test = y_test[indt]
X_test = X_test[indt]
y_test.shape, X_test.shape

((40000, 1), (40000, 1, 116, 60))

In [18]:
X_test = X_test[:,:,:,:10]
X_test.shape = (40000, 116, 10)
X_test.shape

(40000, 116, 10)

In [19]:
np.unique(y_test, return_counts=True)[1]/y_test.shape[0]

array([0.5, 0.5])

In [20]:
np.unique(y_train, return_counts=True),np.unique(y_test, return_counts=True)

((array([1., 2.]), array([21000, 21000])),
 (array([1., 2.]), array([20000, 20000])))

In [21]:
y_train[np.where(y_train == 2)] = 0
y_test[np.where(y_test == 2)] = 0

np.unique(y_train, return_counts=True),np.unique(y_test, return_counts=True)

((array([0., 1.]), array([21000, 21000])),
 (array([0., 1.]), array([20000, 20000])))

#### Attempt 3 columns only

So far, the super simple feed forward network seems to perform just as well as the cnn. 

In [23]:
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(116, 10)),
    keras.layers.Dense(116, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(2)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50,
         validation_data=(X_test, y_test))

Train on 42000 samples, validate on 40000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50

KeyboardInterrupt: 

In [20]:
probability_model = tf.keras.Sequential([model, 
                                         tf.keras.layers.Softmax()])

predictions = probability_model.predict(X_train)

In [27]:
predictions[4]

array([0.07750326, 0.92002517, 0.00247163], dtype=float32)

In [22]:
np.argmax(predictions)

13971

In [60]:
predictions = probability_model.predict(X_test)

In [61]:
np.argmax(predictions[0])

0

In [62]:
predictions[0]

array([9.8680556e-01, 1.2953308e-02, 2.4112873e-04], dtype=float32)

### Fewer Nodes

In [27]:
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(116, 10)),
    keras.layers.Dense(58, activation='relu'),
    #keras.layers.Dropout(0.2),
    keras.layers.Dense(2)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50,
         validation_data=(X_test, y_test))

Train on 42000 samples, validate on 40000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50


KeyboardInterrupt: 

In [28]:
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(116, 10)),
    keras.layers.Dense(29, activation='relu'),
    #keras.layers.Dropout(0.2),
    keras.layers.Dense(2)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50,
         validation_data=(X_test, y_test))

Train on 42000 samples, validate on 40000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7fb1882aab10>

#### Attempt LSTM

In [31]:
x = np.arange(24).reshape(3,4,2)
x

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11],
        [12, 13],
        [14, 15]],

       [[16, 17],
        [18, 19],
        [20, 21],
        [22, 23]]])

In [44]:
range(x.ndim)

range(0, 3)

In [45]:
np.transpose(x, axes=(0,2,1))

array([[[ 0,  2,  4,  6],
        [ 1,  3,  5,  7]],

       [[ 8, 10, 12, 14],
        [ 9, 11, 13, 15]],

       [[16, 18, 20, 22],
        [17, 19, 21, 23]]])

In [46]:
X_train.shape

(42000, 116, 10)

In [47]:
Xr_tp = np.transpose(X_train, axes=(0,2,1))
Xr_tp.shape

(42000, 10, 116)

In [48]:
Xe_tp = np.transpose(X_test, axes=(0,2,1))
Xe_tp.shape

(40000, 10, 116)

In [50]:
model = keras.Sequential([
    keras.layers.LSTM(10),
    keras.layers.Dense(2)
    
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50,
         validation_data=(X_test, y_test))

Train on 42000 samples, validate on 40000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50

KeyboardInterrupt: 

In [55]:
model = keras.Sequential([
    keras.layers.LSTM(2),
    keras.layers.Dense(2, activation='softmax')
    
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=5,
         validation_data=(X_test, y_test))

Train on 42000 samples, validate on 40000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fb1493f87d0>

In [54]:
model = keras.Sequential([
    keras.layers.LSTM(10),
    keras.layers.Dense(2, activation='softmax')
    
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=5,
         validation_data=(X_test, y_test))

Train on 42000 samples, validate on 40000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fb150d6b2d0>

In [58]:
model = keras.Sequential([
    keras.layers.LSTM(1),
    keras.layers.Dense(2, activation='softmax')
    
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10,
         validation_data=(X_test, y_test))

Train on 42000 samples, validate on 40000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fb14b26f410>

Test accuracy of .5529 doesn't sound great, but it should be noted that it is the best performance I have seen on a model thus far. On the right track? Strange though that the train accuracy is still 0.5081. Undertraining?

In [57]:
# Doesnt work 
#model = keras.Sequential([
#     keras.layers.LSTM(10),
#     keras.layers.LSTM(10),
#     keras.layers.Dense(2, activation='softmax')
    
# ])

# model.compile(optimizer='adam',
#               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#               metrics=['accuracy'])

# model.fit(X_train, y_train, epochs=5,
#          validation_data=(X_test, y_test))

ValueError: Input 0 of layer lstm_8 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 10]

In [22]:
# X_train = np.load('./data/prepared/august25screenfixed/numpy_matrices/X_train.npy')
# X_test = np.load('./data/prepared/august25screenfixed/numpy_matrices/X_test.npy')

# X_train = X_train[ind]
# X_test = X_test[indt]

X_train.shape, X_test.shape

((42000, 116, 10), (40000, 116, 10))

In [20]:
X_train.shape = (42000, 116, 60)
X_test.shape = (40000, 116, 60)

In [23]:
Xr_tp = np.transpose(X_train, axes=(0,2,1))
Xe_tp = np.transpose(X_test, axes=(0,2,1))

In [24]:
Xr_tp.shape, Xe_tp.shape

((42000, 10, 116), (40000, 10, 116))

In [25]:
model = keras.Sequential([
    keras.layers.LSTM(1),
    keras.layers.Dense(2, activation='softmax')
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(Xr_tp, y_train, epochs=10,
         validation_data=(Xe_tp, y_test))

Train on 42000 samples, validate on 40000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

KeyboardInterrupt: 

In [26]:
model = keras.Sequential([
    keras.layers.LSTM(10),
    keras.layers.Dense(2, activation='softmax')
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(Xr_tp, y_train, epochs=10,
         validation_data=(Xe_tp, y_test))

Train on 42000 samples, validate on 40000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fe5885fb0d0>