In [1]:
import os
import tarfile
import urllib.request
from random import randint

from tensorflow.keras import Model
from tensorflow.keras.layers import Input, GlobalAveragePooling1D, GlobalMaxPooling1D, Dropout, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import *
from tensorflow.keras.backend import clear_session

import numpy as np
import matplotlib.pyplot as plt
from transformer import TransformerBlock, TokenAndPositionEmbedding

In [3]:
# Fetch and unpack the GATA dataset only when the training file is not already
# cached under /tmp. This is done in Python rather than with a `!` shell line
# because the previous `if ...; ...; end` form is fish-shell syntax and is a
# syntax error under sh/bash, so the cell's behaviour depended on the user's
# login shell.
if not os.path.exists('/tmp/GATA_train.h5'):
    urllib.request.urlretrieve(
        'https://repo.gorchilov.net/datasets/gata-multi-factor.tar.gz',
        '/tmp/gata.tar.gz',
    )
    # Mode defaults to transparent compression detection, so the .tar.gz is
    # handled like `tar -xf`.
    # NOTE(review): extractall trusts the member paths inside the archive;
    # this assumes the dataset host is trusted — confirm.
    with tarfile.open('/tmp/gata.tar.gz') as archive:
        archive.extractall('/tmp')

import h5py

# Read-only HDF5 handles for the two dataset files.
train = h5py.File('/tmp/GATA_train.h5', mode='r')
test = h5py.File('/tmp/GATA_test.h5', mode='r')

/tmp/GATA_train.h5


In [4]:
# Pull the 'data' and 'labels' datasets of each HDF5 file fully into memory;
# the `[:]` slice reads the complete dataset rather than keeping a lazy handle.
X_train, y_train = train['data'][:], train['labels'][:]
X_test, y_test = test['data'][:], test['labels'][:]

In [5]:
# Swap the two per-sample axes of the whole training array in a single
# vectorised call instead of transposing sample by sample in a Python loop.
# The model's Input layer is declared as (1000, 4), so the result must be
# (n_samples, 1000, 4).
# NOTE(review): assumes X_train is 3-D, i.e. each sample is a 2-D array, so
# that swapping axes 1 and 2 equals the per-sample `.T` — confirm.
# ascontiguousarray materialises a C-ordered copy, matching the fresh array
# the previous list-based construction produced.
transposed = np.ascontiguousarray(np.swapaxes(X_train, 1, 2))

In [6]:
# Not referenced by the active code in this cell.
sample_size = 10000

# Hyper-parameters handed to the TransformerBlock constructor.
embed_dim = 4
num_heads = 2  # Number of attention heads
ff_dim = 256  # Hidden layer size in feed forward network inside transformer

# Drop any Keras state left over from a previous run of this cell before
# building a new graph.
clear_session()

# The raw (1000, 4) input goes straight into the transformer block; no
# TokenAndPositionEmbedding layer is applied in front of it.
inputs = Input(shape=(1000, 4))
x = inputs

transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)

# Collapse the sequence axis by taking the per-feature maximum.
x = GlobalMaxPooling1D()(x)

# Two fully connected stages (128 then 64 units), each followed by 10% dropout.
for units in (128, 64):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.1)(x)

# 17-way output head.
# NOTE(review): a relu output clamps predictions at zero and the model is
# trained with 'mae'; whether that suits the label encoding cannot be told
# from this cell — confirm.
outputs = Dense(17, activation='relu')(x)

model = Model(inputs, outputs)

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1000, 4)]         0         
_________________________________________________________________
transformer_block (Transform (None, 1000, 4)           2404      
_________________________________________________________________
global_max_pooling1d (Global (None, 4)                 0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               640       
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0     

In [7]:
# Mean-absolute-error loss, Adam with its default settings.
model.compile(loss='mae', optimizer=Adam())

# Fit on the first 10000 training examples only.
fit_inputs = transposed[:10000]
fit_targets = y_train[:10000]

# `history` keeps the per-epoch metrics for the plotting cell.
history = model.fit(fit_inputs, fit_targets, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [None]:
# Window of epochs to display.
start = 0
end = 100

# Slice the recorded losses first and build the x-axis from the slice's real
# length. Using range(start, end) directly raises a length-mismatch error in
# plt.plot whenever the history holds fewer than `end` epochs (for example
# after changing `epochs` or stopping training early).
losses = history.history['loss'][start:end]
epoch_axis = range(start, start + len(losses))

plt.title('all epochs')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.plot(epoch_axis, losses)
plt.show()

In [None]:
# Evaluate on the test set after applying the same axis swap the training data
# receives, done in one vectorised call instead of a per-sample Python loop.
# NOTE(review): assumes X_test is 3-D so that swapping axes 1 and 2 equals the
# per-sample `.T` — confirm.
model.evaluate(np.ascontiguousarray(np.swapaxes(X_test, 1, 2)), y_test)

In [None]:
# Pick one random test example. randint includes BOTH endpoints, so the upper
# bound must be len - 1; using len(y_test) itself can select an index one past
# the end and raise IndexError.
n = randint(0, len(y_test) - 1)

# Transpose the single sample and add a leading batch axis before predicting.
pred = model.predict(np.expand_dims(X_test[n].T, 0))

# Show the index, the model's prediction and the true label row.
print(n, '\n', pred, '\n', y_test[n])

In [None]:
# Persist the trained model; the path is relative to the kernel's working
# directory.
save_path = 'model_saves/transformer-model'
model.save(save_path)

In [None]:
# Bare expression as the cell's last statement: the notebook displays the
# repr of the model object as the cell output.
model