# <center> <font color=#102C54> <i> Transformers - Trading </font> </center> </i>

<img style="float: left;;" src='https://upload.wikimedia.org/wikipedia/commons/d/db/Logo_ITESO_normal.jpg' width="80" height="160"/></a>
    
<i><center> **Trading Microstructure**  
    <center> Claudia Valeria Chimal Parra 
    <center> Paulo Cesar Ayala Gutiérrez
    <center> Juan Carlos Gutiérrez Valdivia
    <center> Oscar Leonardo Vaca González
    <center> Arturo Espinosa Carabez
        
   <center> May 5th, 2024 

# Classification With Transformers

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ta

In [2]:
data_train = pd.read_csv("./data/aapl_5m_train.csv").dropna()
data_test = pd.read_csv("./data/aapl_5m_test.csv").dropna()

In [3]:
data_train.head()

Unnamed: 0,Timestamp,Gmtoffset,Datetime,Open,High,Low,Close,Volume
0,1609770600,0,04/01/2021 14:30,133.570007,133.611602,132.389999,132.809997,6624663.0
1,1609770900,0,04/01/2021 14:35,132.75,132.75,131.809997,131.889999,2541553.0
2,1609771200,0,04/01/2021 14:40,131.5,132.339996,131.5,132.059997,2492415.0
3,1609771500,0,04/01/2021 14:45,132.0,132.25,131.899993,132.25,1859131.0
4,1609771800,0,04/01/2021 14:50,132.0,132.018096,131.520004,131.589996,1780105.0


## Normalizing Data

In [16]:
train_mean = data_train.loc[:, ["Open", "High", "Low", "Close"]].mean()
train_std = data_train.loc[:, ["Open", "High", "Low", "Close"]].std()

norm_data_train = (data_train.loc[:, ["Open", "High", "Low", "Close"]] - train_mean) / train_std
norm_data_test = (data_test.loc[:, ["Open", "High", "Low", "Close"]] - train_mean) / train_std

## Generating our features

In [17]:
# RSI configurations
rsi_params = {
    'window': [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75]
}

# WMA (Weighted Moving Average) configurations
wma_params = {
    'window': [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75]
}

# MACD configurations
macd_params = {
    'fast_period': [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26],
    'slow_period': [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40],
    'signal_period': [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
}

# Bollinger Bands configurations
boll_params = {
    'window': [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
    'window_dev': [2, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3, 3.1, 3.2, 3.3, 3.4]
}


In [18]:
# Normalizing
def normalize(data, column_name):
    return (data[column_name] - data[column_name].mean()) / data[column_name].std()

for i in range(15):  # Cambia el rango según el número de configuraciones que desees procesar
    # RSI
    data_train[f'rsi_{i}'] = ta.momentum.RSIIndicator(data_train['Close'], window=rsi_params['window'][i]).rsi()
    data_test[f'rsi_{i}'] = ta.momentum.RSIIndicator(data_test['Close'], window=rsi_params['window'][i]).rsi()

    # WMA
    data_train[f'wma_{i}'] = ta.trend.WMAIndicator(data_train['Close'], window=wma_params['window'][i]).wma()
    data_test[f'wma_{i}'] = ta.trend.WMAIndicator(data_test['Close'], window=wma_params['window'][i]).wma()

    # MACD
    macd = ta.trend.MACD(data_train['Close'], window_fast=macd_params['fast_period'][i], window_slow=macd_params['slow_period'][i], window_sign=macd_params['signal_period'][i])
    data_train[f'macd_{i}'] = macd.macd()
    data_test[f'macd_{i}'] = ta.trend.MACD(data_test['Close'], window_fast=macd_params['fast_period'][i], window_slow=macd_params['slow_period'][i], window_sign=macd_params['signal_period'][i]).macd()

    # Bollinger Bands
    bollinger = ta.volatility.BollingerBands(data_train['Close'], window=boll_params['window'][i], window_dev=boll_params['window_dev'][i])
    data_train[f'bollinger_mavg_{i}'] = bollinger.bollinger_mavg()
    data_train[f'bollinger_hband_{i}'] = bollinger.bollinger_hband()
    data_train[f'bollinger_lband_{i}'] = bollinger.bollinger_lband()

    bollinger_test = ta.volatility.BollingerBands(data_test['Close'], window=boll_params['window'][i], window_dev=boll_params['window_dev'][i])
    data_test[f'bollinger_mavg_{i}'] = bollinger_test.bollinger_mavg()
    data_test[f'bollinger_hband_{i}'] = bollinger_test.bollinger_hband()
    data_test[f'bollinger_lband_{i}'] = bollinger_test.bollinger_lband()

    # Normalize each new indicator and concatenate
    for col in [f'rsi_{i}', f'wma_{i}', f'macd_{i}', f'bollinger_mavg_{i}', f'bollinger_hband_{i}', f'bollinger_lband_{i}']:
        data_train[col] = normalize(data_train, col)
        data_test[col] = normalize(data_test, col)
        norm_data_train = pd.concat([norm_data_train, data_train[col]], axis=1)
        norm_data_test = pd.concat([norm_data_test, data_test[col]], axis=1)


In [19]:
norm_data_train

Unnamed: 0,Open,High,Low,Close,rsi_0,wma_0,macd_0,bollinger_mavg_0,bollinger_hband_0,bollinger_lband_0
0,-0.923792,-0.931312,-0.989710,-0.972868,,,,,,
1,-0.976739,-0.986867,-1.027213,-1.032271,,,,,,
2,-1.057451,-1.013304,-1.047258,-1.021294,,,,,,
3,-1.025166,-1.019107,-1.021394,-1.009026,,,,,,
4,-1.025166,-1.034060,-1.045964,-1.051641,-1.601562,-1.026632,,,,
...,...,...,...,...,...,...,...,...,...,...
39573,-1.217583,-1.225440,-1.215375,-1.216289,1.556466,-1.220105,0.385375,-1.254636,-1.257028,-1.249399
39574,-1.216938,-1.220281,-1.214081,-1.215321,1.595960,-1.218061,0.424076,-1.251647,-1.252429,-1.248044
39575,-1.215001,-1.202873,-1.207615,-1.194336,2.075965,-1.209731,0.522100,-1.247511,-1.244231,-1.248023
39576,-1.194661,-1.168699,-1.187570,-1.169516,2.262051,-1.195020,0.678735,-1.242295,-1.230995,-1.250923


In [20]:
norm_data_train.dropna(inplace=True)
norm_data_test.dropna(inplace=True)

In [None]:
# Export data_train to a CSV file
norm_data_train.to_csv("data_train.csv", index=False)

# Export data_test to a CSV file
norm_data_test.to_csv("data_test.csv", index=False)

In [2]:
norm_data_train = pd.read_csv("data_train.csv")

norm_data_test = pd.read_csv("data_test.csv")

In [4]:
lags = 5

X_train = pd.DataFrame()
X_test = pd.DataFrame()

for lag in range(lags):
    # Add original features with lags
    X_train[f"Close_{lag}"] = norm_data_train.Close.shift(lag)
    
    X_test[f"Close_{lag}"] = norm_data_test.Close.shift(lag)

    # Add indicator features with lags
    for i in range(15): 
        X_train[f'rsi_{i}_{lag}'] = norm_data_train[f'rsi_{i}'].shift(lag)
        X_train[f'macd_{i}_{lag}'] = norm_data_train[f'macd_{i}'].shift(lag)
        X_train[f'bollinger_mavg_{i}_{lag}'] = norm_data_train[f'bollinger_mavg_{i}'].shift(lag)

        X_test[f'rsi_{i}_{lag}'] = norm_data_test[f'rsi_{i}'].shift(lag)
        X_test[f'macd_{i}_{lag}'] = norm_data_test[f'macd_{i}'].shift(lag)
        X_test[f'bollinger_mavg_{i}_{lag}'] = norm_data_test[f'bollinger_mavg_{i}'].shift(lag)

# Train
Y_train = np.where(X_train.Close.shift(-5) > X_train.Close * 1.01, 1, 
          np.where(X_train.Close.shift(-5) < X_train.Close * 0.99, -1, 0))

# Test
Y_test = np.where(X_test.Close.shift(-5) > X_test.Close * 1.01, 1, 
         np.where(X_test.Close.shift(-5) < X_test.Close * 0.99, -1, 0))

# Removing NaNs and the last value due to shifting
X_train.dropna(inplace=True)
X_test.dropna(inplace=True)
X_train = X_train.iloc[:-1, :].values
X_test = X_test.iloc[:-1, :].values

Y_train = Y_train.iloc[lags:-1].values.reshape(-1, 1)
Y_test = Y_test.iloc[lags:-1].values.reshape(-1, 1)


NameError: name 'norm_data_train' is not defined

In [36]:
Y_validate_train = pd.DataFrame(Y_train)
Y_validate_test = pd.DataFrame(Y_test)

print(Y_validate_train.value_counts())
print(Y_validate_test.value_counts())

0    38265
1      810
Name: count, dtype: int64
0    19129
1      210
Name: count, dtype: int64


## Reshaping Tensors

In [37]:
features = X_train.shape[1]

X_train = X_train.reshape(-1, features, 1)
X_test = X_test.reshape(-1, features, 1)

## Classification Model

In [58]:
def create_transformer(inputs, head_size, num_heads, dnn_dim):
    # Stacking layers
    l1 = tf.keras.layers.MultiHeadAttention(key_dim=head_size,
                                            num_heads=num_heads,
                                            dropout=0.2)(inputs, inputs)
    l2 = tf.keras.layers.Dropout(0.2)(l1)
    l3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(l2)
    
    res = l3 + inputs
    
    # Traditional DNN
    l4 = tf.keras.layers.Conv1D(filters=4, kernel_size=1, activation="relu")(res)
    l5 = tf.keras.layers.Dropout(0.2)(l4)
    l6 = tf.keras.layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(l5)
    l7 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(l6)
    return l7 + res

In [59]:
input_shape = X_train.shape[1:]

# Hyperparams
head_size = 256
num_heads = 4
num_transformer_blocks = 4
dnn_dim = 4
units = 128


# Defining input_shape as Input layer
input_layer = tf.keras.layers.Input(input_shape)

# Creating our transformers based on the input layer
transformer_layers = input_layer

for _ in range(num_transformer_blocks):
    # Stacking transformers
    transformer_layers = create_transformer(inputs=transformer_layers,
                                            head_size=head_size,
                                            num_heads=num_heads,
                                            dnn_dim=dnn_dim)

# Adding global pooling
pooling_layer = tf.keras.layers.GlobalAveragePooling1D(data_format="channels_last")\
                                                      (transformer_layers)

# Adding MLP layers
l1 = tf.keras.layers.Dense(units=128, activation="leaky_relu")(pooling_layer)
l2 = tf.keras.layers.Dropout(0.3)(l1)
l3 = tf.keras.layers.Dense(units=128, activation="leaky_relu")(l2)

# Last layer, units = 2 for True and False values
outputs = tf.keras.layers.Dense(units=2, activation="softmax")(l3)

# Model
model = tf.keras.Model(inputs=input_layer,
                       outputs=outputs,
                       name="transformers_classification")

metric = tf.keras.metrics.SparseCategoricalAccuracy()
adam_optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=1e-4)
#callbacks = [tf.keras.callbacks.EarlyStopping(monitor="loss",
#                                              patience=10,
#                                              restore_best_weights=True)]

In [60]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=adam_optimizer,
    metrics=[metric],
)

In [61]:
model.summary()

Model: "transformers_classification"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_9 (InputLayer)        [(None, 20, 1)]              0         []                            
                                                                                                  
 multi_head_attention_32 (M  (None, 20, 1)                7169      ['input_9[0][0]',             
 ultiHeadAttention)                                                  'input_9[0][0]']             
                                                                                                  
 dropout_72 (Dropout)        (None, 20, 1)                0         ['multi_head_attention_32[0][0
                                                                    ]']                           
                                                                        

 dropout_77 (Dropout)        (None, 20, 4)                0         ['conv1d_68[0][0]']           
                                                                                                  
 conv1d_69 (Conv1D)          (None, 20, 1)                5         ['dropout_77[0][0]']          
                                                                                                  
 layer_normalization_69 (La  (None, 20, 1)                2         ['conv1d_69[0][0]']           
 yerNormalization)                                                                                
                                                                                                  
 tf.__operators__.add_69 (T  (None, 20, 1)                0         ['layer_normalization_69[0][0]
 FOpLambda)                                                         ',                            
                                                                     'tf.__operators__.add_68[0][0
          

In [62]:
model.fit(
    X_train,
    Y_train,
    epochs=100,
    batch_size=64,
    # callbacks=callbacks,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x2a0963d50>

In [12]:
model.save("transformer_classifier.keras")

In [63]:
y_hat_train = model.predict(X_train)



In [88]:
sum(y_hat_train.argmax(axis=1) == 0)

18598