# Model Creation and Training

In [None]:
import pandas as pd
import numpy as np
import logging
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Send INFO-and-above records to logs.log, each prefixed with a 12-hour-clock timestamp.
logging.basicConfig(
    level=logging.INFO,
    filename="logs.log",
    datefmt='%m/%d/%Y %I:%M:%S %p',
    format="%(asctime)s -- %(message)s",
)

In [None]:
### Convert time to a vector that can be encoded to the features ###
class Time2Vector(tf.keras.layers.Layer):
    """Learned time embedding with one linear and one periodic component.

    ``call`` averages the first two features of every timestep into a single
    value ``v`` per step and emits, for each step, the pair
    ``[w_lin * v + b_lin, sin(w_per * v + b_per)]``. Each of the four
    parameter vectors holds one learned entry per timestep.

    Args:
        sequence_len: Number of timesteps per input sequence. It sizes the
            four weight vectors, so it has to match the length of the
            input's second axis for the element-wise products to broadcast.
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``,
            ``dtype``). ``trainable`` defaults to ``True``.
    """

    def __init__(self, sequence_len, **kwargs):
        # setdefault (rather than passing trainable=True explicitly) avoids a
        # duplicate-keyword error when Keras rebuilds the layer from a config
        # that already carries ``trainable``.
        kwargs.setdefault("trainable", True)
        super().__init__(**kwargs)
        self.sequence_len = sequence_len

    def build(self, input_shape):
        """Create the per-timestep weights and biases of both components."""
        ### Time to Vector Piecewise function ###
        vector_shape = (int(self.sequence_len),)

        self.weights_linear = self.add_weight(shape=vector_shape, initializer="uniform", trainable=True)
        self.bias_linear = self.add_weight(shape=vector_shape, initializer="uniform", trainable=True)
        self.weights_periodic = self.add_weight(shape=vector_shape, initializer="uniform", trainable=True)
        self.bias_periodic = self.add_weight(shape=vector_shape, initializer="uniform", trainable=True)

    def call(self, x):
        """Return the two-channel time embedding for a rank-3 input.

        Args:
            x: Rank-3 tensor; only the first two entries of the last axis
                are used.

        Returns:
            Tensor with the same first two axes as ``x`` and a last axis of
            size 2: ``[linear, periodic]``.
        """
        # Collapse the first two features into one scalar per timestep.
        x = tf.math.reduce_mean(x[:, :, :2], axis=-1)

        time_linear = (self.weights_linear * x) + self.bias_linear
        time_linear = tf.expand_dims(time_linear, axis=-1)  # Expand dimensions to concat later

        time_periodic = tf.math.sin(tf.multiply(x, self.weights_periodic) + self.bias_periodic)
        time_periodic = tf.expand_dims(time_periodic, axis=-1)

        return tf.concat([time_linear, time_periodic], axis=-1)

    def get_config(self):
        """Return the layer config including ``sequence_len``.

        Without this entry a saved model containing the layer could not be
        re-instantiated, because ``__init__`` requires ``sequence_len``.
        """
        config = super().get_config()
        config["sequence_len"] = self.sequence_len
        return config
        

In [None]:
### Load_Data ###
training_data = pd.read_csv("training_data/01Jan2022.csv")

# Turn the timestamp column into 64-bit integer epoch ticks and use it as the index.
# pd.to_datetime + an explicit "int64" is used instead of
# .astype("datetime64").astype(int): pandas 2.x rejects the unit-less
# "datetime64" dtype, and plain `int` does not guarantee a 64-bit target on
# every platform.
training_data["Datetime"] = pd.to_datetime(training_data["Datetime"]).astype("int64")
training_data.set_index("Datetime", inplace=True)

# Share of rows with a positive label, in percent (assumes Response is 0/1 — TODO confirm).
percent_positive = (training_data["Response"].sum() / len(training_data)) * 100

# Features are the Open and VWAP columns; the target is Response.
X = training_data.loc[:, ["Open", "VWAP"]]
y = training_data.loc[:, "Response"]

# shuffle=False keeps row order, so the test set is the final 20% of the file.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

logging.info("Dataset Upload successfully")
logging.info(f"X training set shape:{X_train.shape}")
logging.info(f"y training set shape:{y_train.shape}")
logging.info(f"X test set shape:{X_test.shape}")
logging.info(f"y test set shape:{y_test.shape}")
logging.info(f"Percent Positive Response {round(percent_positive,2)}%")


In [None]:
### Variables ###
# Training labels as a TensorFlow tensor.
y_train = tf.convert_to_tensor(y_train)

# Values handed to model.fit() in train_model(); build_model() additionally
# uses batch_size as the per-sample sequence length of the model input.
epochs = 100
batch_size = 1400

# Arguments forwarded to transformer_encoder() by build_model().
num_transformer_blocks = 8
attention_heads = 4
projection_dim = 1400
conv_dim = 4
dropout = 0.1

# Layer-width lists; neither is referenced by the cells visible in this notebook.
mlp_units = [2048, 1024]
tranformer_mlp_units = [projection_dim * projection_dim, projection_dim]


In [None]:
# def positional_encoding(inputs):
#     input_projection = layers.Dense(projection_dim)
#     embedding = layers.Embedding(input_dim = 1440, output_dim = projection_dim)
#     time_position = tf.range(start = 0, limit = 1440)
#     encoded_batch = [input_projection(input) + embedding(time_position) for input in inputs]
#     print(tf.convert_to_tensor(encoded_batch))
#     return tf.convert_to_tensor(encoded_batch)

In [None]:
def transformer_encoder(inputs, attention_heads, projection_dim, conv_dim, dropout):
    """Apply one pre-normalised transformer encoder block to ``inputs``.

    The block is a self-attention stage followed by a feed-forward stage made
    of two kernel-size-1 convolutions. Each stage starts with layer
    normalisation and ends with a residual addition.

    Args:
        inputs: Keras tensor to transform.
        attention_heads: Number of heads for the multi-head attention layer.
        projection_dim: ``key_dim`` of the multi-head attention layer.
        conv_dim: Filter count of the first (ReLU) convolution.
        dropout: Dropout rate used inside attention and between convolutions.

    Returns:
        Keras tensor whose last dimension equals that of ``inputs``.
    """
    ### Self-attention stage with residual connection ###
    normed_inputs = layers.LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = layers.MultiHeadAttention(
        num_heads=attention_heads,
        key_dim=projection_dim,
        dropout=dropout,
    )
    attended = layers.Add()([self_attention(normed_inputs, normed_inputs), inputs])

    ### Feed-forward stage with residual connection ###
    # The second convolution projects back to the input width so the
    # residual addition lines up.
    feature_count = inputs.shape[-1]
    hidden = layers.LayerNormalization(epsilon=1e-6)(attended)
    hidden = layers.Conv1D(filters=conv_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    hidden = layers.Conv1D(filters=feature_count, kernel_size=1)(hidden)

    return layers.Add()([hidden, attended])

In [None]:
def build_model():
    """Assemble the Time2Vector + transformer-encoder binary classifier.

    Reads the notebook globals ``batch_size`` (used here as the per-sample
    sequence length), ``X_train`` (for the feature count),
    ``num_transformer_blocks``, ``attention_heads``, ``projection_dim``,
    ``conv_dim`` and ``dropout``.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping a
        ``(batch_size, n_features)`` sequence to a single probability.
        ``model.summary()`` is printed as a side effect.
    """
    time_embedding = Time2Vector(batch_size)

    # Named model_input rather than `input` to avoid shadowing the builtin.
    model_input = tf.keras.Input(shape=(batch_size, X_train.shape[-1]))

    # Append the two time-embedding channels to the raw features.
    x = time_embedding(model_input)
    x = tf.keras.layers.Concatenate(axis=-1)([model_input, x])

    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, attention_heads, projection_dim, conv_dim, dropout)

    # With data_format="channels_first" the pooling averages over the last
    # axis, leaving one value per timestep.
    x = tf.keras.layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    x = tf.keras.layers.Dropout(0.1)(x)
    x = tf.keras.layers.Dense(64, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.1)(x)

    # A single output unit needs sigmoid: softmax normalises across units, so
    # over one unit it returns 1.0 for every input and the model cannot learn.
    output = tf.keras.layers.Dense(1, activation="sigmoid")(x)

    model = tf.keras.Model(inputs=model_input, outputs=output)

    model.summary()
    return model

In [None]:
def train_model(model):
    """Compile ``model`` for binary classification and fit it.

    Training data and settings come from the notebook globals ``X_train``,
    ``y_train``, ``batch_size`` and ``epochs``. The last 20% of the training
    data is held out for validation, and the weights of the epoch with the
    best validation accuracy are checkpointed.

    Args:
        model: Uncompiled Keras model with a single-unit output.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # lr(step) = 1e-3 / (1 + 1e-4 * step): the inverse-time decay that the
    # optimizer's `decay=1e-4` argument used to apply. It is expressed as a
    # schedule because newer Keras optimizers no longer accept `decay`.
    learning_rate = tf.keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=1e-3,
        decay_steps=1,
        decay_rate=1e-4,
    )
    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

    # NOTE(review): this is an absolute path at the filesystem root — confirm
    # it is intended (and writable) in the training environment.
    checkpoint_path = "/models/"
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor="val_Accuracy",
        # Explicit "max": auto mode looks for the lowercase substring "acc",
        # which "val_Accuracy" does not contain, so it may treat the metric
        # as something to minimise and keep the worst epoch.
        mode="max",
        save_best_only=True,
        save_weights_only=True,
    )

    # The model emits one unit, so the binary loss and metric apply. The
    # categorical variants expect one column per class and are degenerate on
    # a single column. The metric keeps the name "Accuracy" so the
    # checkpoint monitor above still resolves.
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[tf.keras.metrics.BinaryAccuracy(name="Accuracy")],
    )

    history = model.fit(
        x=X_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.2,
        callbacks=[checkpoint_callback],
    )

    return history

In [None]:
# Notebook sanity check: display the current shape of the training features.
X_train.shape

In [None]:
# Keep only the first 420000 training rows — presumably so the rows divide
# evenly into 1400-step sequences (420000 / 1400 = 300).
# NOTE(review): y_train is not truncated or windowed in the visible cells —
# confirm the labels line up with the reshaped features before fitting.
X_train = X_train[:420000]

In [None]:
# Scratch arithmetic: how many 1400-row sequences fit in 420000 rows (300.0).
420000/1400

In [None]:
# Fold the flat (rows, features) table into (n_sequences, batch_size, features).
# `batch_size` is the sequence length build_model() declares for its Input
# layer, so using it here instead of a literal 1400 keeps the data and model
# shapes in agreement.
# np.asarray accepts the DataFrame (or an ndarray) directly, and the reshape
# fixes the final rank on its own, so no prior tf.expand_dims is needed.
X_train = np.asarray(X_train).reshape((-1, batch_size, X_train.shape[-1]))

In [None]:
# Build the network, then train it; `training` holds whatever train_model
# returns (the fit history).
model = build_model()
training = train_model(model)