In [1]:
from preprocessing import get_features_and_target
import tensorflow as tf

# LSTM regression on returns

# Preprocessing for regression


In [2]:
# --- Configuration ---------------------------------------------------------
symbol = "BTC/USDT"
feature_lags = []
valid_split = 200  # number of trailing rows held out for validation
train_length = 5  # number of consecutive rows in each input window
batch_size = 32
seed = 44

# --- Load features and target ----------------------------------------------
df = get_features_and_target(
    symbol,
    steps_to_forecast=1,
    feature_lags=feature_lags,
    model_type="reg",
    model_freq="1d",
)

# The frame's column names use ":" where the exchange symbol uses "/".
symbol = symbol.replace("/", ":")
target_column = f"{symbol}_target"

X = df.drop(columns=target_column)
y = df[target_column].copy()

# Chronological split: the last `valid_split` rows are kept for validation.
X_train, X_valid = X[:-valid_split], X[-valid_split:]
y_train, y_valid = y[:-valid_split], y[-valid_split:]


def make_windowed_dataset(
    features,
    targets,
    sequence_length=train_length,
    batch_size=batch_size,
    seed=seed,
):
    """Build a batched dataset of sliding feature windows and their targets.

    `timeseries_dataset_from_array` pairs the window that starts at row ``i``
    with ``targets[i]``. Dropping the first ``sequence_length - 1`` targets
    therefore pairs every window with the target of its *last* row.

    Args:
        features: 2-D table of features, one row per time step.
        targets: pandas Series aligned row-by-row with ``features``.
        sequence_length: number of rows per window.
        batch_size: number of windows per batch.
        seed: forwarded to Keras; has no effect while ``shuffle=False``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(windows, targets)`` batches in
        chronological order.
    """
    return tf.keras.utils.timeseries_dataset_from_array(
        features,
        targets=targets.iloc[(sequence_length - 1) :],
        sequence_length=sequence_length,
        batch_size=batch_size,
        shuffle=False,
        seed=seed,
    )


train_df = make_windowed_dataset(X_train, y_train)
valid_df = make_windowed_dataset(X_valid, y_valid)

# --- Sanity check ----------------------------------------------------------
# The last row of the first window and its target should equal the row of the
# raw frames at position `train_length - 1` (displayed below for comparison).
last_row_of_first_window = train_length - 1

for X_batch, y_batch in train_df.take(1):
    # Extract the first sample from the batch
    first_sample_features = X_batch[0][-1].numpy().tolist()
    first_sample_target = y_batch[0]

    # Print the feature vector and its corresponding target
    print("Feature Vector (First Sample):", first_sample_features)
    print("Target (First Sample):", first_sample_target)

X_train.iloc[last_row_of_first_window], y_train.iloc[last_row_of_first_window]

Feature Vector (First Sample): [4420.938888888886, 4152.906500000001, 4191.1688, 66.87214553327276, 4807.149201091096, 3498.663798908906, 21.528928376788606, 69.37933985620802, 1357.992566, 4499.99, 89.40502723788764, 76.60420953763918, 0.0, 100.0]
Target (First Sample): tf.Tensor(-0.0008126259935865576, shape=(), dtype=float64)


2024-01-13 10:25:03.042499: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-01-13 10:25:03.042521: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-01-13 10:25:03.042529: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-01-13 10:25:03.042593: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-01-13 10:25:03.042636: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


(BTC:USDT_sma_9                  4420.938889
 BTC:USDT_sma_20                 4152.906500
 BTC:USDT_sma_50                 4191.168800
 BTC:USDT_rsi                      66.872146
 BTC:USDT_bollinger_up           4807.149201
 BTC:USDT_bollinger_down         3498.663799
 BTC:USDT_adx                      21.528928
 BTC:USDT_macd_diff                69.379340
 BTC:USDT_obv                    1357.992566
 BTC:USDT_ichimoku_conversion    4499.990000
 BTC:USDT_stochastic_k             89.405027
 BTC:USDT_stochastic_d             76.604210
 BTC:USDT_aroon_up                  0.000000
 BTC:USDT_aroon_down              100.000000
 Name: 2017-10-09 00:00:00, dtype: float64,
 -0.0008126259935865576)

# Regression Model 

In [8]:
import tensorflow as tf
from tensorflow.keras import regularizers


class LSTMregression(tf.keras.Model):
    """Conv1D + stacked bidirectional-LSTM regressor: one value per window.

    Pipeline: feature normalization -> causal Conv1D -> BiLSTM (sequence)
    -> LayerNorm -> BiLSTM (final state) -> LayerNorm -> Dense -> Dropout
    -> Dense(1).

    The second LSTM returns only its final state, so the model emits a single
    prediction of shape ``(batch, 1)`` per input window. This matches datasets
    that carry one scalar target per window.

    Args:
        num_units: units per LSTM direction and width of the hidden dense
            layer; the convolution uses ``2 * num_units`` filters.
        activation: activation for the convolution and hidden dense layer.
        **kwargs: forwarded to ``tf.keras.Model``.

    Note:
        ``norm_layer`` is a ``Normalization`` layer created without
        statistics; call ``model.norm_layer.adapt(...)`` before training.
    """

    def __init__(self, num_units=30, activation="relu", **kwargs):
        super().__init__(**kwargs)

        self.norm_layer = tf.keras.layers.Normalization()
        self.conv1 = tf.keras.layers.Conv1D(
            filters=num_units * 2,
            kernel_size=2,
            strides=1,
            padding="causal",  # a timestep never sees later timesteps
            activation=activation,
        )
        self.lstm1 = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(num_units, return_sequences=True, stateful=False)
        )
        # Collapse the time axis here: a per-timestep output would have shape
        # (batch, timesteps, 1) and would not line up with scalar targets.
        self.lstm2 = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(num_units, return_sequences=False, stateful=False)
        )
        # Separate instances: the two normalizations see inputs of different
        # rank (sequence vs. final state) and should not share weights.
        self.layer_norm = tf.keras.layers.LayerNormalization()
        self.layer_norm2 = tf.keras.layers.LayerNormalization()
        self.dense = tf.keras.layers.Dense(
            num_units,
            activation=activation,
            kernel_initializer="he_normal",
            kernel_regularizer=regularizers.l2(0.01),
        )
        self.dropout = tf.keras.layers.Dropout(0.2)
        self.output_layer = tf.keras.layers.Dense(1)

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: batch of feature windows; Conv1D/LSTM require a rank-3
                tensor, i.e. ``(batch, timesteps, features)``.

        Returns:
            Tensor of shape ``(batch, 1)`` with one prediction per window.
        """
        x = self.norm_layer(inputs)
        x = self.conv1(x)
        x = self.lstm1(x)
        x = self.layer_norm(x)
        x = self.lstm2(x)
        x = self.layer_norm2(x)
        x = self.dense(x)
        x = self.dropout(x)
        return self.output_layer(x)


# Instantiate the regressor with its default hyper-parameters.
model = LSTMregression()

# Fit the input-normalization statistics on the training features only, so
# the validation rows do not influence the scaling.
model.norm_layer.adapt(X_train)

# MSE is the training objective; MAE and RMSE are reported alongside it.
model.compile(
    optimizer="adam",
    loss="mse",
    metrics=[
        "mae",
        "RootMeanSquaredError",
    ],
)

In [None]:
# Reset Keras' global state before training.
# NOTE(review): this runs after `model` was already created in the previous
# cell — confirm that is the intended order.
tf.keras.backend.clear_session()

# Stop once validation MAE has not improved for 3 epochs and roll back to the
# best weights seen so far.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_mae",
    patience=3,
    restore_best_weights=True,
)

# Halve the learning rate after a single epoch without validation-MAE
# improvement, but never go below 1e-4.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_mae",
    factor=0.5,
    patience=1,
    min_lr=0.0001,
)

# Write training curves for TensorBoard.
tensorboard = tf.keras.callbacks.TensorBoard("TB_regression_logs")

history = model.fit(
    train_df,
    epochs=50,
    validation_data=valid_df,
    callbacks=[early_stopping, tensorboard, reduce_lr],
)

In [11]:
model.save("last_regression_model.keras")  # Save the regression model

# Classification model

In [45]:
import tensorflow as tf
from tensorflow.keras import regularizers


class LSTMclassification(tf.keras.Model):
    """Conv1D + stacked bidirectional-LSTM classifier over feature windows.

    Pipeline: feature normalization -> causal Conv1D -> BiLSTM (sequence)
    -> LayerNorm -> BiLSTM (final state) -> Dense -> Dropout -> softmax.

    Args:
        num_classes: size of the softmax output.
        num_units: units per LSTM direction and width of the hidden dense
            layer; the convolution uses ``2 * num_units`` filters.
        activation: activation for the convolution and hidden dense layer.
        **kwargs: forwarded to ``tf.keras.Model``.
    """

    def __init__(self, num_classes, num_units=30, activation="relu", **kwargs):
        super().__init__(**kwargs)
        layers = tf.keras.layers

        # Input scaling; its statistics are expected to be set via `adapt`.
        self.norm_layer = layers.Normalization()

        # Causal padding keeps every timestep blind to later timesteps.
        self.conv1 = layers.Conv1D(
            filters=num_units * 2,
            kernel_size=2,
            strides=1,
            padding="causal",
            activation=activation,
        )

        # First recurrent stage keeps the whole sequence ...
        sequence_lstm = layers.LSTM(num_units, return_sequences=True, stateful=False)
        self.lstm1 = layers.Bidirectional(sequence_lstm)

        # ... the second one reduces it to a single vector per window.
        summary_lstm = layers.LSTM(num_units, return_sequences=False, stateful=False)
        self.lstm2 = layers.Bidirectional(summary_lstm)

        self.layer_norm = layers.LayerNormalization()

        # L2-regularized hidden layer followed by dropout.
        self.dense = layers.Dense(
            num_units,
            activation=activation,
            kernel_initializer="he_normal",
            kernel_regularizer=regularizers.l2(0.01),
        )
        self.dropout = layers.Dropout(0.2)

        # Class probabilities.
        self.output_layer = layers.Dense(num_classes, activation="softmax")

    def call(self, inputs):
        """Return class probabilities of shape ``(batch, num_classes)``."""
        hidden = self.conv1(self.norm_layer(inputs))
        hidden = self.layer_norm(self.lstm1(hidden))
        hidden = self.lstm2(hidden)
        hidden = self.dropout(self.dense(hidden))
        return self.output_layer(hidden)


# Requires `X_train`, `train_length` and the `LSTMclassification` class from
# the cells above.
# NOTE(review): the preprocessing cell shown earlier requests
# model_type="reg"; this section needs integer class ids as targets — confirm
# the data cell is re-run with classification targets first.
num_features = X_train.shape[1]
model = LSTMclassification(num_classes=3, num_units=30)

# Adapt normalization layer
model.norm_layer.adapt(X_train)

# Leave the batch dimension open: the last batch of an epoch can be smaller
# than `batch_size`.
model.build(input_shape=(None, train_length, num_features))


def _sparse_to_one_hot(y_true, y_pred):
    """Expand integer class ids to one-hot rows matching y_pred's class axis."""
    labels = tf.cast(tf.reshape(y_true, [-1]), tf.int32)
    return tf.one_hot(labels, depth=y_pred.shape[-1])


class SparsePrecision(tf.keras.metrics.Precision):
    """Precision for integer labels scored against per-class probabilities.

    The stock metric expects `y_true` and `y_pred` to have the same shape, so
    the sparse labels are one-hot encoded before being handed over.
    """

    def update_state(self, y_true, y_pred, sample_weight=None):
        return super().update_state(
            _sparse_to_one_hot(y_true, y_pred), y_pred, sample_weight
        )


class SparseRecall(tf.keras.metrics.Recall):
    """Recall for integer labels scored against per-class probabilities.

    The stock metric expects `y_true` and `y_pred` to have the same shape, so
    the sparse labels are one-hot encoded before being handed over.
    """

    def update_state(self, y_true, y_pred, sample_weight=None):
        return super().update_state(
            _sparse_to_one_hot(y_true, y_pred), y_pred, sample_weight
        )


# Compile the model
model.compile(
    optimizer="adam",
    # The output layer already applies softmax, hence from_logits=False.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[
        "accuracy",
        SparsePrecision(name="precision"),
        SparseRecall(name="recall"),
    ],
)

model.summary()

Model: "lst_mclassification_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization_13 (Normaliz  multiple                  31        
 ation)                                                          
                                                                 
 conv1d_13 (Conv1D)          multiple                  1860      
                                                                 
 bidirectional_26 (Bidirect  multiple                  21840     
 ional)                                                          
                                                                 
 bidirectional_27 (Bidirect  multiple                  21840     
 ional)                                                          
                                                                 
 layer_normalization_13 (La  multiple                  120       
 yerNormalization)                          

In [None]:
import numpy as np
from sklearn.utils import class_weight

tf.keras.backend.clear_session()

# Balance the classes using the labels that are actually used as targets:
# the first `train_length - 1` labels have no complete window and are dropped
# when the training dataset is built.
train_labels = y_train.iloc[(train_length - 1) :]
class_labels = np.unique(train_labels)
balanced_weights = class_weight.compute_class_weight(
    "balanced", classes=class_labels, y=train_labels
)
# Keras expects `class_weight` as a {class index: weight} mapping, not an array.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(class_labels, balanced_weights)
}

# Callbacks
# Monitor the validation metric (validation data is supplied to `fit`), so
# early stopping and LR reduction react to generalization, not to the
# training fit.
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=3, monitor="val_accuracy", restore_best_weights=True
)

tensorboard = tf.keras.callbacks.TensorBoard("TB_classification_logs")
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_accuracy",
    factor=0.5,  # Reduce learning rate by half
    patience=1,  # Number of epochs with no improvement
    min_lr=0.0001,  # Minimum learning rate
)
model.fit(
    train_df,
    validation_data=valid_df,
    callbacks=[early_stopping, tensorboard, reduce_lr],
    epochs=1,
    class_weight=class_weights,
)

In [None]:
# Save the classification model to a `.keras` file.
model.save("last_classification_model.keras")