In [4]:
from preprocessing import get_features_and_target

# LSTM regression on returns

# Preprocessing regression

In [5]:
symbol = "BTC/USDT"
feature_lags = []
valid_split = 2000
train_length = 48
batch_size = 32
seed = 44

df = get_features_and_target(
    symbol,
    days_to_forecast=1,
    feature_lags=feature_lags,
    model_type="reg",
)

symbol = symbol.replace("/", ":")

X = df.drop(columns=f"{symbol}_target")

y = df[f"{symbol}_target"].copy()

X_train, X_valid, y_train, y_valid = (
    X[:-valid_split],
    X[-valid_split:],
    y[:-valid_split],
    y[-valid_split:],
)

train_df = tf.keras.utils.timeseries_dataset_from_array(
    X_train,
    targets=y_train[(train_length - 1) :],
    sequence_length=train_length,
    batch_size=batch_size,
    shuffle=False,
    seed=seed,
)

valid_df = tf.keras.utils.timeseries_dataset_from_array(
    X_valid,
    targets=y_valid[(train_length - 1) :],
    sequence_length=train_length,
    batch_size=batch_size,
    shuffle=False,
    seed=seed,
)

# Check
# Initialize variables to hold the first batch's data
# Get the first batch from the dataset
for X_batch, y_batch in train_df.take(1):
    # Extract the first sample from the batch
    first_sample_features = X_batch[0][-1].numpy().tolist()
    first_sample_target = y_batch[0]

    # Print the feature vector and its corresponding target
    print("Feature Vector (First Sample):", first_sample_features)
    print("Target (First Sample):", first_sample_target)
    break  # Exit after processing the first batch

X_train.iloc[47], y_train[47]
y_train

Feature Vector (First Sample): [4320.661111111117, 4313.296499999998, 4313.8142000000025, 4139.704399999998, 48.092587697298235, 4376.440449600143, 4250.152550399852, 17.075320058893993, -2.8431784957280275, 54.29219000000007, 4342.780000000001, 27.8199457847807, 24.468002066163535, 0.0, 42.85714285714286]
Target (First Sample): tf.Tensor(-0.0027796306066146803, shape=(), dtype=float64)


2023-12-03 13:38:36.029935: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2023-12-03 13:38:36.029961: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-12-03 13:38:36.029968: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-12-03 13:38:36.030226: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-03 13:38:36.030596: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  X_train.iloc[47], y_train[47]


BTC:USDT_timestamp
2017-08-25 11:00:00   -0.016635
2017-08-25 12:00:00   -0.015247
2017-08-25 13:00:00   -0.011621
2017-08-25 14:00:00   -0.007201
2017-08-25 15:00:00   -0.027162
                         ...   
2023-08-13 01:00:00   -0.006273
2023-08-13 02:00:00   -0.004290
2023-08-13 03:00:00   -0.000634
2023-08-13 04:00:00    0.000657
2023-08-13 05:00:00    0.002130
Name: BTC:USDT_target, Length: 49305, dtype: float64

# Model Regression

In [9]:
import tensorflow as tf
from tensorflow.keras import regularizers

tf.keras.backend.clear_session()

class LSTMregression(tf.keras.Model):
    def __init__(self, num_units=30, activation="relu", **kwargs):
        super().__init__(**kwargs)

        self.norm_layer = tf.keras.layers.Normalization()
        self.conv1 = tf.keras.layers.Conv1D(
            filters=num_units * 2,
            kernel_size=2,
            strides=1,
            padding="causal",
            activation=activation,
        )
        self.lstm1 = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(num_units, return_sequences=True, stateful=False)
        )
        self.lstm2 = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(num_units, return_sequences=True, stateful=False)
        )
        self.layer_norm = tf.keras.layers.LayerNormalization()
        self.dense = tf.keras.layers.Dense(
            num_units,
            activation=activation,
            kernel_initializer="he_normal",
            kernel_regularizer=regularizers.l2(0.01),
        )
        self.dropout = tf.keras.layers.Dropout(0.2)
        self.output_layer = tf.keras.layers.Dense(1)

    def call(self, inputs):
        x = self.norm_layer(inputs)
        x = self.conv1(x)
        x = self.lstm1(x)
        x = self.layer_norm(x)
        x = self.lstm2(x)
        x = self.layer_norm(x)
        x = self.dense(x)
        x = self.dropout(x)
        return self.output_layer(x)


# Assuming df and X_train are already defined
num_features = len(df.columns) - 1
model = LSTMregression(num_features=num_features)

# Adapt normalization layer
model.norm_layer.adapt(X_train)

# Compile the model
model.compile(optimizer="adam", loss="mse", metrics=["mae", "RootMeanSquaredError"])

# Model summary
model.summary()

Model: "lst_mregression"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizati  multiple                  31        
 on)                                                             
                                                                 
 conv1d (Conv1D)             multiple                  1860      
                                                                 
 bidirectional (Bidirection  multiple                  21840     
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  multiple                  21840     
 onal)                                                           
                                                                 
 layer_normalization (Layer  multiple                  120       
 Normalization)                                    

In [None]:
# Callbacks
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=3, monitor="accuracy", restore_best_weights=True
)
tensorboard = tf.keras.callbacks.TensorBoard("TB_regression_logs")
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_mae",
    factor=0.5,  # Reduce learning rate by half
    patience=1,  # Number of epochs with no improvement
    min_lr=0.0001,  # Minimum learning rate
)
history = model.fit(
    train_df,
    validation_data=valid_df,
    callbacks=[early_stopping, tensorboard, reduce_lr],
    epochs=50,
)

In [11]:
model.save("last_regression_model.keras")  # Save regression the model

# Classification model

# Preprocessing classificaton

In [25]:
symbol = "BTC/USDT"
feature_lags = []
valid_split = 2000
train_length = 48
batch_size = 32
seed = 44

df_classification = get_features_and_target(
    symbol,
    days_to_forecast=1,
    feature_lags=feature_lags,
    model_type="class",
)

symbol = symbol.replace("/", ":")

X = df_classification.drop(columns=f"{symbol}_target")

y = df_classification[f"{symbol}_target"].copy().map(lambda x: int(x))

X_train, X_valid, y_train, y_valid = (
    X[:-valid_split],
    X[-valid_split:],
    y[:-valid_split],
    y[-valid_split:],
)

train_df_classification = tf.keras.utils.timeseries_dataset_from_array(
    X_train,
    targets=y_train[train_length:],
    sequence_length=train_length,
    batch_size=batch_size,
    shuffle=True,  # Shaffles the sequences, not within sequences
    seed=seed,
)

valid_df_classification = tf.keras.utils.timeseries_dataset_from_array(
    X_valid,
    targets=y_valid[train_length:],
    sequence_length=train_length,
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

for X_batch, y_batch in train_df_classification.take(1):
    print("Train batch X shape:", X_batch)
    print("Train batch y shape:", y_batch)  # starts from y.iloc[48]

print(f"X_shape = {X_batch.shape}")
print(f"y_shape = {y.shape}")

Train batch X shape: tf.Tensor(
[[[3.98791444e+03 3.99111850e+03 3.99332180e+03 ... 4.12691418e+01
   9.28571429e+01 3.57142857e+01]
  [3.98707444e+03 3.99132450e+03 3.99361620e+03 ... 4.48170716e+01
   8.57142857e+01 2.85714286e+01]
  [3.98757889e+03 3.99173250e+03 3.99387920e+03 ... 5.60380796e+01
   7.85714286e+01 2.14285714e+01]
  ...
  [3.93456889e+03 3.94889050e+03 3.97370180e+03 ... 4.02517141e+01
   2.85714286e+01 0.00000000e+00]
  [3.93641000e+03 3.94567800e+03 3.97233600e+03 ... 2.73721019e+01
   2.14285714e+01 0.00000000e+00]
  [3.93529667e+03 3.94265750e+03 3.97118800e+03 ... 2.21275047e+01
   1.42857143e+01 0.00000000e+00]]

 [[9.36813667e+03 9.34704100e+03 9.33306280e+03 ... 2.03941418e+01
   0.00000000e+00 6.42857143e+01]
  [9.36539000e+03 9.35106200e+03 9.33216940e+03 ... 2.80374919e+01
   8.57142857e+01 5.71428571e+01]
  [9.36295444e+03 9.35489250e+03 9.33143260e+03 ... 3.60592276e+01
   1.00000000e+02 5.00000000e+01]
  ...
  [9.64090000e+03 9.61000900e+03 9.46923360e+

In [45]:
import tensorflow as tf
from tensorflow.keras import regularizers


class LSTMclassification(tf.keras.Model):
    def __init__(self, num_classes, num_units=30, activation="relu", **kwargs):
        super().__init__(**kwargs)

        self.norm_layer = tf.keras.layers.Normalization()
        self.conv1 = tf.keras.layers.Conv1D(
            filters=num_units * 2,
            kernel_size=2,
            strides=1,
            padding="causal",
            activation=activation,
        )
        self.lstm1 = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(num_units, return_sequences=True, stateful=False)
        )
        self.lstm2 = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(num_units, return_sequences=False, stateful=False)
        )
        self.layer_norm = tf.keras.layers.LayerNormalization()
        self.dense = tf.keras.layers.Dense(
            num_units,
            activation=activation,
            kernel_initializer="he_normal",
            kernel_regularizer=regularizers.l2(0.01),
        )
        self.dropout = tf.keras.layers.Dropout(0.2)
        self.output_layer = tf.keras.layers.Dense(num_classes, activation="softmax")

    def call(self, inputs):
        x = self.norm_layer(inputs)
        x = self.conv1(x)
        x = self.lstm1(x)
        x = self.layer_norm(x)
        x = self.lstm2(x)
        x = self.dense(x)
        x = self.dropout(x)
        return self.output_layer(x)


# Assuming df, X_train, and num_classes are already defined
num_features = len(df.columns) - 1
model = LSTMclassification(num_classes=3, num_units=30)

# Adapt normalization layer
model.norm_layer.adapt(X_train)

model.build(input_shape=(batch_size, train_length, num_features))

# Compile the model
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["accuracy", tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
)

model.summary()

Model: "lst_mclassification_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization_13 (Normaliz  multiple                  31        
 ation)                                                          
                                                                 
 conv1d_13 (Conv1D)          multiple                  1860      
                                                                 
 bidirectional_26 (Bidirect  multiple                  21840     
 ional)                                                          
                                                                 
 bidirectional_27 (Bidirect  multiple                  21840     
 ional)                                                          
                                                                 
 layer_normalization_13 (La  multiple                  120       
 yerNormalization)                          

In [46]:
from sklearn.utils import class_weight


# class_weights = class_weight.compute_class_weight(
#     "balanced", classes=np.unique(y_train), y=y_train
# )

class_weights = {0: 0.48, 1: 2.26, 2: 1.95}

# Callbacks
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=3, monitor="accuracy", restore_best_weights=True
)

tensorboard = tf.keras.callbacks.TensorBoard("TB_classification_logs")
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="accuracy",
    factor=0.5,  # Reduce learning rate by half
    patience=1,  # Number of epochs with no improvement
    min_lr=0.0001,  # Minimum learning rate
)
model.fit(
    train_df_classification,
    validation_data=valid_df_classification,
    callbacks=[early_stopping, tensorboard, reduce_lr],
    epochs=1,
    class_weight=class_weights,
)

ValueError: in user code:

    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1155, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1249, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/compile_utils.py", line 620, in update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/utils/metrics_utils.py", line 77, in decorated
        result = update_state_fn(*args, **kwargs)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/metrics/base_metric.py", line 140, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/metrics/confusion_metrics.py", line 470, in update_state  **
        return metrics_utils.update_confusion_matrix_variables(
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/utils/metrics_utils.py", line 672, in update_confusion_matrix_variables
        y_pred.shape.assert_is_compatible_with(y_true.shape)

    ValueError: Shapes (None, 3) and (None, 1) are incompatible


In [None]:
model.save("last_classification_model.keras")