In [2]:
from preprocessing import get_features_and_target
import tensorflow as tf
from tensorflow.keras.metrics import Precision, Recall

# LSTM regression on returns

# Preprocessing 

In [15]:
symbol = "BTC/USDT"
feature_lags = []
valid_split = 2000
train_length = 48
batch_size = 32
seed = 44

df = get_features_and_target(
    symbol,
    days_to_forecast=1,
    feature_lags=feature_lags,
    model="LSTM",
    model_type="reg",
)

symbol = symbol.replace("/", ":")

X = df.drop(columns=f"{symbol}_target")

y = df[f"{symbol}_target"].copy()

X_train, X_valid, y_train, y_valid = (
    X[:-valid_split],
    X[-valid_split:],
    y[:-valid_split],
    y[-valid_split:],
)

train_df = tf.keras.utils.timeseries_dataset_from_array(
    X_train,
    targets=y_train[train_length:],
    sequence_length=train_length,
    batch_size=batch_size,
    shuffle=True,  # Shaffles the sequences, not within sequences
    seed=seed,
)

valid_df = tf.keras.utils.timeseries_dataset_from_array(
    X_valid,
    targets=y_valid[train_length:],
    sequence_length=train_length,
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

for X_batch, y_batch in train_df.take(1000):
    print("Train batch X shape:", X_batch.shape)
    print("Train batch y shape:", y_batch.shape)  # starts from y.iloc[48]

print(len(df))

Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)
Train batch X shape: (32

# Model Regression

In [11]:
tf.keras.backend.clear_session()

# Preperation of layers
conv1 = tf.keras.layers.Conv1D(
    filters=60,
    kernel_size=3,
    strides=1,
    padding="causal",
    activation="relu",
    input_shape=[None, len(df.columns) - 1],
)

# Bidirectional LSTM tf.keras.layers
lstm1 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(30, return_sequences=True))
# lstm2 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(30, return_sequences=False))
norm_layer = tf.keras.layers.Normalization()
norm_layer.adapt(X_train)

# Model
inputs = tf.keras.layers.Input(shape=(None, len(df.columns) - 1))
x = norm_layer(inputs)
x = conv1(x)
x = lstm1(x)
# x = tf.keras.layers.Dropout(0.1)(x)
# x = lstm2(x)
# x = tf.keras.layers.Dropout(0.1)(x)
# x = tf.keras.layers.GlobalAveragePooling1D()(x)
x = tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal")(x)
x = tf.keras.layers.Dropout(0.1)(x)
outputs = tf.keras.layers.Dense(1)(x)

# Optimizers and losses
SGD_optimizer = tf.keras.optimizers.legacy.SGD(
    learning_rate=0.001, momentum=0.9, nesterov=True
)
adam_opt = tf.keras.optimizers.AdamW(learning_rate=0.001)

huber_loss = tf.keras.losses.Huber()
mse_loss = "mse"

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer="adam", loss=mse_loss, metrics=["mae", "RootMeanSquaredError"])
model.summary()



Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 15)]        0         
                                                                 
 normalization (Normalizati  (None, None, 15)          31        
 on)                                                             
                                                                 
 conv1d (Conv1D)             (None, None, 60)          2760      
                                                                 
 bidirectional (Bidirection  (None, None, 60)          21840     
 al)                                                             
                                                                 
 dense (Dense)               (None, None, 30)          1830      
                                                                 
 dropout (Dropout)           (None, None, 30)          0     

In [None]:
# Callbacks
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=3, monitor="val_mae", restore_best_weights=True
)
tensorboard = tf.keras.callbacks.TensorBoard("TB_regression_logs")
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_mae",
    factor=0.5,  # Reduce learning rate by half
    patience=1,  # Number of epochs with no improvement
    min_lr=0.0001,  # Minimum learning rate
)
model.fit(
    train_df,
    validation_data=valid_df,
    callbacks=[early_stopping, tensorboard, reduce_lr],
    epochs=50,
)

In [19]:
model.save("last_regression_model.keras")  # Save regression the model

# Classification model

# Preprocessing

In [3]:
symbol = "BTC/USDT"
feature_lags = []
valid_split = 2000
train_length = 48
batch_size = 32
seed = 44

df = get_features_and_target(
    symbol,
    days_to_forecast=1,
    feature_lags=feature_lags,
    model="LSTM",
    model_type="class",
)

symbol = symbol.replace("/", ":")

X = df.drop(columns=f"{symbol}_target")

y = df[f"{symbol}_target"].copy()

X_train, X_valid, y_train, y_valid = (
    X[:-valid_split],
    X[-valid_split:],
    y[:-valid_split],
    y[-valid_split:],
)

train_df = tf.keras.utils.timeseries_dataset_from_array(
    X_train,
    targets=y_train[train_length:],
    sequence_length=train_length,
    batch_size=batch_size,
    shuffle=True,  # Shaffles the sequences, not within sequences
    seed=seed,
)

valid_df = tf.keras.utils.timeseries_dataset_from_array(
    X_valid,
    targets=y_valid[train_length:],
    sequence_length=train_length,
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

for X_batch, y_batch in train_df.take(1):
    print("Train batch X shape:", X_batch.shape)
    print("Train batch y shape:", y_batch.shape)  # starts from y.iloc[48]

y.value_counts()

Train batch X shape: (32, 48, 15)
Train batch y shape: (32,)


2023-11-20 11:27:31.689900: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2023-11-20 11:27:31.689924: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-11-20 11:27:31.689928: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-11-20 11:27:31.689965: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-11-20 11:27:31.689984: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


BTC:USDT_target
0.0    35422
2.0     8564
1.0     7320
Name: count, dtype: int64

In [10]:
tf.keras.backend.clear_session()

# Preperation of layers
conv1 = tf.keras.layers.Conv1D(
    filters=60,
    kernel_size=3,
    strides=1,
    padding="causal",
    activation="relu",
    input_shape=[None, len(df.columns) - 1],
)

# Bidirectional LSTM tf.keras.layers
lstm1 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(30, return_sequences=True))
# lstm2 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(30, return_sequences=False))
norm_layer = tf.keras.layers.Normalization()
norm_layer.adapt(X_train)

# Model
inputs = tf.keras.layers.Input(shape=(None, len(df.columns) - 1))
x = norm_layer(inputs)
x = conv1(x)
x = lstm1(x)
# x = tf.keras.layers.Dropout(0.1)(x)
# x = lstm2(x)
# x = tf.keras.layers.Dropout(0.1)(x)
x = tf.keras.layers.GlobalAveragePooling1D()(x)
x = tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal")(x)
# x = tf.keras.layers.Dropout(0.1)(x)
outputs = tf.keras.layers.Dense(3, activation="softmax")(x)

# Optimizers and loss
SGD_optimizer = tf.keras.optimizers.legacy.SGD(
    learning_rate=0.001, momentum=0.9, nesterov=True
)
adam_opt = tf.keras.optimizers.AdamW(learning_rate=0.001)

categorical_loss = tf.keras.losses.SparseCategoricalCrossentropy()

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer=adam_opt,
    loss=categorical_loss,
    metrics=["accuracy", Precision(), Recall()],
)
model.summary()



Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 15)]        0         
                                                                 
 normalization (Normalizati  (None, None, 15)          31        
 on)                                                             
                                                                 
 conv1d (Conv1D)             (None, None, 60)          2760      
                                                                 
 bidirectional (Bidirection  (None, None, 60)          21840     
 al)                                                             
                                                                 
 global_average_pooling1d (  (None, 60)                0         
 GlobalAveragePooling1D)                                         
                                                             

In [11]:
from sklearn.utils import class_weight


# class_weights = class_weight.compute_class_weight(
#     "balanced", classes=np.unique(y_train), y=y_train
# )
class_weights = {0: 0.48, 1: 2.26, 2: 1.95}

# Callbacks
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=3, monitor="accuracy", restore_best_weights=True
)

tensorboard = tf.keras.callbacks.TensorBoard("TB_classification_logs")
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="accuracy",
    factor=0.5,  # Reduce learning rate by half
    patience=1,  # Number of epochs with no improvement
    min_lr=0.0001,  # Minimum learning rate
)
model.fit(
    train_df,
    validation_data=valid_df,
    callbacks=[early_stopping, tensorboard, reduce_lr],
    epochs=50,
    class_weight=class_weights,
)

Epoch 1/50


ValueError: in user code:

    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1377, in train_function  *
        return step_function(self, iterator)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1360, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1349, in run_step  **
        outputs = model.train_step(data)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1131, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/training.py", line 1225, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/engine/compile_utils.py", line 620, in update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/utils/metrics_utils.py", line 77, in decorated
        result = update_state_fn(*args, **kwargs)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/metrics/base_metric.py", line 140, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/metrics/confusion_metrics.py", line 470, in update_state  **
        return metrics_utils.update_confusion_matrix_variables(
    File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/keras/src/utils/metrics_utils.py", line 672, in update_confusion_matrix_variables
        y_pred.shape.assert_is_compatible_with(y_true.shape)

    ValueError: Shapes (None, 3) and (None, 1) are incompatible


In [None]:
model.save("last_classification_model.keras") 