In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Import packages
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional, Masking # type: ignore
from tensorflow.keras.callbacks import EarlyStopping # type: ignore
from tensorflow.keras import regularizers # type: ignore
import pickle
import json
import datetime

# Local imports
from src.data_processing.lstm_data_preprocessing_v3 import reduce_time_bucket_features, FeaturesConfig
from src.data_processing.loader import load_time_bucket_data

In [3]:
# Define configs
features_config = FeaturesConfig(
        relative_time=True,
        price_change=True,
    )

test_size = 0.2

In [4]:
# Generate train - test data

X_scaler = StandardScaler()
y_scaler = StandardScaler()

time_bucket_folder = "time_bucket_1"  # Change based on which time bucket configuration you want to used, their preprocessed in different folders
token_time_buckets, time_bucket_config = load_time_bucket_data(time_bucket_folder)

token_datasets = []
for token_address, data in token_time_buckets.items():
    X = data["X"]
    y = data["y"]
    bucket_times = data["bucket_times"]

    # Only get the features listed in features_config
    X = reduce_time_bucket_features(X, features_config)

    token_datasets.append((X, y, token_address, bucket_times))

# Combine all token data
all_X = np.vstack([data[0] for data in token_datasets])
all_y = np.vstack([data[1].reshape(-1, 1) for data in token_datasets])

# Scale features
num_samples, time_steps, features = all_X.shape
X_reshaped = all_X.reshape(num_samples * time_steps, features)
X_scaled = X_scaler.fit_transform(X_reshaped)
X_scaled = X_scaled.reshape(num_samples, time_steps, features)

# Scale target variable also using StandardScaler to preserve direction
y_scaled = y_scaler.fit_transform(all_y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=test_size, shuffle=False)

In [5]:
# Custom loss function
def weighted_mse_large_moves(y_true, y_pred):
    diff = y_true - y_pred
    weight = tf.math.square(y_true)
    return tf.reduce_mean(weight * tf.square(diff))

# Model definition
model = Sequential()

# Masking layer for the padded values
model.add(Masking(mask_value=0., input_shape=(X_train.shape[1], X_train.shape[2])))

# Input + first LSTM layer
model.add(Bidirectional(
    LSTM(64, return_sequences=True, kernel_regularizer=regularizers.l2(1e-4))
))
model.add(BatchNormalization())
model.add(Dropout(0.3))

# Second LSTM
model.add(Bidirectional(
    LSTM(64, return_sequences=False, kernel_regularizer=regularizers.l2(1e-4))
))
model.add(BatchNormalization())
model.add(Dropout(0.3))

# Dense output layer
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

# Compile
model.compile(optimizer='adam', loss=weighted_mse_large_moves)

# Callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=0.001,
    mode='min',
    restore_best_weights=True,
    verbose=1
)

# Train
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
    verbose=1
)


  super().__init__(**kwargs)


Epoch 1/100
[1m2089/2089[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m360s[0m 170ms/step - loss: 6.4658 - val_loss: 6.3840
Epoch 2/100
[1m2089/2089[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m369s[0m 177ms/step - loss: 5.9315 - val_loss: 6.1333
Epoch 3/100
[1m1082/2089[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m2:49[0m 168ms/step - loss: 6.0408

KeyboardInterrupt: 

In [None]:
# Save model

# Create a directory to save all model artifacts
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
base_dir = "trained_models"
model_name = f"lstm_{len(os.listdir(base_dir)) + 1}"
model_dir = os.path.join(base_dir, model_name)
os.makedirs(model_dir, exist_ok=True)

# 1. Save the Keras model
model_path = os.path.join(model_dir, "model.keras")
model.save(model_path)
print(f"Model saved to {model_path}")


# 2. Save configuration parameters
# Get optimizer and loss function from model.compile
optimizer_name = model.optimizer.__class__.__name__.lower()
loss_name = model.loss.__name__ if callable(model.loss) else model.loss

# Extract model layers information
model_layers = []
for layer in model.layers:
    layer_config = {
        "name": layer.name,
        "type": layer.__class__.__name__
    }
    
    # Add units if available (for LSTM and Dense layers)
    if hasattr(layer, "units"):
        layer_config["units"] = layer.units
        
    # Add activation if available
    if hasattr(layer, "activation") and layer.activation is not None:
        if hasattr(layer.activation, "__name__"):
            layer_config["activation"] = layer.activation.__name__
        else:
            layer_config["activation"] = str(layer.activation)
            
    # Add dropout rate if applicable
    if hasattr(layer, "rate"):
        layer_config["rate"] = layer.rate
        
    # Try to get output shape from layer.output if it exists
    try:
        if hasattr(layer, "output") and layer.output is not None:
            output_shape = layer.output.shape.as_list()
            layer_config["output_shape"] = [dim if dim is not None else -1 for dim in output_shape]
    except (AttributeError, ValueError):
        # Skip output shape if it causes an error
        pass
        
    model_layers.append(layer_config)

# Create configuration dictionary with all parameters used
config = {
    "features_config": vars(features_config),  # Convert class to dict
    "time_bucket_folder": time_bucket_folder,
    "test_size": test_size,
    "training_params": {
        "optimizer": optimizer_name,
        "loss": loss_name,
    },
    "model_architecture": {
        "layers": model_layers,
        "total_params": model.count_params()
    },
    "timestamp": timestamp,
    "input_shape": [dim if dim is not None else -1 for dim in model.input_shape],  # Save input shape
    "X_train_shape": list(X_train.shape),
    "y_train_shape": list(y_train.shape)
}

config_path = os.path.join(model_dir, "config.json")
with open(config_path, "w") as f:
    json.dump(config, f, indent=4)
print(f"Configuration saved to {config_path}")
print(f"\nAll model artifacts saved to {model_dir}")

In [None]:
import matplotlib.pyplot as plt

plt.hist(y_train, bins=100, alpha=0.5, label="Train")
plt.hist(y_test, bins=100, alpha=0.5, label="Test")
plt.legend()
plt.title("Distribution of Target (y)")
plt.show()
