In [1]:
import keras_tuner as kt
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LayerNormalization, Dropout, MultiHeadAttention
from tensorflow.keras.models import Model

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [15]:
ticker_features = pd.read_csv('ticker_features.csv', index_col=["Date", "Name"])

In [5]:
def transformer_encoder(inputs, num_heads=4, ff_dim=64, dropout=0.1):
    attention = MultiHeadAttention(num_heads=num_heads, key_dim=inputs.shape[-1])(inputs, inputs)
    x = LayerNormalization()(inputs + attention)
    x = Dense(ff_dim, activation="relu")(x)
    x = Dropout(dropout)(x)
    x = Dense(inputs.shape[-1])(x)
    return LayerNormalization()(inputs + x) 

In [7]:
def create_ticker_transformer(seq_length=30, num_features=4, num_heads=4, ff_dim=64, num_tickers=30, embed_dim=8):
    
    ticker_input = Input(shape=(), dtype=tf.int32, name="ticker_input")
    ticker_embedding = tf.keras.layers.Embedding(input_dim=num_tickers+1, output_dim=embed_dim)(ticker_input)

    features_input = Input(shape=(seq_length, num_features), name="features_input")
    expanded_embedding = tf.keras.layers.RepeatVector(seq_length)(ticker_embedding)

    x = tf.keras.layers.Concatenate()([features_input, expanded_embedding])
    x = transformer_encoder(x, num_heads, ff_dim)
    x = transformer_encoder(x, num_heads, ff_dim)
    
    x = tf.keras.layers.GlobalAveragePooling1D()(x)
    x = Dense(32, activation="relu")(x)
    x = Dense(1, activation="linear")(x)

    return Model([ticker_input, features_input], x)

In [29]:
ticker_model = create_ticker_transformer(num_tickers=30)
ticker_model.compile(optimizer="adam", loss="mse", metrics=["mae"])

In [11]:
ticker_model.summary()

In [17]:
ticker_scaler = StandardScaler()
X_ticker = ticker_scaler.fit_transform(ticker_features[['Pct_Change', 'MA_5', 'MA_10', 'Volume_MA_5']].values)

label_encoder = LabelEncoder()
ticker_ids = label_encoder.fit_transform(ticker_features.index.get_level_values('Name'))

In [19]:
def create_ticker_sequences(ticker_features, sequence_length=30):
    X, y, ids = [], [], []
    
    for ticker, group in ticker_features.groupby(level='Name'):
        group = group.sort_index(level='Date') 
        data = ticker_scaler.transform(group[['Pct_Change', 'MA_5', 'MA_10', 'Volume_MA_5']].values)
        
        ticker_id = label_encoder.transform([ticker])[0]

        for i in range(len(data) - sequence_length):
            X.append(data[i:i+sequence_length])
            y.append(data[i+sequence_length, 0])
            ids.append(ticker_id)

    return np.array(X), np.array(y), np.array(ids)

# Create sequences with IDs
X_ticker_seq, y_ticker_seq, ticker_id_seq = create_ticker_sequences(ticker_features)

In [21]:
X_train2, X_val2, y_train2, y_val2, id_train2, id_val2 = train_test_split(X_ticker_seq, y_ticker_seq, ticker_id_seq, test_size=0.1, random_state=42)

In [23]:
train_ticker_dataset = tf.data.Dataset.from_tensor_slices(
    ({"features_input": X_train2, "ticker_input": id_train2}, y_train2)
)
train_ticker_dataset = train_ticker_dataset.batch(32, drop_remainder=True).prefetch(tf.data.AUTOTUNE)

val_ticker_dataset = tf.data.Dataset.from_tensor_slices(
    ({"features_input": X_val2, "ticker_input": id_val2}, y_val2)
)
val_ticker_dataset = val_ticker_dataset.batch(32, drop_remainder=True).prefetch(tf.data.AUTOTUNE)

In [31]:
ticker_model.fit(train_ticker_dataset, epochs=6, validation_data=val_ticker_dataset)

Epoch 1/6
[1m2598/2598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 32ms/step - loss: 1.0450 - mae: 0.6461 - val_loss: 0.9876 - val_mae: 0.6392
Epoch 2/6
[1m2598/2598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 39ms/step - loss: 1.0381 - mae: 0.6417 - val_loss: 0.9844 - val_mae: 0.6373
Epoch 3/6
[1m2598/2598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 43ms/step - loss: 1.0375 - mae: 0.6413 - val_loss: 0.9843 - val_mae: 0.6370
Epoch 4/6
[1m2598/2598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 44ms/step - loss: 1.0373 - mae: 0.6414 - val_loss: 0.9844 - val_mae: 0.6373
Epoch 5/6
[1m2598/2598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 38ms/step - loss: 1.0368 - mae: 0.6411 - val_loss: 0.9834 - val_mae: 0.6364
Epoch 6/6
[1m2598/2598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 44ms/step - loss: 1.0364 - mae: 0.6411 - val_loss: 0.9835 - val_mae: 0.6365


<keras.src.callbacks.history.History at 0x2c202766fc0>