In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Embedding, Dense, LayerNormalization, MultiHeadAttention, Dropout)
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.layers import Dropout

# ===================== Fixing Sequence Length =====================
max_seq_length = 49  # Ensure consistency in sequence lengths

# Ensure all sequences are padded to max_seq_length
train_sequences = pad_sequences(train_sequences, maxlen=max_seq_length, padding='post')
train_labels = np.array(train_labels)  # Ensure labels are NumPy arrays


# ===================== Transformer Block =====================
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.norm1 = LayerNormalization(epsilon=1e-6)
        self.norm2 = LayerNormalization(epsilon=1e-6)
        self.dense = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, mask=None):
        if mask is not None:
            # Ensure correct shape: (batch_size, 1, seq_length, seq_length)
            mask = tf.cast(mask[:, tf.newaxis, tf.newaxis, :], dtype=tf.float32)

        attn_output = self.attention(inputs, inputs, attention_mask=mask)
        attn_output = self.dropout1(attn_output)
        x = self.norm1(inputs + attn_output)
        dense_output = self.dense(x)
        dense_output = self.dropout2(dense_output)
        return self.norm2(x + dense_output)


# ===================== BERT4Rec Model =====================
class BERT4Rec(Model):
    def __init__(self, vocab_size, embed_dim, num_heads, ff_dim, num_blocks, max_len):
        super(BERT4Rec, self).__init__()
        self.item_embedding = Embedding(vocab_size, embed_dim, input_length=max_len)
        self.position_embedding = Embedding(max_len, embed_dim)
        self.transformer_blocks = [
            TransformerBlock(embed_dim, num_heads, ff_dim) for _ in range(num_blocks)
        ]
        self.dense = Dense(vocab_size, activation="softmax")

    def call(self, inputs):
        seq_length = tf.shape(inputs)[1]

        positions = tf.range(start=0, limit=seq_length, delta=1)
        positions = self.position_embedding(positions)

        x = self.item_embedding(inputs) + positions
        mask = tf.math.not_equal(inputs, 0)  # Mask padded values

        for block in self.transformer_blocks:
            x = block(x, mask)

        return self.dense(x[:, -1, :])  # Predict next item


# ===================== Compile & Train Model =====================
vocab_size = 10000  # Adjust as per your dataset
embed_dim = 128
num_heads = 4
ff_dim = 256
num_blocks = 2

bert4rec_model = BERT4Rec(vocab_size, embed_dim, num_heads, ff_dim, num_blocks, max_seq_length)
bert4rec_model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Train the model
bert4rec_model.fit(train_sequences, train_labels, batch_size=32, epochs=10, validation_split=0.1)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
bert4rec_model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Epoch 1/10




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.0606 - loss: 9.1368 - val_accuracy: 0.2000 - val_loss: 8.7307
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 300ms/step - accuracy: 0.3371 - loss: 8.3565 - val_accuracy: 0.2000 - val_loss: 8.3565
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step - accuracy: 0.3267 - loss: 7.9058 - val_accuracy: 0.2000 - val_loss: 7.9610
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 440ms/step - accuracy: 0.3674 - loss: 7.4653 - val_accuracy: 0.2000 - val_loss: 7.5311
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - accuracy: 0.3210 - loss: 7.0565 - val_accuracy: 0.2000 - val_loss: 7.0357
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 345ms/step - accuracy: 0.3163 - loss: 6.6021 - val_accuracy: 0.2000 - val_loss: 6.5786
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m