In [20]:
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from collections import defaultdict
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, Dense, Dropout, LayerNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
import random


In [9]:
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

import json
with open('ml_100k.json', 'r') as f:
    data = json.load(f)


In [10]:
# Extract sequences and labels
sequences = [entry[0].split(" | ") for entry in data]
labels = [entry[1] for entry in data]

In [11]:
# Create a mapping for items to indices
unique_items = set(item for seq in sequences for item in seq)
unique_items.update(labels)
item_to_idx = {item: idx + 1 for idx, item in enumerate(unique_items)}  # Start indices from 1
idx_to_item = {idx: item for item, idx in item_to_idx.items()}
num_items = len(item_to_idx) + 1  # Include padding index (0)

In [12]:
# Encode sequences and labels
encoded_sequences = [[item_to_idx[item] for item in seq] for seq in sequences]
encoded_labels = [item_to_idx[label] for label in labels]

In [13]:
# Pad sequences
max_seq_len = 50
padded_sequences = pad_sequences(encoded_sequences, maxlen=max_seq_len, padding='pre', truncating='pre')

In [15]:
# Split data into training and validation sets
train_size = int(0.8 * len(padded_sequences))
X_train, X_val = padded_sequences[:train_size], padded_sequences[train_size:]
y_train, y_val = np.array(encoded_labels[:train_size]), np.array(encoded_labels[train_size:])

In [24]:
# Define SASRec components
class SASRec(Model):
    def __init__(self, num_items, embedding_dim=64, num_heads=4, num_layers=2, dropout_rate=0.2):
        super(SASRec, self).__init__()
        self.item_embedding = Embedding(num_items, embedding_dim, mask_zero=True)
        self.position_embedding = Embedding(max_seq_len, embedding_dim)
        self.attention_layers = [
            tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim)
            for _ in range(num_layers)
        ]
        self.layer_norms = [LayerNormalization(epsilon=1e-6) for _ in range(num_layers)]
        self.dropout = Dropout(dropout_rate)
        self.dense = Dense(num_items)
    
    def call(self, inputs, training=False):
        seq, positions = inputs
        seq_emb = self.item_embedding(seq) + self.position_embedding(positions)
        mask = tf.cast(tf.not_equal(seq, 0), tf.float32)[:, tf.newaxis, tf.newaxis, :]
        
        for attn, ln in zip(self.attention_layers, self.layer_norms):
            attn_output = attn(seq_emb, seq_emb, attention_mask=mask)
            seq_emb = ln(seq_emb + attn_output)
            seq_emb = self.dropout(seq_emb, training=training)
        
        return self.dense(seq_emb[:, -1])

In [25]:
# Build positional indices
positions = np.tile(np.arange(max_seq_len), (len(padded_sequences), 1))
positions = pad_sequences(positions, maxlen=max_seq_len, padding='pre', truncating='pre')


In [26]:
# Create and compile the model
embedding_dim = 64
num_heads = 4
num_layers = 2
dropout_rate = 0.2

model = SASRec(num_items=num_items, embedding_dim=embedding_dim, num_heads=num_heads, num_layers=num_layers, dropout_rate=dropout_rate)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [30]:
# Train the model
batch_size = 64
epochs = 10

model.fit(
    x=(X_train, positions[:train_size]),
    y=y_train,
    validation_data=((X_val, positions[train_size:]), y_val),
    batch_size=batch_size,
    epochs=epochs
)

Epoch 1/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.0035 - loss: 7.3290 - val_accuracy: 0.0000e+00 - val_loss: 7.3139
Epoch 2/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.0092 - loss: 7.3139 - val_accuracy: 0.0000e+00 - val_loss: 7.3139
Epoch 3/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.0060 - loss: 7.3139 - val_accuracy: 0.0000e+00 - val_loss: 7.3139
Epoch 4/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.0039 - loss: 7.3139 - val_accuracy: 0.0000e+00 - val_loss: 7.3139
Epoch 5/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 8.8899e-04 - loss: 7.3139 - val_accuracy: 0.0000e+00 - val_loss: 7.3139
Epoch 6/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.0073 - loss: 7.3160 - val_accuracy: 0.0053 - val_loss: 7.3139
Epoch 7/10

<keras.src.callbacks.history.History at 0x209e02ae780>

In [31]:
model.save('sasrec_model.h5')



In [32]:
test_sequence = ["The Rock", "Titanic", "Jurassic Park"]
encoded_test_sequence = pad_sequences([[item_to_idx[item] for item in test_sequence]], maxlen=max_seq_len, padding='pre')
predictions = model.predict((encoded_test_sequence, positions[:1]))
top_items = np.argsort(predictions[0])[-10:][::-1]
recommended_items = [idx_to_item[idx] for idx in top_items]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253ms/step


In [33]:
print("Recommended Items:", recommended_items)

Recommended Items: ['Striptease', 'Bound', 'Washington Square', 'Backbeat', 'The Rock', 'Home Alone 3', 'Afterglow', 'The Blue Angel, The (Blaue Engel,', 'The Associate', 'Hear My Song']


In [36]:
def evaluate_model(model, sequences, labels, item_to_idx, max_seq_len, k=10):
    hits, ndcgs = [], []

    positions = np.tile(np.arange(max_seq_len), (len(sequences), 1))
    padded_sequences = pad_sequences(sequences, maxlen=max_seq_len, padding='pre', truncating='pre')

    for seq, label in zip(padded_sequences, labels):
        seq = np.expand_dims(seq, axis=0)
        pos = np.expand_dims(positions[0], axis=0)
        
        predictions = model.predict((seq, pos), verbose=0).flatten()

        top_k_items = np.argsort(predictions)[-k:][::-1]
        label_idx = item_to_idx[label]
        
        if label_idx in top_k_items:
            hits.append(1)
        else:
            hits.append(0)
        
        if label_idx in top_k_items:
            rank = np.where(top_k_items == label_idx)[0][0] + 1  # 1-based rank
            ndcgs.append(1 / np.log2(rank + 1))
        else:
            ndcgs.append(0)
    
    hr = np.mean(hits)
    ndcg = np.mean(ndcgs)
    
    return hr, ndcg

test_sequences = X_val
test_labels = [idx_to_item[idx] for idx in y_val]

hr, ndcg = evaluate_model(model, test_sequences, test_labels, item_to_idx, max_seq_len, k=19)

print(f"HR@10: {hr:.4f}")
print(f"NDCG@10: {ndcg:.4f}")

HR@10: 0.0106
NDCG@10: 0.0065


In [43]:
import numpy as np

def evaluate_hr_at_k(model, X, positions, y, k=10):
    predictions = model.predict((X, positions), batch_size=64)
    top_k_indices = np.argsort(predictions, axis=1)[:, -k:][:, ::-1]  
    hits = np.any(top_k_indices == y[:, np.newaxis], axis=1)
    hr_at_k = np.mean(hits)
    
    return hr_at_k

hr_at_10 = evaluate_hr_at_k(model, X_val, positions[train_size:], y_val, k=19)
print(f"HR@10: {hr_at_10:.4f}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
HR@10: 0.0106
