In [3]:
import pandas as pd
import numpy as np
import networkx as nx
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K
from sklearn.preprocessing import LabelEncoder
import gym
from gym import spaces
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score
from scipy.sparse import coo_matrix
import scipy as sp
import copy
import random

In [10]:

# Data preprocessing
# Load the Yoochoose dataset and preprocess it to create session-based sequences of interactions
data = pd.read_csv('yoochoose_dataset/filtered_clicks.dat',
                   names=['session_id', 'timestamp', 'item_id', 'category'],
                   dtype={'session_id': 'int64', 'timestamp': 'str', 'item_id': 'int64', 'category': 'int64'},
                   parse_dates=['timestamp'])

# Create item and session maps
item_map = dict(zip(np.unique(data.item_id), range(len(np.unique(data.item_id)))))
session_map = dict(zip(np.unique(data.session_id), range(len(np.unique(data.session_id)))))

# Map item and session IDs
data['item_id'] = data['item_id'].map(item_map)
data['session_id'] = data['session_id'].map(session_map)

# Sort by session and timestamp
data = data.sort_values(['session_id', 'timestamp'])

# Create next item and session columns
data['next_item_id'] = data.groupby('session_id')['item_id'].shift(-1)
data['next_session_id'] = data.groupby('session_id')['session_id'].shift(-1)
data = data.dropna()

# Convert data to numpy arrays
session_ids = data['session_id'].values.astype('int32')
item_ids = data['item_id'].values.astype('int32')
next_item_ids = data['next_item_id'].values.astype('int32')
next_session_ids = data['next_session_id'].values.astype('int32')
timestamps = data['timestamp'].values

# Create a directed graph
graph = nx.DiGraph()

# Add nodes to the graph
graph.add_nodes_from(item_map.values())

# Add edges between items that co-occur in the same session
for session_id in np.unique(session_ids):
    items_in_session = item_ids[session_ids == session_id]
    for i in range(len(items_in_session)):
        for j in range(i + 1, len(items_in_session)):
            if not graph.has_edge(items_in_session[i], items_in_session[j]):
                graph.add_edge(items_in_session[i], items_in_session[j], weight=0)
            graph[items_in_session[i]][items_in_session[j]]['weight'] += 1

# Normalize edge weights
for u, v, d in graph.edges(data=True):
    d['weight'] /= np.sqrt(graph.degree(u) * graph.degree(v))            

# Create adjacency matrix
adj_matrix = coo_matrix(nx.to_numpy_array(graph, weight='weight', dtype=np.float32))
adj_matrix = tf.sparse.SparseTensor(indices=np.array([adj_matrix.row, adj_matrix.col]).T,
                                    values=adj_matrix.data,
                                    dense_shape=adj_matrix.shape)    
print(adj_matrix.shape)
num_nodes = adj_matrix.shape[0] 
    

num_items = len(item_map)
print(num_items)
    
embedding_dim = 32
num_layers = 2
hidden_dim = 32


class GNNActor(tf.keras.Model):
    def __init__(self, num_items, num_features, num_edge_features, hidden_dim):
        super(GNNActor, self).__init__()
        self.input_layer = tf.keras.layers.InputLayer(input_shape=(None, num_features + num_edge_features), sparse=True)
        self.num_items = num_items
        self.num_features = num_features
        self.num_edge_features = num_edge_features
        self.hidden_dim = hidden_dim

        # Define node embedding layer
        self.node_embedding = tf.keras.layers.Embedding(input_dim=num_items, output_dim=hidden_dim, input_length=1)

        # Define graph convolutional layers
        self.gcn_layer1 = tf.keras.layers.Dense(units=hidden_dim, activation='relu')
        self.gcn_layer2 = tf.keras.layers.Dense(units=hidden_dim, activation='relu')
        
        # Define final prediction layer
        self.prediction_layer = tf.keras.layers.Dense(units=num_items, activation='softmax')

    def call(self, inputs):
        # Check input shape and extract node and edge features accordingly
        print(f'inputs.shape {inputs.shape}')
        if len(inputs.shape) == 1:
            # print(f'GNN inputs shape: {inputs.shape}')
            node_features = tf.expand_dims(tf.range(self.num_items), axis=-1)
            edge_features = inputs
        else:
            inputs = tf.reshape(inputs, shape=(-1, self.num_features + self.num_edge_features))
            node_features, edge_features = tf.unstack(inputs, axis=1)
        # Node embedding layer
        # print(f'node_features shape: {node_features.shape}')
        node_embeddings = self.node_embedding(node_features) # (batch_size, 1, hidden_dim)
        # print(f'node_embeddings shape: {node_embeddings.shape}')
        # print(f'node_embeddings: {node_embeddings}')
        # print(f'edge_features shape: {edge_features.shape}')
        # print(f'edge_features: {edge_features}')

        # Reshape edge_features tensor
        edge_features = tf.expand_dims(edge_features, axis=-1) # (batch_size, num_edge_features, num_edges, 1)
        # Matmul edge_features and node_embeddings tensors
        hidden1 = self.gcn_layer1(tf.linalg.matmul(tf.expand_dims(edge_features, axis=-1), node_embeddings)) # (batch_size, num_edge_features, num_edges, hidden_dim)
        hidden1 = tf.reshape(hidden1, [-1, self.num_edge_features, self.hidden_dim]) # (batch_size, num_edge_features, hidden_dim)
        hidden2 = self.gcn_layer2(tf.linalg.matmul(tf.expand_dims(edge_features, axis=-1), hidden1)) # (batch_size, num_edge_features, num_edges, hidden_dim)
        hidden2 = tf.reshape(hidden2, [-1, self.num_edge_features, self.hidden_dim]) # (batch_size, num_edge_features, hidden_dim)

        # Concatenate node features and hidden layers
        concat_features = tf.concat([node_embeddings, hidden1, hidden2], axis=-1) # (batch_size, 1, 3*hidden_dim)

        # Final prediction layer
        predictions = self.prediction_layer(tf.squeeze(concat_features, axis=1)) # (batch_size, num_items)
        # print(f'Predictions shape: {predictions.shape}')
        print(f'Predictions: {predictions}')
        return predictions



class DQNCritic(tf.keras.Model):
    def __init__(self, num_items, hidden_dim):
        super(DQNCritic, self).__init__()
        self.num_items = num_items
        self.hidden_dim = hidden_dim
        
        # Define dense layers
        self.dense1 = tf.keras.layers.Dense(units=hidden_dim, activation='relu')
        self.dense2 = tf.keras.layers.Dense(units=1, activation=None)

    def call(self, inputs):
        # Pass input through dense layers
        print(f'DQN inputs.shape {inputs.shape}')
        x = self.dense1(inputs)
        x = self.dense2(x)
        
        # Reshape output to (batch_size, num_items)
        q_values = tf.reshape(x, shape=(-1, self.num_items))
        print(f'q_values {q_values.shape}')
        return q_values
    
class RecommenderEnv(gym.Env):
    def __init__(self, session_ids, item_ids, next_item_ids, next_session_ids, adj_matrix):
        super(RecommenderEnv, self).__init__()

        self.session_ids = session_ids
        self.item_ids = item_ids
        self.next_item_ids = next_item_ids
        self.next_session_ids = next_session_ids
        self.adj_matrix = adj_matrix

        self.num_items = adj_matrix.shape[0]
        self.num_sessions = len(np.unique(session_ids))

        self.action_space = spaces.Discrete(self.num_items)
        self.observation_space = spaces.Box(low=0, high=1, shape=(self.num_items,))

        self.current_session = 0
        self.current_session_items = set(self.item_ids[self.session_ids == self.current_session])
        self.current_state = np.zeros(self.num_items)

    def reset(self):
        self.current_session = 0
        self.current_session_items = set(self.item_ids[self.session_ids == self.current_session])
        self.current_state = np.zeros(self.num_items)
        return self.current_state

    def step(self, action):
        # Update current state
        self.current_state[action] = 1

        # Get the reward for the action
        if action.ref() in self.current_session_items:
            reward = 1
        else:
            reward = 0

        # Move to the next session if the current session has ended
        if self.next_session_ids[self.current_session] != self.current_session:
            self.current_session = self.next_session_ids[self.current_session]
            self.current_session_items = set(self.item_ids[self.session_ids == self.current_session])

        # Check if the episode is over
        done = self.current_session == self.num_sessions - 1

        return self.current_state, reward, done, {}

# Define hyperparameters
batch_size = 128
num_epochs = 10
learning_rate = 0.001

# Define the GNNActor model
actor_model = GNNActor(num_items, num_features=1, num_edge_features=1, hidden_dim=32)


# Define the DQNCritic model
critic_model = DQNCritic(num_items, hidden_dim)

# Define optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Define loss functions
actor_loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
critic_loss_fn = tf.keras.losses.MeanSquaredError()

# Define metrics
actor_metrics = tf.keras.metrics.SparseCategoricalAccuracy()
critic_metrics = tf.keras.metrics.MeanSquaredError()

# Train the models
for epoch in range(num_epochs):
    print(f'Epoch {epoch}/{num_epochs}')
    num_batches = len(session_ids) // batch_size
    for batch_idx in range(num_batches):
        # Select batch data
        start_idx = batch_idx * batch_size
        end_idx = (batch_idx + 1) * batch_size
        batch_session_ids = session_ids[start_idx:end_idx]
        batch_item_ids = item_ids[start_idx:end_idx]
        batch_next_item_ids = next_item_ids[start_idx:end_idx]
        batch_adj_matrix = adj_matrix
        
        # Compute target Q values using critic model
        with tf.GradientTape() as tape:
            # Predict Q values for the next state
            q_values_next = critic_model(batch_next_item_ids, actor_model(batch_adj_matrix(batch_item_ids)))
            
            # Compute target Q values
            rewards = tf.ones_like(q_values_next)
            q_values_target = rewards + 0.99 * tf.reduce_max(q_values_next, axis=-1)
        
        # Compute critic loss
        critic_loss = critic_loss_fn(q_values_target, critic_model(batch_item_ids, actor_model(batch_adj_matrix(batch_item_ids))))
        
        # Compute gradients and update critic model
        critic_grads = tape.gradient(critic_loss, critic_model.trainable_variables)
        optimizer.apply_gradients(zip(critic_grads, critic_model.trainable_variables))
        
        # Update critic metrics
        critic_metrics.update_state(q_values_target, critic_model(batch_item_ids, actor_model(batch_adj_matrix(batch_item_ids))))
        
        # Compute action probabilities using actor model
        with tf.GradientTape() as tape:
            action_probabilities = actor_model(batch_adj_matrix(batch_item_ids))
            
        # Compute actor loss
        actor_loss = actor_loss_fn(batch_next_item_ids, action_probabilities)
        
        # Compute gradients and update actor model
        actor_grads = tape.gradient(actor_loss, actor_model.trainable_variables)
        optimizer.apply_gradients(zip(actor_grads, actor_model.trainable_variables))
        
        # Update actor metrics
        actor_metrics.update_state(batch_next_item_ids, action_probabilities)
        
        # Print batch metrics
        print(f'Batch {batch_idx}/{num_batches} - Critic loss: {critic_loss}, Critic MSE: {critic_metrics.result()}, Actor loss: {actor_loss}, Actor accuracy: {actor_metrics.result()}')
        
    # Reset metrics
    actor_metrics.reset_states()
    critic_metrics.reset_states()


(3007, 3007)
3007
Epoch 0/10


TypeError: 'SparseTensor' object is not callable