In [3]:
import pandas as pd
import numpy as np
import networkx as nx
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K
from sklearn.preprocessing import LabelEncoder
import gym
from gym import spaces
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score
from scipy.sparse import coo_matrix
import scipy as sp
import copy
import random

In [4]:

# Data preprocessing
# Load the Yoochoose dataset and preprocess it to create session-based sequences of interactions
data = pd.read_csv('yoochoose_dataset/filtered_clicks.dat',
                   names=['session_id', 'timestamp', 'item_id', 'category'],
                   dtype={'session_id': 'int64', 'timestamp': 'str', 'item_id': 'int64', 'category': 'int64'},
                   parse_dates=['timestamp'])

# Create item and session maps
item_map = dict(zip(np.unique(data.item_id), range(len(np.unique(data.item_id)))))
session_map = dict(zip(np.unique(data.session_id), range(len(np.unique(data.session_id)))))

# Map item and session IDs
data['item_id'] = data['item_id'].map(item_map)
data['session_id'] = data['session_id'].map(session_map)

# Sort by session and timestamp
data = data.sort_values(['session_id', 'timestamp'])

# Create next item and session columns
data['next_item_id'] = data.groupby('session_id')['item_id'].shift(-1)
data['next_session_id'] = data.groupby('session_id')['session_id'].shift(-1)
data = data.dropna()

# Convert data to numpy arrays
session_ids = data['session_id'].values.astype('int32')
item_ids = data['item_id'].values.astype('int32')
next_item_ids = data['next_item_id'].values.astype('int32')
next_session_ids = data['next_session_id'].values.astype('int32')
timestamps = data['timestamp'].values

# Create a directed graph
graph = nx.DiGraph()

# Add nodes to the graph
graph.add_nodes_from(item_map.values())

# Add edges between items that co-occur in the same session
for session_id in np.unique(session_ids):
    items_in_session = item_ids[session_ids == session_id]
    for i in range(len(items_in_session)):
        for j in range(i + 1, len(items_in_session)):
            if not graph.has_edge(items_in_session[i], items_in_session[j]):
                graph.add_edge(items_in_session[i], items_in_session[j], weight=0)
            graph[items_in_session[i]][items_in_session[j]]['weight'] += 1

# Normalize edge weights
for u, v, d in graph.edges(data=True):
    d['weight'] /= np.sqrt(graph.degree(u) * graph.degree(v))            

# Create adjacency matrix
adj_matrix = coo_matrix(nx.to_numpy_array(graph, weight='weight', dtype=np.float32))
adj_matrix = tf.sparse.SparseTensor(indices=np.array([adj_matrix.row, adj_matrix.col]).T,
                                    values=adj_matrix.data,
                                    dense_shape=adj_matrix.shape)    
print(adj_matrix.shape)
num_nodes = adj_matrix.shape[0] 
    

num_items = num_nodes
print(num_items)
    
embedding_dim = 100
num_layers = 2
hidden_dim = 100

(6558, 6558)
6558


In [5]:
# #split the preprocessed data into train-test sets based on time 
# #and ensure that there is no item in the test set that does not appear in the training set

# latest_timestamp = data['timestamp'].max()
# test_fraction = 0.2
# test_cutoff_timestamp = latest_timestamp - pd.Timedelta(test_fraction * (latest_timestamp - data['timestamp'].min()))
# test_session_ids = data.loc[data['timestamp'] > test_cutoff_timestamp, 'session_id'].unique()
# test_data = data.loc[data['session_id'].isin(test_session_ids)]
# train_data = data.loc[~data['session_id'].isin(test_session_ids)]
# num_sessions = len(train_data['session_id'].unique())
# latest_1_4 = train_data.loc[train_data['session_id'].isin(train_data['session_id'].unique()[-num_sessions // 4:])]
# latest_1_64 = train_data.loc[train_data['session_id'].isin(train_data['session_id'].unique()[-num_sessions // 64:])]

# num_train_sessions_1_4=len(latest_1_4['session_id'].unique())
# num_train_sessions_1_64=len(latest_1_64['session_id'].unique())
# num_test_sessions=len(test_data['session_id'].unique())

# print(f'number of training sessions for 1/64:  {num_train_sessions_1_64}')
# # print(f'number of test sessions for 1/64:  {num_test_sessions}')
# print(f'number of training sessions for 1/4:  {num_train_sessions_1_4}')
# # print(f'number of test sessions for 1/4:  {num_test_sessions}')

# num_clicks = len(data)
# print(f'number of clicks:  {num_clicks}')

In [6]:
class GNNActor(tf.keras.Model):
    def __init__(self, num_items, num_features, num_edge_features, hidden_dim=100):
        super(GNNActor, self).__init__()
        self.input_layer = tf.keras.layers.InputLayer(input_shape=(None, num_features + num_edge_features), sparse=True)
        self.num_items = num_items
        self.num_features = num_features
        self.num_edge_features = num_edge_features
        self.hidden_dim = hidden_dim

        # Define node embedding layer
        self.node_embedding = tf.keras.layers.Embedding(input_dim=num_items, output_dim=hidden_dim, input_length=1)

        # Define graph convolutional layers
        self.gcn_layer1 = tf.keras.layers.Dense(units=hidden_dim, activation='relu')
        self.gcn_layer2 = tf.keras.layers.Dense(units=hidden_dim, activation='relu')
        
        # Define final prediction layer
        self.prediction_layer = tf.keras.layers.Dense(units=num_items, activation='softmax')

    def call(self, inputs):
        # Check input shape and extract node and edge features accordingly
        # print(f'inputs.shape {inputs.shape}')
        if len(inputs.shape) == 1:
            # print(f'GNN inputs shape: {inputs.shape}')
            node_features = tf.expand_dims(tf.range(self.num_items), axis=-1)
            edge_features = inputs
            # Reshape edge_features tensor
            edge_features = tf.expand_dims(edge_features, axis=-1) # (batch_size, num_edge_features, num_edges, 1)
        else:
            node_features = inputs[:, :1]
            edge_features = inputs[:, 1:]
        # Node embedding layer
        # print(f'node_features shape: {node_features.shape}')
        node_embeddings = self.node_embedding(node_features) # (batch_size, 1, hidden_dim)
        # print(f'node_embeddings shape: {node_embeddings.shape}')
        # print(f'node_embeddings: {node_embeddings}')
        # print(f'edge_features shape: {edge_features.shape}')
        # print(f'edge_features: {edge_features}')
        
        # Matmul edge_features and node_embeddings tensors
        hidden1 = self.gcn_layer1(tf.linalg.matmul(tf.expand_dims(edge_features, axis=-1), node_embeddings)) # (batch_size, num_edge_features, num_edges, hidden_dim)
        hidden1 = tf.reshape(hidden1, [-1, self.num_edge_features, self.hidden_dim]) # (batch_size, num_edge_features, hidden_dim)
        hidden2 = self.gcn_layer2(tf.linalg.matmul(edge_features, hidden1)) # (batch_size, num_edge_features, num_edges, hidden_dim)
        hidden2 = tf.reshape(hidden2, [-1, self.num_edge_features, self.hidden_dim]) # (batch_size, num_edge_features, hidden_dim)
    
        # Concatenate node features and hidden layers
        concat_features = tf.concat([node_embeddings, hidden1, hidden2], axis=-1) # (batch_size, 1, 3*hidden_dim)

        # Final prediction layer
        predictions = self.prediction_layer(tf.squeeze(concat_features, axis=1)) # (batch_size, num_items)
        print(f'Predictions shape: {predictions.shape}')
        print(f'Predictions: {predictions}')
        return predictions


class DQNCritic(tf.keras.Model):
    def __init__(self, num_items, hidden_dim=100):
        super(DQNCritic, self).__init__()
        self.num_items = num_items
        self.hidden_dim = hidden_dim
        
        # Define dense layers
        self.dense1 = tf.keras.layers.Dense(units=hidden_dim, activation='relu')
        self.dense2 = tf.keras.layers.Dense(units=1, activation=None)

    def call(self, inputs):
        # Pass input through dense layers
        print(f'DQN inputs.shape {inputs.shape}')
        x = self.dense1(inputs)
        x = self.dense2(x)
        
        # Reshape output to (batch_size, num_items)
        q_values = tf.reshape(x, shape=(-1, self.num_items))
        print(f'q_values {q_values.shape}')
        return q_values
    
class RecommenderEnv(gym.Env):
    def __init__(self, session_ids, item_ids, next_item_ids, next_session_ids, adj_matrix):
        super(RecommenderEnv, self).__init__()

        self.session_ids = session_ids
        self.item_ids = item_ids
        self.next_item_ids = next_item_ids
        self.next_session_ids = next_session_ids
        self.adj_matrix = adj_matrix

        self.num_items = adj_matrix.shape[0]
        self.num_sessions = len(np.unique(session_ids))

        self.action_space = spaces.Discrete(self.num_items)
        self.observation_space = spaces.Box(low=0, high=1, shape=(self.num_items,))

        self.current_session = 0
        self.current_session_items = set(self.item_ids[self.session_ids == self.current_session])
        self.current_state = np.zeros(self.num_items)

    def reset(self):
        self.current_session = 0
        self.current_session_items = set(self.item_ids[self.session_ids == self.current_session])
        self.current_state = np.zeros(self.num_items)
        return self.current_state

    def step(self, action):
        # Update current state
        self.current_state[action] = 1

        # Get the reward for the action
        if action.ref() in self.current_session_items:
            reward = 1
        else:
            reward = 0

        # Move to the next session if the current session has ended
        if self.next_session_ids[self.current_session] != self.current_session:
            self.current_session = self.next_session_ids[self.current_session]
            self.current_session_items = set(self.item_ids[self.session_ids == self.current_session])

        # Check if the episode is over
        done = self.current_session == self.num_sessions - 1

        return self.current_state, reward, done, {}

    



In [None]:
    
# Define instance of environment 
env = RecommenderEnv(session_ids, item_ids, next_item_ids, next_session_ids, adj_matrix)
    
# Define the GNNActor model
gnn_actor = GNNActor(num_items, num_features=1, num_edge_features=1, hidden_dim=100)


# Define the DQNCritic model
dqn_critic = DQNCritic(num_items, hidden_dim=100)


# Define the loss function for the DQN
def dqn_loss(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred))


# Define the hyperparameters
learning_rate = 0.001
batch_size = 100
discount_factor = 0.99
exploration_rate = 1.0
min_exploration_rate = 0.01
exploration_decay_rate = 0.001
num_episodes = 1000
replay_buffer_size = 100000

# Define the optimizer for both models
optimizer = Adam(learning_rate)

# Compile both models
gnn_actor.compile(optimizer=optimizer, loss='categorical_crossentropy')
print("GNN model compiled")
dqn_critic.compile(optimizer=optimizer, loss=dqn_loss)
print("DQN model compiled")

# Create replay buffer
replay_buffer = deque(maxlen=replay_buffer_size)

# Define optimizers
actor_optimizer = tf.keras.optimizers.Adam(learning_rate)
critic_optimizer = tf.keras.optimizers.Adam(learning_rate)

# Define the function to sample a batch of experiences
def sample_batch(replay_buffer, batch_size):
    batch = random.sample(replay_buffer, batch_size)
    states = np.array([experience[0] for experience in batch])
    actions = np.array([experience[1] for experience in batch])
    rewards = np.array([experience[2] for experience in batch])
    next_states = np.array([experience[3] for experience in batch])
    dones = np.array([experience[4] for experience in batch])
    return states, actions, rewards, next_states, dones

# Define the function to compute the target Q-values
@tf.function
def compute_target_q_values(next_states, next_gnn_embeddings):
    next_states = tf.reshape(next_states, (next_states.shape[0], 1, -1))
    next_actions = actor_model([next_gnn_embeddings, next_states])
    next_q_values = critic_model([next_states, next_actions])
    return rewards + discount_factor * (1 - dones) * tf.squeeze(next_q_values)
num_features = 1
@tf.function
def compute_gradients(batch):
    states, actions, rewards, next_states, dones = batch

    with tf.GradientTape(persistent=True) as tape:
        # Compute logits for GNNActor
        # print(f'states.shape {states.shape}')
        action_probs = gnn_actor(states)
        selected_action_probs = tf.gather(action_probs, actions, batch_dims=1)
        log_probs = tf.math.log(selected_action_probs)

        # Compute Q values for DQNCritic
        state_features = tf.convert_to_tensor(states, dtype=tf.float32)
        next_state_features = tf.convert_to_tensor(next_states, dtype=tf.float32)
        q_values = dqn_critic(state_features)
        next_q_values = target_dqn_critic(next_state_features)
        target_q_values = rewards + gamma * tf.reduce_max(next_q_values, axis=1)
        td_errors = target_q_values - tf.gather(q_values, actions, batch_dims=1)

    # Compute gradients for GNNActor
    actor_gradients = tape.gradient(-log_probs, gnn_actor.trainable_variables)

    # Compute gradients for DQNCritic
    critic_gradients = tape.gradient(td_errors, dqn_critic.trainable_variables)

    return actor_gradients, critic_gradients

def apply_gradients(actor_gradients, critic_gradients):
    # Apply gradients to the GNNActor
    gnn_actor_optimizer.apply_gradients(zip(actor_gradients, gnn_actor.trainable_variables))

    # Apply gradients to the DQNCritic
    q_critic_optimizer.apply_gradients(zip(critic_gradients, q_critic.trainable_variables))


def update_target_networks(gnn_actor, dqn_critic, target_gnn_actor, target_dqn_critic, tau):
    for target_param, param in zip(target_gnn_actor.parameters(), gnn_actor.parameters()):
        target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

    for target_param, param in zip(target_dqn_critic.parameters(), dqn_critic.parameters()):
        target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

    
# Set hyperparameters
batch_size = 100
gamma = 0.99  # discount factor
tau = 0.001  # target network update rate
actor_lr = 0.001
critic_lr = 0.001
max_episodes = 1000  # maximum number of episodes to run
max_steps_per_episode = 100  # maximum number of steps per episode
replay_buffer_size = int(1e6)


# Initialize target networks
target_gnn_actor = GNNActor(num_items, num_features=1, num_edge_features=1, hidden_dim=100)
target_gnn_actor.set_weights(gnn_actor.get_weights())
target_dqn_critic = DQNCritic(num_items, hidden_dim)
target_dqn_critic.set_weights(dqn_critic.get_weights())

# Train the GNNActor and DQNCritic models jointly for session-based recommendation

# Define training loop
for episode in tqdm(range(max_episodes)):
    # Reset environment and get initial state
    state = env.reset()
    print(f'state shape: {state.shape}')
    episode_reward = 0
    
    for step in range(max_steps_per_episode):
        
        # Sample action from GNNActor
        action_probs = gnn_actor(state)
        # print(f'action_probs: {action_probs}')
        # Sample an action from the predicted probability distribution
        action = tf.random.categorical(action_probs, num_samples=1)[0, 0]
        # print(f'action: {action}')
        # Take action in environment
        next_state, reward, done, _ = env.step(action)

        # Update episode reward
        episode_reward += reward
        
        # Add experience to replay buffer
        replay_buffer.append((state, action, reward, next_state, done))
        
        # Sample batch from replay buffer
        if len(replay_buffer) >= batch_size:
            # Sample batch of experiences from replay buffer
            batch = sample_batch(replay_buffer, batch_size)

            # Compute gradients for GNNActor and DQNCritic
            actor_gradients, critic_gradients = compute_gradients(batch)
            
            # Apply gradients to GNNActor and DQNCritic
            apply_gradients(actor_gradients, critic_gradients, actor_optimizer, critic_optimizer)
            
            # Update target networks
            update_target_networks(gnn_actor, dqn_critic, target_gnn_actor, target_dqn_critic, tau)
        
        # Update state
        state = next_state
        
        # Check if episode is done
        if done:
            break
    
    # Print episode reward
    print(f'Episode {episode}: {episode_reward}')


GNN model compiled
DQN model compiled


  0%|          | 0/1000 [00:00<?, ?it/s]

state shape: (6558,)
