In [4]:
import pandas as pd
import numpy as np
import networkx as nx
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K
from sklearn.preprocessing import LabelEncoder
import gym
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from scipy.sparse import coo_matrix
import scipy as sp

# Data preprocessing
# Load the Yoochoose dataset and preprocess it to create session-based sequences of interactions
data = pd.read_csv('yoochoose_dataset/filtered_clicks.dat',
                   names=['session_id', 'timestamp', 'item_id', 'category'],
                   dtype={'session_id': 'int64', 'timestamp': 'str', 'item_id': 'int64', 'category': 'int64'},
                   parse_dates=['timestamp'])

# Create item and session maps
item_map = dict(zip(np.unique(data.item_id), range(len(np.unique(data.item_id)))))
session_map = dict(zip(np.unique(data.session_id), range(len(np.unique(data.session_id)))))

# Map item and session IDs
data['item_id'] = data['item_id'].map(item_map)
data['session_id'] = data['session_id'].map(session_map)

# Sort by session and timestamp
data = data.sort_values(['session_id', 'timestamp'])

# Create next item and session columns
data['next_item_id'] = data.groupby('session_id')['item_id'].shift(-1)
data['next_session_id'] = data.groupby('session_id')['session_id'].shift(-1)
data = data.dropna()

# Convert data to numpy arrays
session_ids = data['session_id'].values.astype('int32')
item_ids = data['item_id'].values.astype('int32')
next_item_ids = data['next_item_id'].values.astype('int32')
next_session_ids = data['next_session_id'].values.astype('int32')
timestamps = data['timestamp'].values

# Create a directed graph
graph = nx.DiGraph()

# Add nodes to the graph
graph.add_nodes_from(item_map.values())

# Add edges to the graph
for session_id, items in tqdm(data.groupby('session_id')['item_id']):
    items = items.values.tolist()
    for i in range(len(items)-1):
        src, dst = items[i], items[i+1]
        graph.add_edge(src, dst)
        
        
# Create dense feature matrix
num_items = len(item_map)
features = np.eye(num_items, dtype='float32')[item_ids]

# # Create adjacency matrix
# adj_matrix = sp.sparse.coo_matrix(nx.to_numpy_array(graph, weight='weight', dtype=np.float32))
# adj_matrix = tf.sparse.SparseTensor(indices=np.array([adj_matrix.row, adj_matrix.col]).T,
#                                     values=adj_matrix.data,
#                                     dense_shape=adj_matrix.shape)

100%|██████████| 1756/1756 [00:00<00:00, 70231.69it/s]


In [5]:
# Now, we will define the GNN model. 
# We will use a simple GNN with a single hidden layer, 
# and we will use the mean squared error as the loss function.

class GNNModel(tf.keras.Model):
    def __init__(self, input_dim, hidden_dim, output_dim, adj_matrix):
        super().__init__()
        self.hidden_layer = tf.keras.layers.Dense(hidden_dim, activation="relu")
        self.output_layer = tf.keras.layers.Dense(output_dim, activation="softmax")
        self.adj_matrix = tf.sparse.SparseTensor(indices=adj_matrix[0], values=adj_matrix[1], dense_shape=adj_matrix[2])
        self.input_dim = input_dim
    
    def call(self, inputs):
        # Embed the input items
        item_embeddings = tf.one_hot(inputs, depth=self.input_dim)
        
        # Propagate the embeddings through the graph
        hidden_embeddings = tf.matmul(tf.matmul(self.adj_matrix, item_embeddings), self.hidden_layer.kernel)
        hidden_embeddings = self.hidden_layer(hidden_embeddings)
        
        # Aggregate the embeddings of the neighboring items
        neighbors = tf.matmul(self.adj_matrix, hidden_embeddings)
        neighbors_agg = tf.reduce_sum(neighbors, axis=1)
        # Concatenate the item embeddings with the aggregated neighbor embeddings
        embeddings = tf.concat([item_embeddings, neighbors_agg], axis=1)
    
        # Predict the next item
        outputs = self.output_layer(embeddings)
        return outputs


In [6]:
def get_adjacency_matrix(item_ids, session_ids):
    session_to_index = {}
    item_to_index = {}
    session_index = 0
    item_index = 0
    data = []

    for item, session in zip(item_ids, session_ids):
        if session not in session_to_index:
            session_to_index[session] = session_index
            session_index += 1
        if item not in item_to_index:
            item_to_index[item] = item_index
            item_index += 1
        data.append((session_to_index[session], item_to_index[item]))

    row, col = zip(*data)
    adj_matrix = sp.sparse.coo_matrix(([1]*len(row), (row, col)), shape=(session_index, item_index))
    return adj_matrix


In [None]:
class PolicyNetwork(tf.keras.Model):
    def __init__(self, input_dim, hidden_dim, output_dim, adj_matrix):
        super().__init__()
        self.hidden_layer = tf.keras.layers.Dense(hidden_dim, activation="relu")
        self.output_layer = tf.keras.layers.Dense(output_dim, activation="softmax")
        self.adj_matrix = tf.sparse.SparseTensor(indices=adj_matrix[0], values=adj_matrix[1], dense_shape=adj_matrix[2])
        self.input_dim = input_dim
    
    def call(self, inputs):
        # Embed the input session
        session_embeddings = tf.one_hot(inputs, depth=self.input_dim)
        
        # Propagate the embeddings through the graph
        hidden_embeddings = tf.matmul(tf.matmul(self.adj_matrix, session_embeddings), self.hidden_layer.kernel)
        hidden_embeddings = self.hidden_layer(hidden_embeddings)
        
        # Aggregate the embeddings of the neighboring sessions
        neighbors = tf.matmul(self.adj_matrix, hidden_embeddings)
        neighbors_agg = tf.reduce_sum(neighbors, axis=1)
        
        # Concatenate the session embeddings with the aggregated neighbor embeddings
        embeddings = tf.concat([session_embeddings, neighbors_agg], axis=1)
        
        # Predict the next item probability distribution
        outputs = self.output_layer(embeddings)
        return outputs


class ValueNetwork(tf.keras.Model):
    def __init__(self, input_dim, hidden_dim, output_dim, adj_matrix):
        super().__init__()
        self.hidden_layer = tf.keras.layers.Dense(hidden_dim, activation="relu")
        self.output_layer = tf.keras.layers.Dense(output_dim, activation="linear")
        self.adj_matrix = tf.sparse.SparseTensor(indices=adj_matrix[0], values=adj_matrix[1], dense_shape=adj_matrix[2])
        self.input_dim = input_dim
    
    def call(self, inputs):
        # Embed the input session
        session_embeddings = tf.one_hot(inputs, depth=self.input_dim)
        
        # Propagate the embeddings through the graph
        hidden_embeddings = tf.matmul(tf.matmul(self.adj_matrix, session_embeddings), self.hidden_layer.kernel)
        hidden_embeddings = self.hidden_layer(hidden_embeddings)
        
        # Aggregate the embeddings of the neighboring sessions
        neighbors = tf.matmul(self.adj_matrix, hidden_embeddings)
        neighbors_agg = tf.reduce_sum(neighbors, axis=1)
        
        # Concatenate the session embeddings with the aggregated neighbor embeddings
        embeddings = tf.concat([session_embeddings, neighbors_agg], axis=1)
        
        # Predict the value of the session
        outputs = self.output_layer(embeddings)
        return outputs

In [11]:
# Next, we will define the self-supervised reinforcement learning framework. 
# The framework consists of a policy network and a value network. 
# The policy network takes the current session as input and outputs a probability distribution over the items in the session. 
# The value network takes the current session and the recommended item as input and outputs a scalar value representing the expected future reward. 
# We will train the policy network using policy gradient and the value network using mean squared error.

class SessionRecommender:
    def __init__(self, data, input_dim, hidden_dim, output_dim, learning_rate, alpha):
        adj_matrix = get_adjacency_matrix(item_ids, session_ids)
        self.policy_network = GNNModel(input_dim, hidden_dim, output_dim, adj_matrix.tocsr())
        self.value_network = GNNModel(input_dim+output_dim, hidden_dim, 1, adj_matrix.tocsr())
        self.optimizer_policy = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        self.optimizer_value = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        self.alpha = alpha


    def get_action(self, session):
        session = np.array([session])
        item_prob_policy = self.policy_network(session).numpy().ravel()
        item_prob_gnn = self.gnn_model(session).numpy().ravel()
        item_prob_combined = self.alpha * item_prob_policy + (1 - self.alpha) * item_prob_gnn
        # computes the probability distributions output by the policy_network and the gnn_model, 
        # and then combines them using linear combination
        item_id_rec = np.random.choice(len(item_prob_combined), p=item_prob_combined)
        return item_id_rec

    
    def train(self, session, action, reward):
        # Compute the expected future reward using the value network
        session_expanded = tf.expand_dims(session, axis=0)
        action_expanded = tf.expand_dims(action, axis=0)
        session_action = tf.concat([session_expanded, tf.one_hot(action_expanded, depth=self.policy_network.input_dim)], axis=1)
        expected_reward = self.value_network(session_action)
        
        # Compute the advantage
        baseline = self.value_network(session_expanded)
        advantage = reward - baseline
        
        # Train the policy network using policy gradient
        with tf.GradientTape() as tape:
            logits = self.policy_network(session)
            probs = tf.nn.softmax(logits)
            log_prob = tf.math.log(probs[0][action])
            loss_policy = -log_prob * advantage
        
        grads_policy = tape.gradient(loss_policy, self.policy_network.trainable_variables)
        self.optimizer_policy.apply_gradients(zip(grads_policy, self.policy_network.trainable_variables))
        
        # Train the value network using mean squared error
        with tf.GradientTape() as tape:
            value = self.value_network(session_action)
            loss_value = tf.keras.losses.mean_squared_error(tf.constant([[reward]]), value)
        
        grads_value = tape.gradient(loss_value, self.value_network.trainable_variables)
        self.optimizer_value.apply_gradients(zip(grads_value, self.value_network.trainable_variables))


In [12]:
# Encode the items
item_encoder = LabelEncoder()
# item_encoder = OneHotEncoder(sparse=False)
item_encoder.fit(data["item_id"])


# Train the recommender system
recommender = SessionRecommender(data, len(item_encoder.classes_), hidden_dim=128, output_dim=64, learning_rate=1e-3, alpha=0.5)

for epoch in range(10):
    print(f"Epoch {epoch+1}")
    total_reward = 0
    total_steps = 0
    for session_id, group in train_data.groupby("session_id"):
        session_items = group["item_id"].tolist()
        session_items_encoded = item_encoder.transform(session_items)
        for i in range(len(session_items_encoded)):
            # Get the current state and action
            state = session_items_encoded[:i]
            action, action_prob = recommender.get_action(state)
            recommended_item = item_encoder.inverse_transform([action])[0]
            
            # Get the reward for the action
            if i == len(session_items_encoded) - 1:
                reward = 1 if recommended_item == session_items[-1] else 0
            else:
                reward = 0
                
            # Train the recommender system
            recommender.train(state, action, reward)
            
            # Update the total reward and total steps
            total_reward += reward
            total_steps += 1
            
            # Print the progress
            if total_steps % 1000 == 0:
                print(f"Processed {total_steps} steps, average reward: {total_reward / total_steps:.3f}")
                
    # Evaluate the recommender system on the test set
    if test_data.empty:
        print("Test data is empty, skipping evaluation")
        continue
    
    hits = []
    ndcgs = []
    mse = 0
    for session_id, group in test_data.groupby("session_id"):
        session_items = group["item_id"].tolist()
        session_items_encoded = item_encoder.transform(session_items)
        for i in range(len(session_items_encoded)-1):
            state = session_items_encoded[:i+1]
            next_item = session_items_encoded[i+1]
            logits = recommender.policy_network(state)
            probs = tf.nn.softmax(logits)
            top_k = tf.math.top_k(probs, k=10)[1][0]
            recommended_items = item_encoder.inverse_transform(top_k)
            if next_item in top_k:
                hits.append(1)
                ndcgs.append(1 / math.log(top_k.index(next_item)+2))
            else:
                hits.append(0)
                ndcgs.append(0)
            mse += (logits[0][next_item] - 1) ** 2
            
    precision = np.mean(hits)
    recall = np.mean(hits) / len(item_encoder.classes_)
    ndcg = np.mean(ndcgs)
    mse = mse / len(test_data)
    
    print(f"Precision: {precision:.3f}, Recall: {recall:.3f}, NDCG: {ndcg:.3f}, MSE: {mse:.3f}")

ValueError: TypeError: sparse matrix length is ambiguous; use getnnz() or shape[0]
Traceback (most recent call last):

  File "/Users/nigorakhonganieva/miniforge3/envs/data-science/lib/python3.9/site-packages/scipy/sparse/_base.py", line 345, in __len__
    raise TypeError("sparse matrix length is ambiguous; use getnnz()"

TypeError: sparse matrix length is ambiguous; use getnnz() or shape[0]

