In [2]:
import numpy as np
import tensorflow as tf
import recsim

from recsim.environments import interest_evolution
from recsim.agents import full_slate_q_agent, random_agent
from recsim.simulator import runner_lib

In [3]:
# Declare standard Full Slate Q-Agent and SlateQ agent

def create_q_agent(sess, environment, eval_mode, summary_writer=None):
    """Build a standard (non-decomposed) full-slate Q-learning agent.

    Args:
        sess: Passed through as the agent's first positional argument.
        environment: Object whose `observation_space` and `action_space`
            attributes are handed to the agent.
        eval_mode: Forwarded unchanged as the `eval_mode` keyword.
        summary_writer: Forwarded unchanged as the `summary_writer` keyword
            (None when the caller does not supply one).

    Returns:
        Whatever `full_slate_q_agent.FullSlateQAgent` returns.
    """
    # Collect the keyword arguments first, then construct the agent.
    agent_options = dict(
        observation_space=environment.observation_space,
        action_space=environment.action_space,
        summary_writer=summary_writer,
        eval_mode=eval_mode,
    )
    return full_slate_q_agent.FullSlateQAgent(sess, **agent_options)



def create_decomp_q_agent(sess, environment, eval_mode, summary_writer=None):
    """Build one variant of the decomposed agent featured in the SlateQ paper.

    The agent is obtained from `slate_decomp_q_agent.create_agent` using the
    agent name 'slate_optimal_optimal_q'.

    Args:
        sess: Forwarded as the `sess` keyword of `create_agent`.
        environment: Object whose `observation_space` and `action_space`
            attributes are handed to the agent.
        eval_mode: Forwarded unchanged as the `eval_mode` keyword.
        summary_writer: Forwarded unchanged as the `summary_writer` keyword
            (None when the caller does not supply one).

    Returns:
        Whatever `slate_decomp_q_agent.create_agent` returns.
    """
    # Imported locally: the notebook's import cell only brings in
    # `full_slate_q_agent` and `random_agent`, so without this line the call
    # below fails with NameError.
    from recsim.agents import slate_decomp_q_agent

    kwargs = {
        'observation_space': environment.observation_space,
        'action_space': environment.action_space,
        'summary_writer': summary_writer,
        'eval_mode': eval_mode,
    }
    return slate_decomp_q_agent.create_agent(
        agent_name='slate_optimal_optimal_q', sess=sess, **kwargs)
     


In [4]:
# Environment configuration.
# The same seed value is used for NumPy's global RNG and stored under the
# 'seed' key of the config dict passed to create_environment in later cells.

seed = 0
np.random.seed(seed)
env_config = dict(
    num_candidates=350,
    slate_size=9,
    resample_documents=True,
    seed=seed,
)

In [None]:
# Training: full-slate Q agent.

# Eager execution is disabled before the runner is constructed.
tf.compat.v1.disable_eager_execution()

# Results directory; the evaluation cell below reuses this name.
tmp_q_dir = './results/fullslate_q/'

runner = runner_lib.TrainRunner(
    # Deliberately tiny settings: one iteration, one training step.
    num_iterations=1,
    max_training_steps=1,
    max_steps_per_episode=200,
    episode_log_file="",
    base_dir=tmp_q_dir,
    create_agent_fn=create_q_agent,
    env=interest_evolution.create_environment(env_config),
)
runner.run_experiment()

INFO:tensorflow:max_training_steps = 1, number_iterations = 1,checkpoint frequency = 1 iterations.


INFO:tensorflow:max_training_steps = 1, number_iterations = 1,checkpoint frequency = 1 iterations.


INFO:tensorflow:max_steps_per_episode = 200


INFO:tensorflow:max_steps_per_episode = 200
2023-03-08 10:49:41.298931: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-08 10:49:41.622758: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-03-08 10:49:41.622833: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kshitij-Inspiron-5570): /proc/driver/nvidia/version does not exist


In [None]:
# Evaluation: full-slate Q agent, reading from the directory used for training.

runner = runner_lib.EvalRunner(
    test_mode=True,
    max_eval_episodes=5,
    base_dir=tmp_q_dir,
    create_agent_fn=create_q_agent,
    # A fresh environment is built from the shared config for this run.
    env=interest_evolution.create_environment(env_config),
)
runner.run_experiment()

In [None]:
# Training: decomposed (SlateQ) agent.

# Results directory; the evaluation cell below reuses this name.
tmp_decomp_q_dir = './results/decomp_q/'

runner = runner_lib.TrainRunner(
    num_iterations=1,
    max_training_steps=5,
    episode_log_file="",
    base_dir=tmp_decomp_q_dir,
    create_agent_fn=create_decomp_q_agent,
    # A fresh environment is built from the shared config for this run.
    env=interest_evolution.create_environment(env_config),
)
runner.run_experiment()

In [None]:
# Evaluation: decomposed (SlateQ) agent, reading from its training directory.

runner = runner_lib.EvalRunner(
    test_mode=True,
    max_eval_episodes=1,
    base_dir=tmp_decomp_q_dir,
    create_agent_fn=create_decomp_q_agent,
    # A fresh environment is built from the shared config for this run.
    env=interest_evolution.create_environment(env_config),
)
runner.run_experiment()

In [None]:
# Load the TensorBoard notebook extension, then open one dashboard per results
# directory written by the training cells above. Each dashboard is given its
# own port (8001 and 8002).
%load_ext tensorboard
# Full-slate Q agent results.
%tensorboard --logdir=./results/fullslate_q/ --port=8001
# Decomposed (SlateQ) agent results.
%tensorboard --logdir=./results/decomp_q/  --port=8002