In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell execution and edits to project code are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import gym, rec_gym
import numpy as np
import tensorflow as tf

from rec_gym.runner import run_experiment
import gin
import rec_gym.envs.prim_env_v1

import pickle

  from ._conv import register_converters as _register_converters


In [3]:
# Bind values to the `PrimEnv1` parameters through gin.
# NOTE(review): presumably `PrimEnv1` is the gin-configurable environment class
# registered by `rec_gym.envs.prim_env_v1`, and these bindings are picked up
# when `gym.make('prim-gym-v1')` constructs it -- confirm against that module.
# The seed is fixed here (53); the remaining values are experiment settings.
gin.parse_config("""
PrimEnv1.n_items = 100
PrimEnv1.n_users = 10
PrimEnv1.n_rec   = 4
PrimEnv1.embedding_dimension = 2
PrimEnv1.cluster_var = 16
PrimEnv1.in_cluster_var = 4
PrimEnv1.user_change_prob = 0.05
PrimEnv1.reward_noise = 0
PrimEnv1.user_init_sigma = 4
PrimEnv1.user_ar_coef = 1
PrimEnv1.user_drift_sigma = 0
PrimEnv1.seed = 53
PrimEnv1.user_type = 'drifting'



""")

# Clear the default TF graph first so re-running this cell does not stack new
# ops on top of ones left over from a previous execution.
tf.reset_default_graph()
# One shared session, reused by every agent created further down the notebook.
sess = tf.InteractiveSession()

def make_env():
    """Build and return a new 'prim-gym-v1' environment via `gym.make`."""
    return gym.make('prim-gym-v1')

In [4]:
from agents.ddpg import DDPGAgent 

Using TensorFlow backend.


In [5]:
# New environment instance for the DDPG run.
env = make_env()

# Sizes are read off the environment: state and action vectors both use the
# embedding dimension, and `action_size` is the environment's `n_rec`.
action_size = env.n_rec
action_dim = env.embedding_dimension
state_dim = env.embedding_dimension

# The agent shares the notebook-wide TF session created earlier.
agent = DDPGAgent(
    action_size=action_size,
    state_dim=state_dim,
    action_dim=action_dim,
    gamma=.9,
    sess=sess,
    optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
    max_tf_checkpoints_to_keep=3,
    experience_size=1000,
    batch_size=64,
)

In [6]:
# Initialise every global TF variable in the graph before the agent is used.
sess.run(tf.global_variables_initializer())
# Run the experiment loop on the current `env`/`agent` pair.
# NOTE(review): presumably `t_train` / `t_test` are the number of training and
# evaluation steps -- confirm against `rec_gym.runner.run_experiment`.
run_experiment(env, agent, t_train=10000, t_test=2000)

100%|██████████| 10000/10000 [06:17<00:00, 26.51it/s]
100%|██████████| 2000/2000 [01:16<00:00, 26.12it/s]


In [None]:
# Serialize the current `env` object to disk with pickle.
# NOTE(review): the payload is a pickle stream even though the file name ends
# in `.npy`; the path is kept unchanged because other code may load it by this
# exact name. Only unpickle this file from a trusted source.
import os

# `open(..., 'wb')` does not create parent directories, so make sure the log
# directory exists; `exist_ok=True` keeps the cell safe to re-run.
os.makedirs('./logs', exist_ok=True)
with open('./logs/ddpg_no_random_init_env_finished.npy', 'wb') as f:
    pickle.dump(file=f, obj=env)

In [7]:
from agents.utils import data_exploring_widget
# Open the interactive exploration widget on the current `env`.
# NOTE(review): presumably it visualises the interaction history recorded on
# `env` during the preceding run -- confirm in `agents.utils`.
data_exploring_widget(env)

interactive(children=(IntSlider(value=0, continuous_update=False, description='Time:', max=12000), Output()), …

In [None]:
from agents.random_agent import RandomAgent

# New environment instance for the RandomAgent run; this rebinds `env`.
env = make_env()

# Sizes read off the environment. Only `action_size` is passed to the agent
# below; `state_dim` and `action_dim` are still rebound as notebook globals.
action_size = env.n_rec
action_dim = env.embedding_dimension
state_dim = env.embedding_dimension

agent = RandomAgent(action_size=action_size)

In [None]:
# Initialise every global TF variable in the graph.
# NOTE(review): this initializer covers all global variables in the default
# graph, so it would also reset variables of any agent built earlier in the
# same graph -- confirm that is intended before comparing agents.
sess.run(tf.global_variables_initializer())
# Run the experiment loop on the current `env`/`agent` pair.
# NOTE(review): presumably `t_train` / `t_test` are the number of training and
# evaluation steps -- confirm against `rec_gym.runner.run_experiment`.
run_experiment(env, agent, t_train=10000, t_test=2000)

In [None]:
from agents.utils import data_exploring_widget
# Open the interactive exploration widget on the current `env`.
# NOTE(review): presumably it visualises the interaction history recorded on
# `env` during the preceding run -- confirm in `agents.utils`.
data_exploring_widget(env)

In [None]:
from agents.dqn import Qagent

# New environment instance for the Qagent run; this rebinds `env`.
env = make_env()

# Sizes read off the environment: state and action vectors both use the
# embedding dimension, and `action_size` is the environment's `n_rec`.
action_size = env.n_rec
action_dim = env.embedding_dimension
state_dim = env.embedding_dimension

# Dimensions are passed as 1-tuples here; the agent shares the notebook-wide
# TF session and is given './logs/' as its log directory.
agent = Qagent(
    sess=sess,
    state_dim=(state_dim,),
    action_dim=(action_dim,),
    epsilon=0.4,
    action_size=action_size,
    logdir='./logs/',
    replay_size=1000,
    batch_size=2,
)

In [None]:
# Initialise every global TF variable in the graph.
# NOTE(review): this initializer covers all global variables in the default
# graph, so it would also reset variables of any agent built earlier in the
# same graph -- confirm that is intended before comparing agents.
sess.run(tf.global_variables_initializer())
# Run the experiment loop on the current `env`/`agent` pair.
# NOTE(review): presumably `t_train` / `t_test` are the number of training and
# evaluation steps -- confirm against `rec_gym.runner.run_experiment`.
run_experiment(env, agent, t_train=10000, t_test=2000)

In [None]:
from agents.utils import data_exploring_widget
# Open the interactive exploration widget on the current `env`.
# NOTE(review): presumably it visualises the interaction history recorded on
# `env` during the preceding run -- confirm in `agents.utils`.
data_exploring_widget(env)