# CartPole Models Executor
By Giulio Vaccari

## Load models and initialize environment

In [1]:
import gym
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [2]:
# Global params
# Number of discrete actions the agents' policies choose between.
n_actions = 2

# DQN specific params
# Probability with which the DQN agent takes a random action instead of the
# greedy one while playing (passed as `epsilon` to the epsilon-greedy policy).
dqn_epsilon_eval = 0.05

In [3]:
# Load models and weights
# The .h5 paths are relative, so the files are looked up from the current
# working directory.

# DQN Agent: network queried for the Q-values of each possible action.
# NOTE: the variable is spelled `qdn_model` (sic); the DQN game loop refers to
# it by this exact name.
qdn_model = keras.models.load_model("dqn_model_nt.h5")

# Actor-Critic Agent: only the actor (policy) network is used to pick actions
# when playing, so loading the critic (value) network is left disabled.
actor_network = keras.models.load_model("ac_policy_nt.h5")
#critic_network = keras.models.load_model("ac_value_nt.h5")



In [4]:
# Init gym environment
# This single CartPole-v1 instance is shared by both agents' game loops below.
env = gym.make("CartPole-v1")

In [5]:
# Define functions used by the agents

# DQN Policy
def epsilon_greedy_policy(model, state, n_actions, epsilon=0):
    """Pick an action epsilon-greedily from the Q-values predicted by ``model``.

    Args:
        model: Object exposing ``predict(batch)`` that returns the Q-values
            for a batch of states.
        state: Current observation; any array-like (ndarray, list, tuple).
        n_actions: Number of discrete actions to sample from when exploring.
        epsilon: Probability of taking a uniformly random action instead of
            the greedy one. The default of 0 means "always greedy".

    Returns:
        int: Index of the chosen action.
    """
    if np.random.rand() < epsilon:
        # Explore: uniformly random action; the model is not queried at all.
        return int(np.random.randint(n_actions))

    # Exploit: the model works on batches, so add a leading batch axis.
    # np.asarray lets plain lists/tuples through as well -- indexing those
    # directly with ``[None]`` would raise a TypeError.
    batch = np.asarray(state)[None]
    # Obtain q-values for each possible action
    q_values = model.predict(batch)
    # Choose the action with the maximum value; cast to a plain int so both
    # branches return the same type.
    return int(np.argmax(q_values))

# Actor-Critic Policy
def actor_critic_policy(state, policy_network):
    """Sample an action from the distribution predicted by the policy network.

    Args:
        state: Current observation; any array-like (ndarray, list, tuple).
        policy_network: Object exposing ``predict(batch)`` that returns, for
            each state in the batch, one probability per action.

    Returns:
        int: Index of the sampled action.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when the predicted
            values do not form a valid probability distribution.
    """
    # The network works on batches: add a leading batch axis (np.asarray also
    # lets plain lists/tuples through), then take the single row back out.
    batch = np.asarray(state)[None]
    action_probs = policy_network.predict(batch)[0]
    # Sample over as many actions as the network actually outputs instead of
    # relying on a notebook-level global for the action count.
    return int(np.random.choice(len(action_probs), p=action_probs))

## Game Playing

### DQN Agent

In [6]:
# Play the DQN agent. The try/finally makes sure the environment is closed
# even if a step raises or the cell is interrupted part-way through a game.
try:
    for episode in range(1): # Play one game
        state = env.reset()
        # 1000 is only an upper bound on steps; the loop stops as soon as the
        # environment reports the episode as done.
        for t in range(1000):
            env.render()
            action = epsilon_greedy_policy(qdn_model, state, n_actions, epsilon=dqn_epsilon_eval)
            state, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    env.close()

Episode finished after 184 timesteps


### Actor-Critic Agent

In [7]:
# Play the Actor-Critic agent. The try/finally makes sure the environment is
# closed even if a step raises or the cell is interrupted part-way through.
try:
    for episode in range(1): # Play one game
        state = env.reset()
        # 1000 is only an upper bound on steps; the loop stops as soon as the
        # environment reports the episode as done.
        for t in range(1000):
            env.render()
            action = actor_critic_policy(state, actor_network)
            state, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    env.close()

Episode finished after 500 timesteps
