# found in 
https://github.com/philtabor/Youtube-Code-Repository/blob/master/ReinforcementLearning/PolicyGradient/reinforce/reinforce_keras.py

#from agents import PolicyGradientAgent

TypeError: An op outside of the function building code is being passed
a "Graph" tensor. It is possible to have Graph tensors
leak out of the function building context by including a
tf.init_scope in your function building code.
For example, the following function will fail:
```  
@tf.function
  def has_init_scope():
    my_constant = tf.constant(1.)
    with tf.init_scope():
      added = my_constant * 2
The graph tensor has name: input_8:0
```

In [13]:
import tensorflow as tf

<function tensorflow.python.framework.ops.init_scope()>

In [22]:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Activation, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
import numpy as np

class Agent(object):
    def __init__(self, alpha, beta, gamma=0.99, n_actions=4,
                 layer1_size=1024, layer2_size=512, input_dims=8):
        self.gamma = gamma
        self.alpha = alpha
        self.beta = beta
        self.input_dims = input_dims
        self.fc1_dims = layer1_size
        self.fc2_dims = layer2_size
        self.n_actions = n_actions

        self.actor, self.critic, self.policy = self.build_actor_critic_network()
        self.action_space = [i for i in range(n_actions)]

    def build_actor_critic_network(self):
        input = Input(shape=(self.input_dims,))
        delta = Input(shape=[1])
        dense1 = Dense(self.fc1_dims, activation='relu')(input)
        dense2 = Dense(self.fc2_dims, activation='relu')(dense1)
        probs = Dense(self.n_actions, activation='softmax')(dense2)
        values = Dense(1, activation='linear')(dense2)

        def custom_loss(y_true, y_pred):
            out = K.clip(y_pred, 1e-8, 1-1e-8)
            log_lik = y_true*K.log(out)

            return K.sum(-log_lik*delta)

        actor = Model([input, delta], outputs=[probs])

        actor.compile(optimizer=Adam(lr=self.alpha), loss=custom_loss)

        critic = Model([input], outputs=[values])

        critic.compile(optimizer=Adam(lr=self.beta), loss='mean_squared_error')

        policy = Model([input], outputs=[probs])

        return actor, critic, policy

    def choose_action(self, observation):
        state = observation[np.newaxis, :]
        probabilities = self.policy.predict(state)[0]
        action = np.random.choice(self.action_space, p=probabilities)

        return action

    def learn(self, state, action, reward, state_, done):
        state = state[np.newaxis,:]
        state_ = state_[np.newaxis,:]
        critic_value_ = self.critic.predict(state_)
        critic_value = self.critic.predict(state)

        target = reward + self.gamma*critic_value_*(1-int(done))
        delta =  target - critic_value

        actions = np.zeros([1, self.n_actions])
        actions[np.arange(1), action] = 1

        self.actor.fit([state, delta], actions, verbose=0)

        self.critic.fit(state, target, verbose=0)

In [23]:

import os
# for keras the CUDA commands must come before importing the keras libraries
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
import gym

env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

pg_agent = Agent(alpha=0.0005, beta=0.0005, gamma=0.99, n_actions=action_size, 
                               input_dims=state_size)
n_games = 500
pg_scores = []


for i in range(n_games):
    done = False
    score = 0
    observation = env.reset()
    for time in range(500):
        action = pg_agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        reward = reward if not done else -10
        score += reward
        pg_agent.learn(observation, action, reward, observation_, done)
        observation = observation_
        #ddqn_agent.learn()
        if done:
            break
            
    pg_scores.append(score)

    avg_score = np.mean(pg_scores[max(0, i-100):(i+1)])
    print('episode: ', i,'score: %.2f' % score,
          ' average score %.2f' % avg_score)

    if i % 10 == 0 and i > 0:
        #pg_agent.save_model()

        pass
    
x = [i+1 for i in range(n_games)]


_SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'input_14:0' shape=(None, 1) dtype=float32>]

In [3]:
from tensorflow.keras.models import Model

In [18]:
Model(inputs )

NameError: name 'inputs' is not defined

In [17]:
!pip

/bin/sh: pip: command not found


In [20]:
_input = Input(shape=(8,))
dense1 = Dense(64, activation='relu')(_input)
dense2 = Dense(64, activation='relu')(dense1)
probs = Dense(4, activation='softmax')(dense2)

In [26]:
Model([_input])

<tensorflow.python.keras.engine.training.Model at 0x130eded10>