In [1]:
%pip install --upgrade gym
%pip install pygame

Collecting gym
  Downloading gym-0.26.2.tar.gz (721 kB)
     ---------------------------------------- 0.0/721.7 kB ? eta -:--:--
      --------------------------------------- 10.2/721.7 kB ? eta -:--:--
     --- --------------------------------- 61.4/721.7 kB 812.7 kB/s eta 0:00:01
     --------------- ---------------------- 286.7/721.7 kB 2.5 MB/s eta 0:00:01
     ---------------------------- --------- 532.5/721.7 kB 3.3 MB/s eta 0:00:01
     -------------------------------------- 721.7/721.7 kB 3.5 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting gym-notices>=0.0.4 (from gym)
  Obtaining dependency information for gym-notices>=0.0.4 from https://files.pythonhosted.org/packages/25/

In [2]:
%pip install pandas
%pip install tensorflow

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [3]:
%pip install jupyter-tensorboard

Collecting jupyter-tensorboard
  Downloading jupyter_tensorboard-0.2.0.tar.gz (15 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: jupyter-tensorboard
  Building wheel for jupyter-tensorboard (setup.py): started
  Building wheel for jupyter-tensorboard (setup.py): finished with status 'done'
  Created wheel for jupyter-tensorboard: filename=jupyter_tensorboard-0.2.0-py2.py3-none-any.whl size=15280 sha256=f06a11a365be4450408796526f0b365fdbf21e55d8cc0446c9c7a5c6f677bf00
  Stored in directory: c:\users\redbe\appdata\local\pip\cache\wheels\9d\a7\53\b0a5a49c040337d6df9fb6bcdbea3d36eb823bd1c33c9e4ca2
Successfully built jupyter-tensorboard
Installing collected packages: jupyter-tensorboard
Successfully installed jupyter-tensorboard-0.2.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
!pip install Tensorboard



In [6]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import gym # for environment
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam # adaptive momentum
import random
import pickle as pkl
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import TensorBoard
import time
import os

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

In [2]:
tensorboard_callback = TensorBoard(log_dir='logs/')

In [3]:
class DQLAgent():

	def __init__(self, env):
		# parameters and hyperparameters

		# this part is for neural network or build_model()
		self.state_size = env.observation_space.shape[0] # this is for input of neural network node size
		self.action_size = env.action_space.n # this is for out of neural network node size

		# this part is for replay()
		self.gamma = 0.95
		self.learning_rate = 0.001

		# this part is for adaptiveEGreedy()
		self.epsilon = 1 # initial exploration rate
		self.epsilon_decay = 0.995
		self.epsilon_min = 0.01

		self.memory = deque(maxlen = 1000) # a list with 1000 memory, if it becomes full first inputs will be deleted

		self.model = self.build_model()

	def build_model(self):
		# neural network for deep Q learning
		model = Sequential()
		model.add(Dense(48, input_dim = self.state_size, activation = 'tanh')) # first hidden layer
		model.add(Dense(self.action_size, activation = 'linear')) # output layer
		model.compile(loss = 'mse', optimizer = Adam(learning_rate = self.learning_rate))
		return model

	def remember(self, state, action, reward, next_state, done):
		# storage
		self.memory.append((state, action, reward, next_state, done))

	def act(self, state):
		# acting, exploit or explore
		if random.uniform(0,1) <= self.epsilon:
				return env.action_space.sample()
		else:
				act_values = self.model.predict(state)
				return np.argmax(act_values)


	def replay(self, batch_size):
		# training

		if len(self.memory) < batch_size:
				return # memory is still not full

		minibatch = random.sample(self.memory, batch_size) # take 16 (batch_size) random samples from memory
		for state, action, reward, next_state, done in minibatch:
			if done: # if the game is over, I dont have next state, I just have reward
				target = reward
			else:
				target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
				# target = R(s,a) + gamma * max Q`(s`,a`)
				# target (max Q` value) is output of Neural Network which takes s` as an input
				# amax(): flatten the lists (make them 1 list) and take max value
			train_target = self.model.predict(state) # s --> NN --> Q(s,a)=train_target
			train_target[0][action] = target
			self.model.fit(state, train_target, verbose = 0, callbacks=[tensorboard_callback]) # verbose: dont show loss and epoch

	def adaptiveEGreedy(self):
		if self.epsilon > self.epsilon_min:
			self.epsilon *= self.epsilon_decay


if __name__ == "__main__":

	# initialize gym environment and agent
	env = gym.make('CartPole-v1', render_mode='human')
	agent = DQLAgent(env)

	batch_size = 16
	episodes = 20
	for e in range(episodes):

		# initialize environment
		state = env.reset()
		# Coge el array del estado, que está en la posición 0
		state = np.reshape(state[0], [1,4])

		time = 0 # each second I will get reward, because I want to sustain a balance forever
		while True:

			# act
			action = agent.act(state)

			# step
			# Devuelve 5 valores, no 4
			next_state, reward, done, _, _ = env.step(action)
			next_state = np.reshape(next_state, [1,4])

			# remember / storage
			agent.remember(state, action, reward, next_state, done)

			# update state
			state = next_state

			# replay
			agent.replay(batch_size)

			# adjust epsilon
			agent.adaptiveEGreedy()

			time += 1

			if done:
				print('episode: {}, time: {}'.format(e, time))
				break

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  if not isinstance(terminated, (bool, np.bool8)):


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15

In [5]:
agent.model.save("model_cart_pole_1.keras")

In [13]:
loaded_model = load_model("model_cart_pole_1.keras")
agent.model = loaded_model

  trackable.load_own_variables(weights_store.get(inner_path))


In [14]:
agent = DQLAgent(env)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
trained_model = agent # Now I have trained agent state = env.reset() # Game will start with inital random state state = np.reshape(state, [1,4]) time_t = 0

state = env.reset()
time_t = 0

while True: 
	env.render() 
	action = trained_model.act(state) 
	nextstate, reward, done, _, _ = env.step(action) 
	next_state = np.reshape(next_state, [1,4]) 
	state = next_state 
	time_t += 1 
	print(time_t) 
	time.sleep(0.01) 
	if done: break

print('Done')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
Done


In [16]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

In [17]:
%pip show tensorboard

Name: tensorboard
Version: 2.16.2
Summary: TensorBoard lets you watch Tensors Flow
Home-page: https://github.com/tensorflow/tensorboard
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: c:\Users\redbe\anaconda3\Lib\site-packages
Requires: absl-py, grpcio, markdown, numpy, protobuf, setuptools, six, tensorboard-data-server, werkzeug
Required-by: tensorflow-intel
Note: you may need to restart the kernel to use updated packages.


In [18]:
# %tensorboard --logdir=logs --host localhost --port 8080
%tensorboard --logdir=logs

Reusing TensorBoard on port 6006 (pid 36300), started 2:51:13 ago. (Use '!kill 36300' to kill it.)

In [19]:
from tensorboard import notebook
notebook.list() # View open TensorBoard instances

Known TensorBoard instances:
  - port 6006: logdir logs (started 2:51:17 ago; pid 36300)


In [22]:
# Control TensorBoard display. If no port is provided, 
# the most recently launched TensorBoard is used
notebook.display(port=6006, height=1000) 

Selecting TensorBoard with logdir logs (started 2:51:58 ago; port 6006, pid 36300).


In [23]:
env.close()

**if time = 200, it means that I have 100% success because after 200 times the game resets**

**Test Part**

# change this cell to code
import time

trained_model = agent # Now I have trained agent
state = env.reset() # Game will start with inital random state
state = np.reshape(state, [1,4])
time_t = 0

while True:
    env.render()
    action = trained_model.act(state)
    next_state, reward, done, _ = env.step(action)
    next_state = np.reshape(next_state, [1,4])
    state = next_state
    time_t += 1
    print(time_t)
    time.sleep(0.01)
    if done:
        break

print('Done')            