<a href="https://colab.research.google.com/github/SheesASC24/myFirstRepo/blob/main/CartPole_Problem_Replicate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install necessary libraries
!pip install tensorflow==2.9.1 keras-rl2 gym

# Import libraries
import gym
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

# Step 1: Initialize the Environment
# Create the CartPole environment and read the two sizes the network needs.
env = gym.make('CartPole-v0')
# Length of the first axis of the observation space's shape; used below as
# the model's input width.
states = env.observation_space.shape[0]
# Number of discrete actions; used below as the model's output width and as
# the agent's nb_actions.
actions = env.action_space.n

# Step 2: Build the Model
def build_model(states, actions):
    """Assemble the feed-forward network that scores each action.

    The network flattens a ``(1, states)`` input, passes it through two
    24-unit ReLU layers, and ends in a linear layer with one output per
    action.

    Args:
        states: Width of a single observation vector.
        actions: Number of outputs of the final layer (one per action).

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    # NOTE(review): the leading 1 in the input shape presumably mirrors the
    # window_length=1 used by the replay memory in build_agent -- confirm.
    layer_stack = [
        Flatten(input_shape=(1, states)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layer_stack)

# Instantiate the network sized to this environment's observation and action spaces.
model = build_model(states, actions)

# Step 3: Build the RL Agent
def build_agent(model, actions):
    """Wrap ``model`` in a DQN agent with fixed hyperparameters.

    Args:
        model: Keras model whose outputs are the per-action values.
        actions: Number of actions the agent can choose from.

    Returns:
        An uncompiled ``DQNAgent``; the caller is expected to ``compile`` it.
    """
    # Replay buffer holding up to 50,000 entries, one observation per sample.
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    # Boltzmann (softmax-over-Q) action selection with default settings.
    action_policy = BoltzmannQPolicy()
    # NOTE(review): target_model_update=1e-2 is presumably a soft-update rate
    # (keras-rl treats values < 1 that way) -- confirm against the library docs.
    return DQNAgent(
        model=model,
        memory=replay_memory,
        policy=action_policy,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

# Create the agent and compile it with Adam, tracking mean absolute error.
# `learning_rate` is the supported keyword; the legacy `lr` alias is
# deprecated in TF 2.x and triggers a warning from the optimizer constructor.
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Step 4: Train the Agent
# Train for 50,000 environment steps with rendering disabled; verbose=1
# prints per-interval progress.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

# Step 5: Test the Agent
# Evaluate for 10 episodes with rendering enabled. The returned object's
# history dict holds the per-episode rewards under 'episode_reward'.
scores = dqn.test(env, nb_episodes=10, visualize=True)
print(f"Average reward over 10 episodes: {np.mean(scores.history['episode_reward'])}")

# Step 6: Save the Agent's Weights
# overwrite=True allows an existing file at this path to be replaced.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

# Step 7: Reload the Agent and Test Again
# Explicitly release the first environment's resources (e.g. anything opened
# by rendering with visualize=True) before dropping the reference to it.
env.close()

# Discard the trained objects so the test below can only succeed if the
# weights are genuinely restored from disk.
del model
del dqn
del env

# Rebuild everything from scratch, exactly as in the steps above.
env = gym.make('CartPole-v0')
states = env.observation_space.shape[0]
actions = env.action_space.n

model = build_model(states, actions)
dqn = build_agent(model, actions)
# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizer.
# NOTE(review): the agent is compiled before load_weights, mirroring the
# training setup -- keras-rl presumably requires this; confirm.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.load_weights('dqn_weights.h5f')

# Return value is intentionally unused; the per-episode log is the result.
_ = dqn.test(env, nb_episodes=5, visualize=True)

# Release the second environment now that the notebook is done with it.
env.close()





  np.bool8: (False, True),
  from jax import xla_computation as _xla_computation
  np.bool8: (False, True),
  logger.warn(
  deprecation(
  deprecation(
  super(Adam, self).__init__(name, **kwargs)


Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 11:06 - reward: 1.0000

  updates=self.state_updates,
  if not isinstance(terminated, (bool, np.bool8)):
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   31/10000 [..............................] - ETA: 3:42 - reward: 1.0000

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)

98 episodes - episode_reward: 100.255 [10.000, 200.000] - loss: 3.252 - mae: 19.554 - mean_q: 39.518

Interval 2 (10000 steps performed)
50 episodes - episode_reward: 200.000 [200.000, 200.000] - loss: 7.570 - mae: 38.624 - mean_q: 77.654

Interval 3 (20000 steps performed)
50 episodes - episode_reward: 200.000 [200.000, 200.000] - loss: 9.835 - mae: 42.557 - mean_q: 85.350

Interval 4 (30000 steps performed)
50 episodes - episode_reward: 200.000 [200.000, 200.000] - loss: 6.745 - mae: 40.807 - mean_q: 81.831

Interval 5 (40000 steps performed)
done, took 425.518 seconds
Testing for 10 episodes ...


See here for more information: https://www.gymlibrary.ml/content/api/
  deprecation(
  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
See here for more information: https://www.gymlibrary.ml/content/api/
  deprecation(


Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
Average reward over 10 episodes: 200.0


  logger.warn(
  deprecation(
  deprecation(
  super(Adam, self).__init__(name, **kwargs)


Testing for 5 episodes ...


  updates=self.state_updates,
  if not isinstance(terminated, (bool, np.bool8)):


Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
