<a href="https://colab.research.google.com/github/SheesASC24/myFirstRepo/blob/main/CartPole_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Install necessary libraries
# NOTE(review): only tensorflow is version-pinned here; gym is not. The code
# below unpacks four values from env.step() and calls render(mode='rgb_array'),
# so it presumably needs a gym release that still offers that API -- confirm
# and pin the gym version accordingly.
!pip install tensorflow==2.9.1 keras-rl2 gym pyvirtualdisplay imageio[ffmpeg] moviepy

# Import libraries
import gym
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
import imageio
import os
from IPython.display import HTML
from base64 import b64encode

# Step 1: Initialize the Environment
env = gym.make('CartPole-v0')
# Length of the first axis of the observation space; passed to build_model
# as the width of the network input.
states = env.observation_space.shape[0]
# Number of discrete actions; passed to build_model as the output width and
# to build_agent as nb_actions.
actions = env.action_space.n

# Step 2: Build the Model
def build_model(states, actions):
    """Return a small fully connected Keras ``Sequential`` network.

    The input has shape ``(1, states)`` and is flattened, passed through two
    24-unit ReLU layers, and mapped to ``actions`` linear outputs.
    """
    layer_stack = [
        Flatten(input_shape=(1, states)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layer_stack)

# Network sized from the environment's observation width and action count.
model = build_model(states, actions)

# Step 3: Build the RL Agent
def build_agent(model, actions):
    """Wrap ``model`` in a ``DQNAgent`` and return it.

    The agent is configured with a Boltzmann Q policy, a sequential memory
    limited to 50000 entries with a window length of 1, 10 warm-up steps and
    a target-model update value of 1e-2.
    """
    return DQNAgent(
        model=model,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword for Adam; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Step 4: Train the Agent
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

# Step 5: Test the Agent
scores = dqn.test(env, nb_episodes=10, visualize=False)
print(f"Average reward over 10 episodes: {np.mean(scores.history['episode_reward'])}")

# Step 6: Save the Agent's Weights
dqn.save_weights('dqn_weights.h5f', overwrite=True)

# Step 7: Reload the Agent and Test with Visual Rendering
# Close the training environment explicitly before dropping the reference so
# its resources are released deterministically rather than at garbage collection.
env.close()
del model
del dqn
del env

# Rebuild everything from scratch so the weights loaded below come only from
# the file saved in Step 6, not from objects left over from training.
env = gym.make('CartPole-v0')
states = env.observation_space.shape[0]
actions = env.action_space.n

model = build_model(states, actions)
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.load_weights('dqn_weights.h5f')

# Function to render and save the environment as a video
def render_episode(agent, environment, filename='cartpole.mp4'):
    """Run one episode with ``agent`` and save the rendered frames as a video.

    Args:
        agent: Object exposing ``forward(observation)`` (returns an action)
            and ``backward(reward, terminal)``.
        environment: Environment exposing ``reset()``, ``step(action)``
            returning ``(state, reward, done, info)``,
            ``render(mode='rgb_array')`` and ``close()``.
        filename: Name of the video file, joined onto ``/content/``.

    Returns:
        The path of the written video file.
    """
    frames = []
    try:
        state = environment.reset()
        done = False

        while not done:
            # Capture the frame for the state the agent is about to act on.
            frames.append(environment.render(mode='rgb_array'))

            action = agent.forward(state)
            state, reward, done, _info = environment.step(action)

            # Pair every forward() with a backward() so the agent sees each
            # transition, instead of a single call after the episode ends.
            agent.backward(reward, done)

        # The loop renders before stepping, so the terminal state would
        # otherwise never appear in the video.
        frames.append(environment.render(mode='rgb_array'))
    finally:
        # Release the environment even if the rollout raises.
        environment.close()

    # Save video
    video_path = os.path.join('/content/', filename)
    imageio.mimsave(video_path, frames, fps=30)
    return video_path

# Function to display video in Colab
def display_video(video_path):
    """Display the saved video in Colab.

    Reads the file at ``video_path``, base64-encodes its bytes into a
    ``data:video/mp4`` URL and returns an ``HTML`` object wrapping a
    ``<video>`` element that points at that URL.
    """
    # Use a context manager so the file handle is closed as soon as the
    # bytes are read, rather than whenever the object is garbage-collected.
    with open(video_path, "rb") as video_file:
        encoded = b64encode(video_file.read()).decode()
    video_url = f"data:video/mp4;base64,{encoded}"
    return HTML(f"""<video width=400 controls>
                     <source src="{video_url}" type="video/mp4">
                   </video>""")

# Render an episode and save as video
video_path = render_episode(dqn, env, filename='cartpole.mp4')

# Display the video
# The returned HTML object is left as the final expression of the cell;
# presumably the notebook front end renders it inline -- it is not displayed
# explicitly here.
display_video(video_path)


Training for 50000 steps ...
Interval 1 (0 steps performed)
   31/10000 [..............................] - ETA: 7:01 - reward: 1.0000

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=s

126 episodes - episode_reward: 78.524 [10.000, 200.000] - loss: 2.313 - mae: 18.595 - mean_q: 37.660

Interval 2 (10000 steps performed)
58 episodes - episode_reward: 174.155 [127.000, 200.000] - loss: 4.529 - mae: 37.977 - mean_q: 76.807

Interval 3 (20000 steps performed)
50 episodes - episode_reward: 198.980 [183.000, 200.000] - loss: 7.060 - mae: 41.768 - mean_q: 84.346

Interval 4 (30000 steps performed)
50 episodes - episode_reward: 200.000 [200.000, 200.000] - loss: 16.667 - mae: 49.059 - mean_q: 98.930

Interval 5 (40000 steps performed)
done, took 509.761 seconds
Testing for 10 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
Average reward over 10 episodes: 200.0

