<a href="https://colab.research.google.com/github/Luke-687/Google-Colab/blob/main/PointToPoint.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
pip install stable-baselines3[extra] gym matplotlib

Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cublas_cu12-12.4.5.8-py

In [9]:
!pip install shimmy 2.1
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3 import PPO

class PointToPointEnv(gym.Env):
    def __init__(self, size=10):
        super(PointToPointEnv, self).__init__()
        self.size = size
        self.start = np.array([0.0, 0.0], dtype=np.float32)
        self.goal = np.array([self.size - 1, self.size - 1], dtype=np.float32)
        self.state = self.start.copy()

        # Action: move in 2D (dx, dy) between -1 and 1
        self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)

        # Observation: agent's position in 2D
        self.observation_space = spaces.Box(low=0, high=self.size, shape=(2,), dtype=np.float32)

        # For rendering
        self.fig, self.ax = None, None

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.state = self.start.copy()
        return self.state, {}

    def step(self, action):
        self.state = np.clip(self.state + action, 0, self.size)
        distance = np.linalg.norm(self.state - self.goal)
        terminated = distance < 0.5
        truncated = False # Assuming no time limit or other truncation condition
        reward = 100.0 if terminated else -1.0
        return self.state, reward, terminated, truncated, {}

    def render(self, mode='human'):
      if self.fig is None:
          plt.ion()
          self.fig, self.ax = plt.subplots()
      self.ax.clear()
      self.ax.set_xlim(0, self.size)
      self.ax.set_ylim(0, self.size)

      # Draw goal
      self.ax.plot(self.goal[0], self.goal[1], 'ro', markersize=10, label='Goal')

      # Draw agent
      self.ax.plot(self.state[0], self.state[1], 'bo', markersize=8, label='Agent')

      # Add grid
      self.ax.grid(True, which='both', linestyle='--', linewidth=0.5)

      # Add legend and titles
      self.ax.set_title("Agent Moving Step-by-Step to Goal")
      self.ax.legend()

      plt.draw()
      plt.pause(0.1)  # 0.1 second pause for visual effect

    def close(self):
        if self.fig:
            plt.close(self.fig)
            self.fig = None

# Create and train agent
env = PointToPointEnv()

# Wrap environment for vectorized training
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=50_000)

# Test agent
obs = env.reset()
done = False
while not done:
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    env.render()


env.close()


Collecting shimmy
  Using cached Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
[31mERROR: Could not find a version that satisfies the requirement 2.1 (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for 2.1[0m[31m
[0mUsing cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 645      |
|    ep_rew_mean     | -544     |
| time/              |          |
|    fps             | 1505     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 512          |
|    ep_rew_mean          | -411         |
| time/                   |              |
|    fps                  | 867          |
|    iterations           | 2            |
|    time_ela

ValueError: You have passed a tuple to the predict() function instead of a Numpy array or a Dict. You are probably mixing Gym API with SB3 VecEnv API: `obs, info = env.reset()` (Gym) vs `obs = vec_env.reset()` (SB3 VecEnv). See related issue https://github.com/DLR-RM/stable-baselines3/issues/1694 and documentation for more information: https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#vecenv-api-vs-gym-api