In [1]:
import numpy as np
import torch

# USER Imports
from ddpg import DDPGAgent
from env import GridWorldEnv

In [2]:
# ---- User-tunable settings -------------------------------------------------

# Compute device: use CUDA when torch reports it as available, else the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Grid-world layout. Positions are tuples used to index the grid.
grid_size = (5, 5)                      # a 5x5 grid
start, end = (0, 0), (4, 4)             # starting point A, destination point B
obstacles = [(1, 1), (2, 2), (3, 3)]    # obstacle positions

# Number of episodes the training loop runs.
num_episodes = 100

  return torch._C._cuda_getDeviceCount() > 0


In [3]:
# Training function
def train(env, agent, num_episodes=100, max_steps=None, render=True):
    """Run the agent/environment interaction loop for a number of episodes.

    Each step selects an action, applies it to the environment, stores the
    transition in the agent's replay buffer, and triggers one agent training
    update. Progress is reported with ``print``.

    Args:
        env: Environment object providing ``reset()``, ``step(action)`` (which
            must return a 4-item sequence ``next_state, reward, done, extra``)
            and ``render()``.
        agent: Agent object providing ``select_action(state)``,
            ``add_to_replay_buffer(state, next_state, action, reward, done)``
            and ``train()``.
        num_episodes: Number of episodes to run.
        max_steps: Optional cap on the number of steps per episode. ``None``
            (the default) keeps the original behaviour of running until the
            environment reports ``done``; set it to avoid an episode that
            never terminates hanging the notebook.
        render: When true (the default), ``env.render()`` is called after
            every step. Pass ``False`` to keep the cell output small.

    Returns:
        None.
    """
    for episode in range(num_episodes):
        state = env.reset()  # Reset environment for each new episode
        done = False
        episode_reward = 0
        steps_taken = 0

        print(f"Starting Episode {episode + 1}")

        # Stop on environment termination, or on the optional step budget.
        while not done and (max_steps is None or steps_taken < max_steps):
            # Select the action, clamp it to [0, 3] and truncate it to an int.
            action = agent.select_action(state)
            action = int(np.clip(action, 0, 3))

            # Execute the action in the environment
            next_state, reward, done, _ = env.step(action)

            # Store the transition. When the step budget cuts an episode short
            # the stored `done` is still whatever the environment reported.
            agent.add_to_replay_buffer(state, next_state, action, reward, done)

            # One agent update per environment step
            agent.train()

            # Update state and accumulate reward
            state = next_state
            episode_reward += reward
            steps_taken += 1

            # Optionally print the grid at each step
            if render:
                env.render()

        # Print final episode information
        print(f"Episode {episode + 1} completed. Total Reward: {episode_reward}")
        print("\n")

    print("Training completed!")

In [4]:
# Initialize a grid array: 0 everywhere, -1 on obstacles, 1 at the start cell
# and 2 at the destination cell. Note that `grid` is not handed to the
# environment or the agent in this cell.
grid = np.full(grid_size, 0.0)
for obstacle in obstacles:
    grid[obstacle] = -1
grid[start], grid[end] = 1, 2

# Build the environment and the DDPG agent from the user parameters.
env = GridWorldEnv(grid_size, start, end, obstacles)
agent = DDPGAgent(state_dim=2, action_dim=4, max_action=3, device=device)

# Kick off training for the configured number of episodes.
train(env, agent, num_episodes=num_episodes)

Starting Episode 1
[[ 1.  0.  0.  0.  0.]
 [ 3. -1.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.]
 [ 0.  0.  0. -1.  0.]
 [ 0.  0.  0.  0.  2.]]
[[ 1.  0.  0.  0.  0.]
 [ 3. -1.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.]
 [ 0.  0.  0. -1.  0.]
 [ 0.  0.  0.  0.  2.]]
[[ 1.  0.  0.  0.  0.]
 [ 0.  3.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.]
 [ 0.  0.  0. -1.  0.]
 [ 0.  0.  0.  0.  2.]]
Episode 1 completed. Total Reward: -1.2


Starting Episode 2
[[ 3.  0.  0.  0.  0.]
 [ 0. -1.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.]
 [ 0.  0.  0. -1.  0.]
 [ 0.  0.  0.  0.  2.]]
[[ 1.  0.  0.  0.  0.]
 [ 3. -1.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.]
 [ 0.  0.  0. -1.  0.]
 [ 0.  0.  0.  0.  2.]]
[[ 3.  0.  0.  0.  0.]
 [ 0. -1.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.]
 [ 0.  0.  0. -1.  0.]
 [ 0.  0.  0.  0.  2.]]
[[ 3.  0.  0.  0.  0.]
 [ 0. -1.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.]
 [ 0.  0.  0. -1.  0.]
 [ 0.  0.  0.  0.  2.]]
[[ 1.  3.  0.  0.  0.]
 [ 0. -1.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.]
 [ 0.  0.  0. -1.  0.]
 [ 0.  0.  0.  0