In [1]:
import random

import numpy as np
import torch

# USER Imports
from ddpg import DDPGAgent
from env import GridWorldEnv

In [2]:
# USER PARAMETERS
grid_size = (5, 5)  # A 5x5 grid
obstacles = [(1, 1), (2, 2), (3, 3)]  # List of obstacle positions
start = (0, 0)  # Starting point A
end = (4, 4)  # Destination point B
num_episodes = 100
SEED = 42  # Seed applied to the random number generators below

# Fail fast on an inconsistent layout. Without this check a negative coordinate
# would silently wrap around when used as a numpy index instead of raising.
assert all(
    0 <= coord < size
    for position in [start, end, *obstacles]
    for coord, size in zip(position, grid_size)
), "start, end and every obstacle must lie inside grid_size"
assert start not in obstacles, "start must not be an obstacle"
assert end not in obstacles, "end must not be an obstacle"

# Seed the Python, NumPy and PyTorch generators so that a fresh
# "Restart Kernel -> Run All" starts from the same random state.
# NOTE(review): this only makes training repeatable if ddpg/env draw their
# randomness from these generators -- confirm against those modules.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  return torch._C._cuda_getDeviceCount() > 0


In [3]:
# Training function
def train(env, agent, num_episodes=100, max_steps=None, num_actions=4, render=True):
    """Run ``agent`` in ``env`` for ``num_episodes`` episodes, learning after every step.

    For each episode the environment is reset, then the loop repeatedly asks the
    agent for an action, discretises it, steps the environment, stores the
    transition in the agent's replay buffer and triggers one agent update. The
    episode ends when the environment reports ``done`` or, if given, when
    ``max_steps`` steps have been taken. The summed reward of every episode is
    printed.

    Args:
        env: Environment exposing ``reset(episode)`` (returns the initial
            state), ``step(action)`` (returns a 4-tuple
            ``(next_state, reward, done, info)``) and ``render()``.
        agent: Agent exposing ``select_action(state)``,
            ``add_to_replay_buffer(state, next_state, action, reward, done)``
            and ``train()``.
        num_episodes: Number of episodes to run.
        max_steps: Optional cap on the number of steps per episode. ``None``
            (the default) means no cap, i.e. the loop relies solely on the
            environment's ``done`` flag.
        num_actions: Number of discrete actions; the agent's output is clipped
            to ``[0, num_actions - 1]``. The default of 4 gives the range 0..3.
        render: When true (the default) ``env.render()`` is called after every
            step.

    Raises:
        ValueError: If ``num_actions`` is smaller than 1 or ``max_steps`` is
            negative.
    """
    if num_actions < 1:
        raise ValueError(f"num_actions must be >= 1, got {num_actions}")
    if max_steps is not None and max_steps < 0:
        raise ValueError(f"max_steps must be >= 0 or None, got {max_steps}")

    highest_action = num_actions - 1

    for episode in range(num_episodes):
        state = env.reset(episode)  # Reset environment for each new episode
        done = False
        episode_reward = 0
        steps_taken = 0

        # The step cap guards against an environment that never signals `done`,
        # which would otherwise hang the kernel.
        while not done and (max_steps is None or steps_taken < max_steps):
            # Clip the agent's output into the valid range, then convert to int.
            # int() truncates toward zero (e.g. 2.9 -> 2); it does not round.
            action = agent.select_action(state)
            action = int(np.clip(action, 0, highest_action))

            # Execute the action in the environment
            next_state, reward, done, _ = env.step(action)

            # Store the transition with the discretised action.
            # NOTE(review): the buffer receives the clipped integer, not the
            # agent's raw output -- confirm this is what the agent expects.
            agent.add_to_replay_buffer(state, next_state, action, reward, done)

            # One agent update per environment step
            agent.train()

            # Advance to the next state and accumulate the reward
            state = next_state
            episode_reward += reward
            steps_taken += 1

            # Show the grid after the step
            if render:
                env.render()

        # Print final episode information
        print(f"Episode {episode + 1} completed. Total Reward: {episode_reward}")
        print("\n")

    print("Training completed!")

In [4]:
# Initialize the grid: obstacle cells are marked -1, the start cell 1 and the
# end cell 2; every other cell stays 0.
# NOTE: `grid` is not passed to the environment or the agent in this cell.
grid = np.zeros(grid_size)
for obstacle in obstacles:
    grid[obstacle] = -1
grid[start], grid[end] = 1, 2

# Initialize the environment and agent
env = GridWorldEnv(grid_size, start, end, obstacles)
agent = DDPGAgent(
    state_dim=2,
    action_dim=4,
    max_action=3,
    device=device,
)

# Train the agent
train(env, agent, num_episodes=num_episodes)

VBox(children=(Label(value='Episode: 0'), Output()))

Episode 1 completed. Total Reward: -1.4




Episode 2 completed. Total Reward: 8.6




Episode 3 completed. Total Reward: -1.3




Episode 4 completed. Total Reward: -1.2




Episode 5 completed. Total Reward: -1.7999999999999998




Episode 6 completed. Total Reward: -2.2




Episode 7 completed. Total Reward: -1.2




Episode 8 completed. Total Reward: -1.4




Episode 9 completed. Total Reward: -1.2




Episode 10 completed. Total Reward: -2.5




Episode 11 completed. Total Reward: -1.6




Episode 12 completed. Total Reward: 9.1




Episode 13 completed. Total Reward: -1.7




Episode 14 completed. Total Reward: 6.799999999999999




Episode 15 completed. Total Reward: -1.1




Episode 16 completed. Total Reward: -3.400000000000001




Episode 17 completed. Total Reward: -4.400000000000002




Episode 18 completed. Total Reward: -1.1




Episode 19 completed. Total Reward: -1.4




Episode 20 completed. Total Reward: -2.3




Episode 21 completed. Total Reward: -1.1




Episode 22 completed. Total Reward: -1.9




Episode 23 completed. Total Reward: -2.0




Episode 24 completed. Total Reward: -1.1




Episode 25 completed. Total Reward: -1.2




Episode 26 completed. Total Reward: -2.3




Episode 27 completed. Total Reward: -5.999999999999998




Episode 28 completed. Total Reward: -1.2




Episode 29 completed. Total Reward: -1.3




Episode 30 completed. Total Reward: -1.6




Episode 31 completed. Total Reward: -3.800000000000001




Episode 32 completed. Total Reward: -2.2




Episode 33 completed. Total Reward: -2.5




Episode 34 completed. Total Reward: -1.2




Episode 35 completed. Total Reward: -2.8000000000000007




Episode 36 completed. Total Reward: -1.7




Episode 37 completed. Total Reward: -1.1




Episode 38 completed. Total Reward: -1.3




Episode 39 completed. Total Reward: -1.3




Episode 40 completed. Total Reward: -1.3




Episode 41 completed. Total Reward: -1.6




Episode 42 completed. Total Reward: -1.5




Episode 43 completed. Total Reward: -1.2




Episode 44 completed. Total Reward: -1.2




Episode 45 completed. Total Reward: -2.0




Episode 46 completed. Total Reward: -1.7




Episode 47 completed. Total Reward: -1.2




Episode 48 completed. Total Reward: -1.6




Episode 49 completed. Total Reward: -1.1




Episode 50 completed. Total Reward: -1.6




Episode 51 completed. Total Reward: -1.5




Episode 52 completed. Total Reward: -2.9000000000000004




Episode 53 completed. Total Reward: -2.0




Episode 54 completed. Total Reward: -1.3




Episode 55 completed. Total Reward: -1.2




Episode 56 completed. Total Reward: -1.6




Episode 57 completed. Total Reward: -2.0999999999999996




Episode 58 completed. Total Reward: -2.7




Episode 59 completed. Total Reward: -1.2




Episode 60 completed. Total Reward: -1.2




Episode 61 completed. Total Reward: -1.6




Episode 62 completed. Total Reward: -1.9




Episode 63 completed. Total Reward: -3.2000000000000006




Episode 64 completed. Total Reward: -1.3




Episode 65 completed. Total Reward: -1.4




Episode 66 completed. Total Reward: -2.0999999999999996




Episode 67 completed. Total Reward: -1.3




Episode 68 completed. Total Reward: 8.8




Episode 69 completed. Total Reward: -1.5




Episode 70 completed. Total Reward: -1.2




Episode 71 completed. Total Reward: -2.0999999999999996




Episode 72 completed. Total Reward: -3.1000000000000005




Episode 73 completed. Total Reward: -1.5




Episode 74 completed. Total Reward: -1.6




Episode 75 completed. Total Reward: -1.2




Episode 76 completed. Total Reward: -1.4




Episode 77 completed. Total Reward: -2.7




Episode 78 completed. Total Reward: -3.700000000000001




Episode 79 completed. Total Reward: -3.2000000000000006




Episode 80 completed. Total Reward: -3.1000000000000005




Episode 81 completed. Total Reward: -1.7




Episode 82 completed. Total Reward: -2.4000000000000004




Episode 83 completed. Total Reward: -1.6




Episode 84 completed. Total Reward: -1.2




Episode 85 completed. Total Reward: -2.0999999999999996




Episode 86 completed. Total Reward: -5.200000000000001




Episode 87 completed. Total Reward: -1.4




Episode 88 completed. Total Reward: -1.2




Episode 89 completed. Total Reward: -1.6




Episode 90 completed. Total Reward: -1.6




Episode 91 completed. Total Reward: -1.5




Episode 92 completed. Total Reward: -2.2




Episode 93 completed. Total Reward: -3.3000000000000007




Episode 94 completed. Total Reward: -1.6




Episode 95 completed. Total Reward: -1.5




Episode 96 completed. Total Reward: -1.5




Episode 97 completed. Total Reward: -2.0999999999999996




Episode 98 completed. Total Reward: -1.9




Episode 99 completed. Total Reward: -13.99999999999997




Episode 100 completed. Total Reward: -2.0


Training completed!
