In [2]:
!apt-get update && apt-get install -y swig cmake
!apt-get install -y graphviz
# !pip install --upgrade gymnasium stable-baselines3
!pip install "stable-baselines3[extra]>=2.0.0a4"
!pip install torchviz

Get:1 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]              
Get:2 http://security.ubuntu.com/ubuntu focal-security InRelease [128 kB]    
Get:3 http://archive.ubuntu.com/ubuntu focal-updates InRelease [128 kB]
Get:4 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [30.9 kB]
Get:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease [128 kB]
Get:6 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]
Get:7 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]
Get:8 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [4092 kB]
Get:9 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]
Get:10 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1275 kB]
Get:11 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]
Get:12 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1566 kB]
Get:13 http://security

In [3]:
import gymnasium as gym
from gymnasium import spaces
from gymnasium.utils import seeding
import numpy as np
import os
from PIL import Image
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torchviz import make_dot
import shutil

import stable_baselines3
from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage, VecEnvWrapper, VecFrameStack
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Print the versions of gymnasium and stable_baselines3 for debugging purposes
print(f"{gym.__version__=}")
print(f"{stable_baselines3.__version__=}")
print(torch.__version__)

gym.__version__='0.29.1'
stable_baselines3.__version__='2.4.0a10'
1.13.1+cu117


In [5]:
# Change the paths based on your directory structure
log_dir = "tensorboard3/"
models_dir = "models3/"
project_dir = ""

if os.path.exists(models_dir):
    shutil.rmtree(models_dir)
    shutil.rmtree(log_dir)
    print(f"Folder '{models_dir}' and all its contents have been deleted.")
else:
    print(f"Folder '{project_dir}' does not exist.")

os.makedirs(log_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)


Folder 'models3/' and all its contents have been deleted.


In [6]:
class ImageClassificationEnv(gym.Env):
    metadata = {"render_modes": []}

    def __init__(self, image_folder, hand):
        super(ImageClassificationEnv, self).__init__()
        self.hand = hand
        self.image_folder = image_folder
        self.image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]

        # Observation space: A 3D array representing the image (64x64x3 for RGB images)
        self.observation_space = spaces.Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8)

        # Action space: Discrete actions corresponding to the classes
        if self.hand == 'left':
            self.action_space = spaces.Discrete(3)
        elif self.hand == 'right':
            self.action_space = spaces.Discrete(9)

        self.current_step = 0
        self.max_steps = 30
        self.current_image = None
        self.image_label = None

        # Initialize the random number generator
        self.np_random = None

    def load_images(self):
        # Randomly select images for the episode using the environment's RNG
        self.selected_images = self.np_random.choice(
            self.image_files, size=self.max_steps + 1, replace=False
        ).tolist()

    def reset(self, seed=None, options=None, **kwargs):
        # Handle the seed
        super().reset(seed=seed)
        if seed is not None:
            self.np_random, _ = seeding.np_random(seed)
        else:
            if self.np_random is None:
                self.np_random, _ = seeding.np_random()
        self.current_step = 0
        self.load_images()
        obs = self._next_observation()
        info = {}
        return obs, info

    def _next_observation(self):
        # Load the next image
        image_file = self.selected_images[self.current_step]
        image_path = os.path.join(self.image_folder, image_file)
        image = Image.open(image_path).resize((64, 64))
        self.current_image = np.array(image).astype(np.float64) / 255.0  # Keep as uint8

        # Load the corresponding label from the txt file
        label_file = image_file.replace('.jpg', '.txt')
        label_path = os.path.join(self.image_folder, label_file)
        with open(label_path, 'r') as file:
            label = file.read().strip()

        if self.hand == 'left':
            label_mapping = {'swarm_1': 0, 'swarm_2': 1}
            self.image_label = label_mapping.get(label, 2)  # Default to 2 for 'no action'
        elif self.hand == 'right':
            label_mapping = {
                'up': 0, 'down': 1, 'left': 2, 'right': 3, 'backwards': 4,
                'forward': 5, 'take_off': 6, 'land': 7
            }
            self.image_label = label_mapping.get(label, 8)  # Default to 8 for 'no action'

        return self.current_image

    def step(self, action):
        # Reward logic
        if action == self.image_label:
            reward = 1
        else:
            reward = -1

        self.current_step += 1
        terminated = self.current_step >= self.max_steps
        truncated = False  # Use 'truncated' if you have a time limit or similar

        obs = self._next_observation()
        info = {}
        return obs, reward, terminated, truncated, info

    def render(self):
        pass  # Rendering is not required for this environment

In [7]:
# Choose the hand to train the agent for
train_agent_for_left_hand = False
env_name = ""
if train_agent_for_left_hand:
    base_env = ImageClassificationEnv(image_folder='rl_train_dataset_left_hand', hand='left')
    env_name = 'rl_train_dataset_left_hand'
else:
    base_env = ImageClassificationEnv(image_folder='rl_train_dataset_right_hand', hand='right')
    env_name = 'rl_train_dataset_right_hand'

# Wrap the environment
base_env = Monitor(base_env)

# Use FrameStack wrapper to stack frames
num_stack = 3  # Number of frames to stack
env = DummyVecEnv([lambda: base_env])
env = VecFrameStack(env, n_stack=num_stack)
env = VecTransposeImage(env)  # Transpose to (C, H, W)

obs = env.reset()

In [8]:
class CustomCNN(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.Space, features_dim: int = 256):
        super(CustomCNN, self).__init__(observation_space, features_dim)
        n_input_channels = observation_space.shape[0]  # After VecTransposeImage, the shape is (C, H, W)

        self.cnn = nn.Sequential(
            nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=2),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Flatten(),
        )

        # Compute the size of the output of the last convolutional layer
        with torch.no_grad():
            sample_input = torch.zeros(1, n_input_channels, observation_space.shape[1], observation_space.shape[2])
            n_flatten = self.cnn(sample_input).shape[1]

        self.linear = nn.Sequential(
            nn.Linear(n_flatten, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, features_dim),
            nn.ReLU(),
        )

        self._features_dim = features_dim

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # Normalize the observations (images) to [0, 1] within the model
        observations = observations.float() / 255.0
        x = self.cnn(observations)
        x = self.linear(x)
        return x

In [9]:
# CNN Architecture Visualisation
# Create a mock observation space for visualization purposes
observation_space = gym.spaces.Box(low=0, high=255, shape=(3, 64, 64), dtype=np.uint8)

# Instantiate the model with the mock observation space
cnn_model = CustomCNN(observation_space)

# Create a sample input to pass through the model
sample_input = torch.rand((1, *observation_space.shape)).float()

# Pass the sample input through the model and visualize
output = cnn_model(sample_input)
make_dot(output, params=dict(cnn_model.named_parameters())).render(
    f"{project_dir}custom_cnn_model", format="png")

print("Custom CNN model architecture saved as 'custom_cnn_model.png'")

Custom CNN model architecture saved as 'custom_cnn_model.png'


In [10]:
# Function to print parameters in tabular format with dimensions
def count_parameters_with_dimensions(model):
    total_params = 0

    # Print table header
    print(f"{'Layer':<30} {'Dimensions':<30} {'Number of Parameters':<25}")
    print("-" * 75)

    for name, param in model.named_parameters():
        if param.requires_grad:
            num_params = param.numel()
            total_params += num_params
            # Print each layer, its dimensions, and parameter count
            print(f"{name:<30} {str(param.shape):<30} {num_params:<25}")

    # Print total parameters at the end
    print("-" * 75)
    print(f"{'Total Parameters':<60} {total_params:<20}")

# Call the function to output the parameters
count_parameters_with_dimensions(cnn_model)

Layer                          Dimensions                     Number of Parameters     
---------------------------------------------------------------------------
cnn.0.weight                   torch.Size([32, 3, 3, 3])      864                      
cnn.0.bias                     torch.Size([32])               32                       
cnn.2.weight                   torch.Size([64, 32, 3, 3])     18432                    
cnn.2.bias                     torch.Size([64])               64                       
cnn.4.weight                   torch.Size([128, 64, 3, 3])    73728                    
cnn.4.bias                     torch.Size([128])              128                      
cnn.6.weight                   torch.Size([256, 128, 3, 3])   294912                   
cnn.6.bias                     torch.Size([256])              256                      
linear.0.weight                torch.Size([256, 160000])      40960000                 
linear.0.bias                  torch.Size([2

In [11]:
# Define a callback class for saving models at regular intervals during training
class SaveOnIntervalCallback(BaseCallback):
    def __init__(self, save_interval: int, save_path: str, verbose=1):
        super().__init__(verbose)
        self.save_interval = save_interval
        self.save_path = save_path

    def _on_step(self) -> bool:
        # Save the model every 'save_interval' steps
        if self.num_timesteps % self.save_interval == 0:
            save_file = os.path.join(self.save_path, f'model_{self.num_timesteps}')
            self.model.save(save_file)
            if self.verbose > 0:
                print(f'Saving model to {save_file}.zip')
        return True

In [12]:
policy_kwargs = dict(
    features_extractor_class=CustomCNN,
    features_extractor_kwargs=dict(features_dim=256),
)

# Linear schedule from 2.5e-4 to 0
learning_rate = lambda f: f * 2.5e-4 

# Adjusted PPO hyperparameters
model = PPO(
    env=env,
    tensorboard_log=log_dir,
    policy='CnnPolicy',
    verbose=1,
    clip_range=0.1,             # Reduced clip range
    ent_coef=0.01,              # Reduced entropy coefficient
    learning_rate=learning_rate,
    n_epochs=4,                 # Reduced number of epochs
    n_steps=128,                # Reduced number of steps per update
    vf_coef=0.5,
    batch_size=256,             # Batch size divides n_steps * n_envs evenly
    device='cuda',
    policy_kwargs={'features_extractor_class': CustomCNN},
    seed=2
)

Using cuda device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=128 and n_envs=1)


In [None]:
# Define the interval at which models are saved during training
save_interval = 200000
save_callback = SaveOnIntervalCallback(save_interval, models_dir)

# Train the agent
total_timesteps = 10000000
model.learn(total_timesteps=total_timesteps, callback=save_callback, log_interval=200)

Logging to tensorboard3/PPO_1
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 30            |
|    ep_rew_mean          | -23.9         |
| time/                   |               |
|    fps                  | 117           |
|    iterations           | 200           |
|    time_elapsed         | 217           |
|    total_timesteps      | 25600         |
| train/                  |               |
|    approx_kl            | 0.00021892413 |
|    clip_fraction        | 0.00195       |
|    clip_range           | 0.1           |
|    entropy_loss         | -1.71         |
|    explained_variance   | 0             |
|    learning_rate        | 0.000249      |
|    loss                 | 19.2          |
|    n_updates            | 796           |
|    policy_gradient_loss | 0.000768      |
|    value_loss           | 38.5          |
-------------------------------------------
------------------------------------------
| r

In [None]:
# Evaluate the agent
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean reward: {mean_reward} +/- {std_reward}")

# Save the model
model.save("final_"+env_name)