# Reinforcement Learning Agent and Environment

## Install and define dependencies

In [1]:
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Box
import numpy as np
import random
import os
from PIL import Image
from PIL import ImageEnhance
import torch
import sys
sys.modules["gym"] = gym
from stable_baselines3 import SAC
import pandas as pd

In [2]:
# Dataset locations (relative to the notebook's working directory).
# Directory of distorted training images; DistortionEnv lists this directory and
# picks one file name from it per episode.
train_image_path = "../../GTSDB/images/distorted"
# Directory of the original images; the environment loads the file with the same
# name as the chosen distorted image from here.
original_image_path = "../../GTSDB/images"
# Directory of label files; the environment looks up "<image name without extension>.txt"
# here and parses each line as "class center_x center_y width height".
label_path = "../../GTSDB/labels"

## Build Environment

Action: The agent uses continuous values to change each image parameter (Sharpness, Brightness, Contrast, Color).

In [3]:
# Load the YOLOv5 detector through torch.hub using the custom weights file stored under ./models.
# The hub log below shows it being placed on CUDA:0.
model = torch.hub.load('ultralytics/yolov5', 'custom', path='./models/YOLOv5_best_1000ep.pt')

Using cache found in C:\Users\phili/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-6-7 Python-3.10.9 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1050 Ti, 4096MiB)

Fusing layers... 
Model summary: 213 layers, 1817344 parameters, 0 gradients, 4.3 GFLOPs
Adding AutoShape... 


In [4]:
# Smoke test: run the detector loaded above on one image from the original-image directory.
# Images
img = Image.open("../../GTSDB/images/00000.png")

# Inference
temp = np.asarray(img)  # the PIL image is handed to the model as a NumPy array
results = model(temp)

# Results
# One row per detection; the printed frame has the columns
# xmin, ymin, xmax, ymax, confidence, class, name.
temp2 = results.pandas().xyxy[0]  # Pandas DataFrame
print(temp2)

         xmin        ymin        xmax        ymax  confidence  class  \
0  774.867065  409.339874  814.390442  445.731384    0.272725     11   

                                    name  
0  Right-of-way at the next intersection  


In [5]:
class DistortionEnv(Env):
    """Gymnasium environment in which an agent re-adjusts the brightness of a distorted image.

    Each episode picks one image from ``train_image_path``. The agent applies a
    brightness factor to it, the YOLOv5 detector is run on the result, and the
    detections are compared against the ground-truth boxes read from ``label_path``.

    Observation: ``uint8`` RGB image of shape ``(image_height, image_width, 3)``.
    Action: one ``float32`` value in ``[0.0, 2.0]`` used as the brightness factor.
    Reward: ``1`` if the mean IoU of the detections is above 0.5, otherwise ``-1``.
    Episode length: one step (``remaining_actions`` starts at 1).
    """

    # Every image is resized to this size (in pixels) before it is used.
    image_width = 640
    image_height = 640

    @staticmethod
    def _load_and_convert_image(image_path):
        """Load an image, resize it to ``image_width`` x ``image_height`` and return it as RGB.

        Args:
            image_path: Path of the image file to open.

        Returns:
            ``numpy.ndarray`` of shape ``(image_height, image_width, 3)``.
        """
        # The context manager closes the underlying file once the pixels are read.
        with Image.open(image_path) as source_image:
            image = source_image.resize((DistortionEnv.image_width, DistortionEnv.image_height))
            # Any non-RGB mode (RGBA, grayscale, palette, ...) would otherwise yield an
            # array that does not match the declared observation space.
            if image.mode != "RGB":
                image = image.convert("RGB")
            return np.asarray(image)

    @staticmethod
    def _calculate_mse(image1, image2):
        """Return the mean squared error between the grayscale versions of two PIL images.

        Both images are resized to ``image_width`` x ``image_height`` first so that
        they have identical dimensions.
        """
        size = (DistortionEnv.image_width, DistortionEnv.image_height)

        # Resize, convert to grayscale, and switch to float64: subtracting the raw
        # uint8 arrays would wrap around modulo 256 and give a wrong error value.
        arr1 = np.array(image1.resize(size).convert("L"), dtype=np.float64)
        arr2 = np.array(image2.resize(size).convert("L"), dtype=np.float64)

        return np.mean((arr1 - arr2) ** 2)

    @staticmethod
    def _convert_yolo_to_coordinates(yolo_box, image_width, image_height):
        """Convert a normalized YOLO box to absolute corner coordinates.

        Args:
            yolo_box: ``(class, center_x, center_y, width, height)`` with the four
                geometric values given as fractions of the image size.
            image_width: Image width in pixels.
            image_height: Image height in pixels.

        Returns:
            ``(class, xmin, ymin, xmax, ymax)`` in pixels.
        """
        net_class, center_x, center_y, width, height = yolo_box

        xmin = (center_x - width/2) * image_width
        ymin = (center_y - height/2) * image_height
        xmax = (center_x + width/2) * image_width
        ymax = (center_y + height/2) * image_height
        return net_class, xmin, ymin, xmax, ymax

    def _get_gt_coords_from_yolo_file(self, file_path):
        """Read a YOLO label file and return its boxes as pixel corner coordinates.

        Args:
            file_path: Path of the label file; each non-empty line holds
                ``class center_x center_y width height``.

        Returns:
            List of ``(class, xmin, ymin, xmax, ymax)`` tuples scaled to
            ``image_width`` x ``image_height``. The class is kept as the string read
            from the file. The list is empty when the file does not exist.

        Raises:
            ValueError: If a non-empty line does not consist of exactly five fields
                or a coordinate is not a number.
        """
        gt_coords = []
        if not os.path.exists(file_path):
            return gt_coords

        with open(file_path, 'r') as file:
            for line in file:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at the end of the file).
                    continue
                net_class, center_x, center_y, width, height = line.split()
                yolo_box = (net_class, float(center_x), float(center_y), float(width), float(height))
                gt_coords.append(
                    self._convert_yolo_to_coordinates(yolo_box, DistortionEnv.image_width, DistortionEnv.image_height)
                )
        return gt_coords

    @staticmethod
    def _calculate_iou(box1, box2):
        """Return the intersection over union of two ``(class, xmin, ymin, xmax, ymax)`` boxes.

        Boxes whose classes differ score 0. The classes are compared by their string
        form because ground-truth classes are read from text files while predicted
        classes come from the detector's DataFrame.
        """
        class1, xmin1, ymin1, xmax1, ymax1 = box1
        class2, xmin2, ymin2, xmax2, ymax2 = box2

        if str(class1) != str(class2):
            return 0.0

        # Intersection rectangle; width/height are clamped at 0 for disjoint boxes.
        width_inter = max(0, min(xmax1, xmax2) - max(xmin1, xmin2))
        height_inter = max(0, min(ymax1, ymax2) - max(ymin1, ymin2))
        area_inter = width_inter * height_inter

        # Union = sum of both areas minus the part counted twice.
        area_box1 = (xmax1 - xmin1) * (ymax1 - ymin1)
        area_box2 = (xmax2 - xmin2) * (ymax2 - ymin2)
        area_union = area_box1 + area_box2 - area_inter

        return area_inter / area_union if area_union > 0 else 0.0

    def _check_ious(self, image_width, image_height, gt_coords, predictions):
        """Return the mean, over all predictions, of each prediction's best IoU with a ground-truth box.

        Args:
            image_width: Unused; kept so existing calls keep working.
            image_height: Unused; kept so existing calls keep working.
            gt_coords: List of ``(class, xmin, ymin, xmax, ymax)`` ground-truth boxes.
            predictions: DataFrame with the columns ``class``, ``xmin``, ``ymin``,
                ``xmax`` and ``ymax``, one row per detection.

        Returns:
            The mean best IoU. ``1.0`` when there are neither ground-truth boxes nor
            predictions, ``0.0`` when only one of the two is empty.
        """
        # NOTE: ground-truth boxes that no prediction matches do not lower the score
        # as long as at least one prediction exists.
        iou_values = []
        for _, row in predictions.iterrows():
            predicted_box = (row['class'], row['xmin'], row['ymin'], row['xmax'], row['ymax'])

            # Best match of this prediction among the ground-truth boxes (none if there are no boxes).
            best_iou = max(
                (self._calculate_iou(gt_coord, predicted_box) for gt_coord in gt_coords),
                default=None,
            )
            if best_iou is not None:
                iou_values.append(best_iou)

        if iou_values:
            return sum(iou_values) / len(iou_values)
        if len(gt_coords) == 0 and len(predictions) == 0:
            # Nothing to find and nothing found counts as a perfect result.
            return 1.0
        return 0.0

    # TODO: add helpers for an F1-based score and a dedicated reward calculation.

    def __init__(self, model=None):
        """Set up the detector, the action/observation spaces and the list of training images.

        Args:
            model: Optional, already loaded detector to reuse. When omitted, the
                YOLOv5 weights are loaded through ``torch.hub`` as before. Passing an
                existing model avoids keeping a second copy of it in GPU memory.
        """
        if model is None:
            model = torch.hub.load('ultralytics/yolov5', 'custom', path='./models/YOLOv5_best_1000ep.pt')
        self.model = model

        # TODO: Simplified to only learn adjusting the brightness. Has to be changed later.
        # Intended later action space: one factor in [0.0, 2.0] for each of
        # sharpness, contrast, brightness and color, i.e.
        #   Box(low=np.zeros(4), high=np.full(4, 2.0), shape=(4,), dtype=np.float32)
        brightness_bounds = (0.0, 2.0)
        self.action_space = Box(low = brightness_bounds[0],
                               high = brightness_bounds[1],
                               shape = (1,),
                               dtype = np.float32)

        # Observation: image array laid out as (height, width, channels).
        image_shape = (DistortionEnv.image_height, DistortionEnv.image_width, 3)
        self.observation_space = Box(low = 0, high = 255, shape = image_shape, dtype = np.uint8)

        # Training images: regular files only, sorted so that a seeded reset()
        # selects the same image on every machine.
        self.train_images = sorted(
            name for name in os.listdir(train_image_path)
            if os.path.isfile(os.path.join(train_image_path, name))
        )

    def reset(self, seed=None, options=None):
        """Start a new episode with a randomly chosen training image.

        Args:
            seed: Optional seed for the environment's random number generator.
            options: Unused; accepted for compatibility with the Gymnasium API.

        Returns:
            ``(observation, info)`` where ``observation`` is the distorted input
            image and ``info`` is an empty dict.

        Raises:
            IndexError: If the training image directory contains no files.
        """
        super().reset(seed=seed)

        # TODO: Set duration? e.g. 10 consecutive actions possible, maybe should start with only 1
        self.remaining_actions = 1

        if not self.train_images:
            raise IndexError("No training images found in {}".format(train_image_path))

        # Choose the image for this episode with the env's own RNG so that `seed` takes effect.
        self.image_name = self.train_images[int(self.np_random.integers(len(self.train_images)))]

        # Load the distorted input image and the matching original image.
        self.image_input = self._load_and_convert_image(os.path.join(train_image_path, self.image_name))
        self.image_original = self._load_and_convert_image(os.path.join(original_image_path, self.image_name))

        # Load ground truth (label and bounding box file); splitext keeps file names
        # that contain additional dots intact.
        label_filename = os.path.splitext(self.image_name)[0] + ".txt"
        self.gt_coords = self._get_gt_coords_from_yolo_file(os.path.join(label_path, label_filename))

        self.state = self.image_input.copy()
        return self.state, {}

    def step(self, action):
        """Apply the brightness action, run the detector and score the detections.

        Args:
            action: Brightness factor, as a scalar or an array holding one value.
                It is clipped to the bounds of ``action_space``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. ``reward`` is
            ``1`` if the mean IoU is above 0.5 and ``-1`` otherwise, ``truncated``
            is always ``False`` and ``info`` contains the mean IoU under ``"mean_iou"``.
        """
        self.remaining_actions -= 1

        # ImageEnhance expects a plain float factor, while the agent delivers an array of shape (1,).
        clipped_action = np.clip(action, self.action_space.low, self.action_space.high)
        brightness_factor = float(np.ravel(clipped_action)[0])

        enhancer = ImageEnhance.Brightness(Image.fromarray(self.state))
        self.state = np.asarray(enhancer.enhance(brightness_factor))

        # TODO: Replace this thresholded IoU reward with a more informative signal.
        results = self.model(self.state)
        predictions = results.pandas().xyxy[0]
        mean_iou = self._check_ious(DistortionEnv.image_width, DistortionEnv.image_height, self.gt_coords, predictions)

        reward = 1 if (mean_iou > 0.5) else -1
        terminated = self.remaining_actions <= 0
        info = {"mean_iou": mean_iou}

        return self.state, reward, terminated, False, info

    def render(self):
        """Rendering is not implemented."""
        pass

In [6]:
# Create the environment. With no arguments the constructor loads its own YOLOv5 model
# through torch.hub (hence the second hub log below) and lists the training images.
env = DistortionEnv()

Using cache found in C:\Users\phili/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-6-7 Python-3.10.9 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1050 Ti, 4096MiB)

Fusing layers... 
Model summary: 213 layers, 1817344 parameters, 0 gradients, 4.3 GFLOPs
Adding AutoShape... 


In [7]:
# Sanity check: run a few episodes with randomly sampled actions and print each episode's total reward.
episodes = 10
for episode in range(1, episodes+1):
    # reset() returns an (observation, info) pair; unpack it so `state` is the image itself.
    state, info = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample()
        # An episode ends when the environment reports either termination or truncation.
        # The flags get real names because assigning to `_` would shadow IPython's
        # last-output variable.
        n_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        score += reward
    print("Episode:{} Score:{}".format(episode, score))

Episode:1 Score:1
Episode:2 Score:-1
Episode:3 Score:1
Episode:4 Score:-1
Episode:5 Score:-1
Episode:6 Score:1
Episode:7 Score:-1
Episode:8 Score:1
Episode:9 Score:-1
Episode:10 Score:1


In [24]:
# Ask PyTorch to release cached GPU memory before the RL agent is created.
# NOTE(review): this presumably cannot free memory still referenced by live objects such as
# the YOLOv5 models loaded above — confirm against the PyTorch memory-management notes.
torch.cuda.empty_cache()

In [25]:
# Inspect the CUDA allocator statistics to see how much GPU memory is in use before training.
torch.cuda.memory_summary(device=None, abbreviated=False)



In [26]:
# Create the SAC agent on the environment. buffer_size=1 and batch_size=1 keep the replay
# buffer and the training batches at a single transition.
# NOTE(review): this rebinds `model`, which an earlier cell bound to the YOLOv5 detector;
# re-running that inference cell afterwards would call the SAC object instead.
# NOTE(review): the recorded output of this cell is a CUDA OutOfMemoryError (1.17 GiB requested
# on a 4 GiB GPU). Presumably "MlpPolicy" flattens the 640x640x3 observation into ~1.2M inputs,
# so the first dense layer alone is that large — confirm, and consider a smaller observation
# and/or a CNN policy.
model = SAC("MlpPolicy", env, verbose=1, buffer_size=1, batch_size=1)
# Training and saving are disabled for now; "sac_pendulum" looks like a leftover file name
# from an example — TODO rename before enabling.
#model.learn(total_timesteps=1000, log_interval=10)
#model.save("sac_pendulum")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.17 GiB (GPU 0; 4.00 GiB total capacity; 2.39 GiB already allocated; 294.45 MiB free; 2.42 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF