# Goal
Now that we are able to detect the HexBugs using the `Yolo` model we trained, we can crop the specific regions containing the targets. But there is a catch!
Bounded rectangles doesn't have same shape! We need them to have same shape to feed them to our CNN model. To do so, we analyze the boundary box and crop the image in a way that it has a shape of `(300, 300)`. Since the predicted coordinations in the cropped image need to be scaled, we keep track of the boundary box's coordinations as well, and save them in a `data.csv` file to work with later.

### Imports
* `os` is essential to manage directories.
* `json` is needed to open `traco` json files, which contains our annotations.
* `torch` is required as we use `ultralytics`, which is implemented in `Pytorch`.
* `numpy` is used to convert images to arrays and vice versa.
* `cv2` is essential to read and show images.
* `pandas` is required to work with dataframes. e.g. the `data.csv` file we created before.
* `time` is required to track time.
* `ultralytics` is needed to work with our `Yolo` model.
* `supervision` is required to work with `Roboflow` smoothly.

In [1]:
import os
import json
import torch
import numpy as np
import cv2
from tqdm import tqdm
import pandas as pd
from time import time
from ultralytics import YOLO
import supervision as sv

from IPython.display import clear_output

### Trained model location
Here we put the location to the `best.pt` model we trained before. it must be at `runs/detect/train/weights/best.pt`

In [2]:
bug_model = 'runs/detect/train/weights/best.pt'

### Creating ObjectDetection class
To work easier with the trained model, we can create a class to do so.
#### Note Please
The original idea for this is for [Nicolai Høirup Nielsen](https://github.com/niconielsen32/YOLOv8-Class/blob/main/YOLOv8InferenceClass.py).

In [3]:
class ObjectDetection:
    # The class takes only the capture index. Index 0 is the main camera of the device and 
    # was used to test real-time capture.
    def __init__(self, capture_index):

        self.capture_index = capture_index
        
        # Checks if the device supports `cuda`
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        
        print("Using Device: ", self.device)

        self.model = self.load_model()

        self.CLASS_NAMES_DICT = self.model.model.names

        print(self.CLASS_NAMES_DICT)
        
        # Uses the supervision library to plot bounding boxes.
        self.box_annotator = sv.BoxAnnotator(sv.ColorPalette.default(), thickness=2, text_thickness=1, text_scale=1)


    def load_model(self):
        """
        Loads the trained Yolo model.
        
        @param: None
        @return: Model model
        """
        model = YOLO(bug_model)
        model.fuse()

        return model

    def predict(self, frame):
        """
        Predicts the results in the given image frame.
        
        @param: Image frame
        @return: numpy.ndarray results. contains `xyxy`, `confidence`, `class_ids` of detected objects.
        """
        results = self.model(frame)

        return results

    def plot_bboxes(self, results, frame):
        """
        Plots the bounding boxes around the detected results. It contains the class_id and the confidence score.
        
        @param: numpy.ndarray results. Is the output from self.predict()
        @return: Image frame. The results are all labeled in the original image.
        """
        
        # Creating lists to track the results output
        xyxys = []
        confidences = []
        class_ids = []

        # Extract detections for HexBug (class_id 0)
        for result in results:
            # Extracting the `boxes` data, which contains the bounding boxes' position.
            boxes = result.boxes.cpu().numpy()
            # Checks if the the detector couldn't find any results. In that case it will return the original image.
            if len(boxes) > 0:
                # There were at least one result found.
                class_id = boxes.cls[0]
                conf = boxes.conf[0]
                xyxy = boxes.xyxy[0]
                
                xyxys.append(result.boxes.xyxy.cpu().numpy())
                confidences.append(result.boxes.conf.cpu().numpy())
                class_ids.append(result.boxes.cls.cpu().numpy().astype(int))

        # Setup detections for visualization
        detections = sv.Detections(
            xyxy=results[0].boxes.xyxy.cpu().numpy(),
            confidence=results[0].boxes.conf.cpu().numpy(),
            class_id=results[0].boxes.cls.cpu().numpy().astype(int),
        )

        # Format custom labels
        self.labels = [f"{self.CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
                       for _, _, confidence, class_id, tracker_id
                       in detections]

        # Annotate and display frame
        frame = self.box_annotator.annotate(scene=frame, detections=detections, labels=self.labels)

        return frame

### Creating a class instance

In [4]:
detector = ObjectDetection(capture_index=0)

Model summary (fused): 168 layers, 11126358 parameters, 0 gradients, 28.4 GFLOPs


Using Device:  cuda
{0: '-', 1: 'HexBug'}


### Processing the data

In [5]:
# Makes sure that the temporary directory `cropped_bugs` doesn't contain any items.
if not os.path.exists('Test_data/cropped_bugs_multi_object'):
    os.mkdir('Test_data/cropped_bugs_multi_object')

# Creates a pandas dataframe which contains `frame_ID`, `file_path`, `(x, y) coordination of the Hexbug`, 
# and `(x, y) of its bounding box`
data = pd.DataFrame(columns=['NumHexBugsInPicture', 'HexBugID',
                             'OriginalBoxCoordinationX1', 'OriginalBoxCoordinationY1',
                             'Path', 'ID'])

In [8]:
# Defining the output results shape. This is the images' shape which will be fed to the head's detectoin
# algorithm later on.
max_height = 300
max_width = 300

CONF_THRESHOLD = 0.5

# Iterates over directory names (e.g. training01, training02, etc.) in the 'samples' directory to read the images..
for directory_name in tqdm(os.listdir('Test_data/Frames')):
    # Iterates over all images in the above mentioned directory.
    for sample_file_name in os.listdir(f'Test_data/Frames/{directory_name}'):
        # To get the correct annotation data, we need to know the frame_id.
        # Images are named like `frame01.jpg`, `frame02.jpg`, and etc.
        # To get the frame_id, we should seperate the `frame` word from the file_name.
        frame_id = int(sample_file_name.split('.')[0][5:])
        
#         print('------------------------------------------------------------------')
#         print(f'Processing file {directory_name}/{sample_file_name}')
        
        # Now we can simply define the sample path, and read the image using cv2.
        sample_path = f'Test_data/Frames/{directory_name}/{sample_file_name}'
        sample_img = cv2.imread(sample_path)
        
        # After reading the image, we are able to use our `detector` and its `predict(image)` method to compute results.
        prediction_results = detector.predict(sample_img)
        
#         print(sample_file_name)
#         for s in prediction_results[0].boxes.conf.cpu().numpy():
#             print(f'\t\t{s}')
#         print("============================================")
        
#         annotated_frame = detector.plot_bboxes(results=prediction_results, frame=sample_img)
        
#         cv2.imshow("image", annotated_frame)
#         cv2.waitKey(0)
#         cv2.destroyAllWindows()
#         clear_output()
        
        # Iterates over results to store the results.
        for result in prediction_results:
            # Extracting the `boxes` data, which contains the bounding boxes' position.
            boxes = result.boxes.cpu().numpy()
            
            # Checks if the the detector couldn't find any results. In that case it will return the original image.
            if len(boxes) > 0:
                for i in range(len(boxes)):
#                     print(boxes[i].conf)
                    if boxes[i].conf > CONF_THRESHOLD:
                        # Converts the predicted (x, y) position to a list of class `int` since the center supposed to be integers
                        xyxy = list(map(int, boxes.xyxy[i]))

                        # Creates an images by selecting the results' region. 
                        crop_img = sample_img[xyxy[1]: xyxy[3], xyxy[0]: xyxy[2]]

                        # Computes the output image's shape.
                        cropped_img_shape = crop_img.shape
                        
                        # Since the data must have the same shape, we are required to crop images in a way
                        # which contains the Hexbug, and also have a specific shape.
                        # To do so, we should compute the margins we want.
                        # `height_margin` and `width_margin` are computed by a simple subtraction of ideal image shape (300, 300)
                        # from the cropped image.
                        height_margin = max_height - cropped_img_shape[0]
                        width_margin = max_width - cropped_img_shape[1]

                        # Since the Hexbug can be in corners, in the middle, next to the walls,
                        # we should check in which direction we can extend the image to reach the specified shape.

                        # Checks if the image has enugh bottom-right margin to extend width.
                        # otherwise, it will extend it from top-left to left.
                        if xyxy[2] + width_margin > sample_img.shape[1]:
                            x1, x2 = xyxy[0] - width_margin, xyxy[2]
                        else:
                            x1, x2 = xyxy[0], xyxy[2] + width_margin

                        # Checks if the image has enugh bottom-right margin to extend height.
                        # otherwise, it will extend it from top-left to up.
                        if xyxy[3] + height_margin > sample_img.shape[0]:
                            y1, y2 = xyxy[1] - height_margin, xyxy[3]
                        else:
                            y1, y2 = xyxy[1], xyxy[3] + height_margin

                        # After adding margins, we have the margined_crop_image with desired shape.
                        margined_crop_img = sample_img[y1: y2, x1: x2]

                        # Checks if the directory for the cropped image exists.
                        if not os.path.exists(f'Test_data/cropped_bugs_multi_object/{directory_name}'):
                            os.mkdir(f'Test_data/cropped_bugs_multi_object/{directory_name}')

                        # Writes the margined_crop_image to the specified directory.
                        cv2.imwrite(f"Test_data/cropped_bugs_multi_object/{directory_name}/{sample_file_name.replace('.jpg', f'_{i}.jpg')}", margined_crop_img)

                        # To insert the data to the dataframe, we create a row with the given fields.
                        new_row = pd.DataFrame(
                            {'NumHexBugsInPicture': len(np.where(boxes.conf > CONF_THRESHOLD)[0]),
                             'HexBugID': i,
                             'OriginalBoxCoordinationX1': x1,
                             'OriginalBoxCoordinationY1': y1,
                             'Path': f"Test_data/cropped_bugs_multi_object/{directory_name}/{sample_file_name}",
                             'ID': frame_id},
                            index=[0])

                        # Now we reset the index and concatinate the new row with the dataframe.
                        data = pd.concat([new_row, data.iloc[:]]).reset_index(drop=True)

  0%|                                                     | 0/5 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'Test_data/cropped_bugs_multi_object/002'

In [7]:
# Saves the `data.csv` file to the same path, so it can be used later in model training.
data.to_csv('Test_data/data_multi_object.csv')