# Import Libs

In [48]:
import random
import pandas as pd
import torch
import yolov5.utils
from yolov5.utils.dataloaders import LoadImages
from yolov5.utils.general import non_max_suppression, xyxy2xywh
import torch
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
import os

%matplotlib inline

import torch

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [49]:
import os
# Record the notebook's working directory; the relative dataset and weight
# paths used further down are resolved against it.
current_path = os.getcwd()
current_path  # bare expression so the notebook echoes the value

'/Users/khang/Documents/IFN703'

In [50]:
import numpy as np
import random
import cv2
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model

# Functions

In [51]:
# Error magnitude at which the loss switches from quadratic to linear.
HUBER_DELTA = 1.0


def smoothL1(y_true, y_pred):
    """
    Smooth-L1 (Huber) loss summed over every element.

    Absolute errors below HUBER_DELTA are penalised quadratically, larger
    ones linearly.
    """
    abs_error = K.abs(y_true - y_pred)
    quadratic_part = 0.5 * abs_error ** 2
    linear_part = HUBER_DELTA * (abs_error - 0.5 * HUBER_DELTA)
    return K.sum(tf.where(abs_error < HUBER_DELTA, quadratic_part, linear_part))


def create_q_model():
    """
    Build and compile the Q-network.

    The input is a 4096-value image feature concatenated with the one-hot
    action history (``action_option * history_size`` values, both read from
    module globals). The output layer has 9 linear units, one per action.
    """
    input_width = 4096 + action_option*history_size
    model = Sequential([
        Dense(1024, input_shape=(input_width,), activation='relu'),
        Dense(1024, activation='relu'),
        Dense(9, activation='linear'),
    ])
    model.compile(loss=smoothL1, optimizer='adam')
    return model


def create_vgg16():
    """Return a VGG16 network with ImageNet weights and its classifier head."""
    return VGG16(weights='imagenet', include_top=True, pooling='max')

In [52]:
def read_image_index(basepath, test=True):
    """
    Collect the base names (without extension) of the ``.jpg`` files found in
    ``<basepath>/images``.

    Args:
        basepath: Dataset root directory that contains an ``images`` folder.
        test: Unused; kept so existing callers keep working.

    Returns:
        List of file names with only the final extension removed, in the
        order ``os.listdir`` reports them.

    Raises:
        FileNotFoundError: If ``<basepath>/images`` does not exist.
    """
    image_folder_path = os.path.join(basepath, "images")
    # splitext strips only the last extension, so dotted names such as
    # "frame_jpg.rf.1a2b.jpg" keep their full stem instead of being cut at
    # the first dot.
    return [
        os.path.splitext(image_file)[0]
        for image_file in os.listdir(image_folder_path)
        if image_file.endswith(".jpg")
    ]

def read_image(basepath):
    """
    Load every supported image file from ``<basepath>/images`` with OpenCV.

    Files that OpenCV cannot decode are reported and left out of the result.
    Returns the list of decoded images.
    """
    img_dir = Path(basepath + '/images')
    supported_formats = ['.jpg', '.jpeg', '.png', '.bmp']

    # Keep only files whose (case-insensitive) suffix is a known image type.
    image_paths = []
    for candidate in img_dir.glob('*'):
        if candidate.suffix.lower() in supported_formats:
            image_paths.append(candidate)

    print(f"Found {len(image_paths)} images in {img_dir}")

    image_list = []
    for img_path in image_paths:
        img = cv2.imread(str(img_path))
        if img is None:
            print(f"Warning: {img_path} not found or invalid.")
            continue
        image_list.append(img)
    return image_list

def convert_yolo_label_to_coords(box, img_width, img_height):
    """
    Turn a normalised YOLO box into pixel corner coordinates.

    ``box`` is (x_center, y_center, width, height), each relative to the
    image size. Returns ``[xmin, ymin, xmax, ymax]`` as ints (truncated
    toward zero).
    """
    rel_cx, rel_cy, rel_w, rel_h = box
    center_x = rel_cx * img_width
    center_y = rel_cy * img_height
    half_w = rel_w * img_width / 2
    half_h = rel_h * img_height / 2
    corners = (
        center_x - half_w,
        center_y - half_h,
        center_x + half_w,
        center_y + half_h,
    )
    return [int(value) for value in corners]

def load_annotation(basepath, filtered_class):
    """
    Load the bounding boxes of one class from the YOLO TXT files in
    ``<basepath>/labels``.

    Args:
        basepath: Dataset root (string) containing ``images`` and ``labels``.
        filtered_class: Integer class id; only label rows with this id are kept.

    Returns:
        One list per readable image, in the directory iteration order, each
        holding ``[xmin, ymin, xmax, ymax]`` pixel boxes of the requested
        class. The list is empty when the image has no such box or no label
        file.
    """
    img_dir = Path(basepath + '/images')
    lbl_dir = Path(basepath + '/labels')

    # List all image files in the directory
    supported_formats = ['.jpg', '.jpeg', '.png', '.bmp']
    image_paths = [p for p in img_dir.glob('*') if p.suffix.lower() in supported_formats]

    bounding_box_list = []
    for img_path in image_paths:
        # The image is only needed for its size, to de-normalise the boxes.
        img = cv2.imread(str(img_path))
        if img is None:
            # read_image() leaves unreadable images out of its result, so they
            # are left out here as well to keep both lists paired by position.
            continue
        img_height, img_width = img.shape[:2]

        label_path = lbl_dir / (img_path.stem + '.txt')
        if label_path.is_file():
            with open(label_path, 'r') as f:
                ground_truth = [list(map(float, line.split())) for line in f]
        else:
            # An image without a label file is treated as having no objects.
            ground_truth = []

        # Keep only well-formed "class cx cy w h" rows of the requested class.
        bounding_box_list.append([
            convert_yolo_label_to_coords(box[1:], img_width, img_height)
            for box in ground_truth
            if len(box) == 5 and int(box[0]) == filtered_class
        ])

    return bounding_box_list

def _save_possibly_ragged(path, items):
    """Save *items* with np.save, using an object array when rows are ragged."""
    try:
        array = np.asarray(items)
    except ValueError:
        # Recent NumPy refuses to build an array from rows of differing shape;
        # store one Python object per item instead.
        array = np.empty(len(items), dtype=object)
        for position, item in enumerate(items):
            array[position] = item
    np.save(path, array)


def load_data_fil(dataset_path, test=False, filtered_class=5):
    """
    Load a dataset's images together with the boxes of one class.

    Args:
        dataset_path: Dataset root containing ``images`` and ``labels``.
        test: When true, also dump both lists to ``val_images.npy`` and
            ``val_boxes.npy`` in the working directory.
        filtered_class: Class id whose boxes are kept (default 5).

    Returns:
        ``(image_list, bounding_box_list)`` as produced by ``read_image`` and
        ``load_annotation``.

    Raises:
        FileNotFoundError: If ``<dataset_path>/images`` does not exist.
    """
    # The returned index is not used; the call is kept because it fails fast
    # when the images folder is missing.
    read_image_index(dataset_path, test)
    image_list = read_image(dataset_path)
    bounding_box_list = load_annotation(dataset_path, filtered_class)

    # Optionally save to .npy files if needed
    if test:
        _save_possibly_ragged("val_images.npy", image_list)
        _save_possibly_ragged("val_boxes.npy", bounding_box_list)

    print(bounding_box_list[:5])
    print("DONE LOADING")

    return image_list, bounding_box_list

# Compute IoU (Intersection over Union) between the predicted mask and ground truth
def compute_iou(mask, ground_truth):
    """
    Return the intersection-over-union of two ``[xmin, ymin, xmax, ymax]`` boxes.

    Returns 0.0 when the union has no area (for example two degenerate
    boxes), instead of dividing by zero.
    """
    dx = min(mask[2], ground_truth[2]) - max(mask[0], ground_truth[0])
    dy = min(mask[3], ground_truth[3]) - max(mask[1], ground_truth[1])

    # A negative extent on either axis means the boxes do not overlap.
    inter_area = dx * dy if dx >= 0 and dy >= 0 else 0

    mask_area = (mask[2] - mask[0]) * (mask[3] - mask[1])
    ground_truth_area = (ground_truth[2] - ground_truth[0]) * (ground_truth[3] - ground_truth[1])
    union_area = mask_area + ground_truth_area - inter_area

    if union_area <= 0:
        return 0.0
    return inter_area / union_area

# Function to extract features from image and history
def extract_feature(image, history, vgg16):
    """
    Build the state vector fed to the Q-network.

    The image is resized to 224x224 and passed through ``vgg16`` up to
    ``vgg16.layers[20]``; the flattened activation is concatenated with a
    one-hot encoding of the action history (entries equal to -1 are empty
    slots). ``action_option`` and ``history_size`` are module globals.

    Returns an array of shape (1, n_features).
    """
    one_hot_history = np.zeros(action_option * history_size)
    for slot in range(history_size):
        past_action = history[slot]
        if past_action != -1:
            one_hot_history[slot * action_option + past_action] = 1

    # presumably layer 20 is the 4096-unit fc1 layer, matching the 4096 used
    # in create_q_model() -- TODO confirm
    feature_extractor = Model(inputs=vgg16.input, outputs=vgg16.layers[20].output)

    resized = cv2.resize(image, (224, 224))
    batch = [resized.reshape(1, 224, 224, 3)]
    activation = feature_extractor(batch)[0]
    image_feature = activation.numpy().flatten()

    combined = np.concatenate((image_feature, one_hot_history))
    return np.array([combined])

# Q-Value computation
def compute_q(feature, deep_q_model):
    """Run ``deep_q_model.predict`` on ``feature`` and return the result flattened to 1-D."""
    q_values = deep_q_model.predict(feature, verbose=0)
    return q_values.flatten()

# Compute new mask based on the action
def compute_mask(action, current_mask):
    """
    Apply one agent action to a ``[x1, y1, x2, y2]`` box and return the new box.

    Each move is 10% of the current box width or height:
    0/1 shift along +x/-x, 2/3 shift along +y/-y, 4 grow, 5 shrink,
    6 shrink vertically, 7 shrink horizontally. Any other action leaves the
    box unchanged. The result is re-ordered so that (x1, y1) is the
    top-left corner.
    """
    image_rate = 0.1
    x1, y1, x2, y2 = current_mask[0], current_mask[1], current_mask[2], current_mask[3]
    delta_width = image_rate * (x2 - x1)
    delta_height = image_rate * (y2 - y1)

    # action -> (dx1, dy1, dx2, dy2)
    offsets = {
        0: (delta_width, 0, delta_width, 0),
        1: (-delta_width, 0, -delta_width, 0),
        2: (0, delta_height, 0, delta_height),
        3: (0, -delta_height, 0, -delta_height),
        4: (-delta_width, -delta_height, delta_width, delta_height),
        5: (delta_width, delta_height, -delta_width, -delta_height),
        6: (0, delta_height, 0, -delta_height),
        7: (delta_width, 0, -delta_width, 0),
    }
    dx1, dy1, dx2, dy2 = offsets.get(action, (0, 0, 0, 0))

    shifted = np.array([x1 + dx1, y1 + dy1, x2 + dx2, y2 + dy2])

    # Normalise the corners in case a move flipped the box.
    left = min(shifted[0], shifted[2])
    top = min(shifted[1], shifted[3])
    right = max(shifted[0], shifted[2])
    bottom = max(shifted[1], shifted[3])
    return np.array([left, top, right, bottom])

# Crop the image based on the mask
def crop_image(image, new_mask):
    """
    Cut the region ``new_mask`` out of ``image`` and resize it to 224x224.

    Args:
        image: H x W x 3 image array.
        new_mask: ``[x1, y1, x2, y2]`` box; values are truncated to int and
            clamped to the image bounds.

    Returns:
        A 224 x 224 x 3 array: the resized crop, or an all-zero image of the
        input dtype when the clamped box is empty.
    """
    image = np.asarray(image)
    height, width, _ = np.shape(image)

    x_min, y_min, x_max, y_max = np.asarray(new_mask).astype("int")
    # Clamp both ends of each axis: a negative right/bottom edge would
    # otherwise be read as a from-the-end slice index.
    x_min, x_max = np.clip([x_min, x_max], 0, width)
    y_min, y_max = np.clip([y_min, y_max], 0, height)

    cropped_image = image[y_min:y_max, x_min:x_max]
    if cropped_image.shape[0] == 0 or cropped_image.shape[1] == 0:
        return np.zeros((224, 224, 3), dtype=image.dtype)

    # cv2.resize returns a new array; its result has to be returned.
    return cv2.resize(cropped_image, (224, 224))

# Load Test Data

## Car Test Data

In [72]:
# Load the test split once, keeping only car boxes (load_data_fil's default class).
test_image_list, test_bounding_box_list = load_data_fil("test")

# Ensure test_image_list and test_bounding_box_list have the same length
assert len(test_image_list) == len(test_bounding_box_list), "Image list and bounding box list must have the same length."

# Find indices of images that have exactly one bounding box
single_bbox_indices = [i for i, bbox_list in enumerate(test_bounding_box_list) if len(bbox_list) == 1]

# Randomly select 200 such images; when fewer than 200 exist, use all of them
if len(single_bbox_indices) < 200:
    print(f"Only {len(single_bbox_indices)} images with one bounding box found.")
    selected_indices = single_bbox_indices
else:
    selected_indices = random.sample(single_bbox_indices, 200)

# Select the images and their corresponding bounding boxes
selected_image_list = [test_image_list[i] for i in selected_indices]
selected_bounding_box_list = [test_bounding_box_list[i] for i in selected_indices]

print(f"Selected {len(selected_image_list)} images and their corresponding bounding boxes.")

Found 282 images in test/images
[[], [[68, 143, 604, 359]], [[5, 90, 625, 536]], [[514, 443, 528, 459], [530, 443, 546, 463], [541, 443, 558, 463], [566, 440, 583, 470], [600, 420, 640, 490], [586, 429, 610, 483], [143, 416, 219, 496], [0, 428, 26, 501], [41, 440, 105, 496]], []]
DONE LOADING
Found 282 images in test/images
[[], [[68, 143, 604, 359]], [[5, 90, 625, 536]], [[514, 443, 528, 459], [530, 443, 546, 463], [541, 443, 558, 463], [566, 440, 583, 470], [600, 420, 640, 490], [586, 429, 610, 483], [143, 416, 219, 496], [0, 428, 26, 501], [41, 440, 105, 496]], []]
DONE LOADING
Only 152 images with one bounding box found.
Selected 152 images and their corresponding bounding boxes.


## Person Test Data

In [54]:
# Load the test split again, this time keeping only boxes of class id 8.
person_test_image_list, person_test_bounding_box_list = load_data_fil("test", filtered_class=8)

# Images and annotations are paired by position, so the lengths must match.
assert len(person_test_image_list) == len(person_test_bounding_box_list), "Image list and bounding box list must have the same length."

# Positions of the images that contain exactly one box of the class.
single_bbox_indices = [idx for idx, boxes in enumerate(person_test_bounding_box_list) if len(boxes) == 1]

# Evaluate on a random sample of 50 such images, or on all of them when
# fewer than 50 are available.
if len(single_bbox_indices) >= 50:
    selected_indices = random.sample(single_bbox_indices, 50)
else:
    print(f"Only {len(single_bbox_indices)} images with one bounding box found.")
    selected_indices = single_bbox_indices

# Materialise the chosen images and their boxes.
person_selected_image_list = [person_test_image_list[idx] for idx in selected_indices]
person_selected_bounding_box_list = [person_test_bounding_box_list[idx] for idx in selected_indices]

print(f"Selected {len(person_selected_image_list)} images and their corresponding bounding boxes.")

Found 282 images in test/images
[[[136, 135, 270, 441], [283, 154, 469, 558]], [], [], [], [[273, 59, 449, 583], [85, 63, 255, 616]]]
DONE LOADING
Only 29 images with one bounding box found.
Selected 29 images and their corresponding bounding boxes.


# Load 2 Models

## YOLO

In [55]:
# Load the custom-trained YOLOv5 weights through torch.hub. force_reload=True
# discards the cached hub repository and downloads it again on every run.
model = torch.hub.load('ultralytics/yolov5', 'custom', path='yolov5/runs/train/yolov5_custom_augmented3/weights/best.pt', force_reload=True)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /Users/khang/.cache/torch/hub/master.zip
YOLOv5 🚀 2024-10-19 Python-3.8.13 torch-2.3.0 CPU

Fusing layers... 
Model summary: 157 layers, 7042489 parameters, 0 gradients, 15.9 GFLOPs
Adding AutoShape... 


## Deep QN

### Car Model

In [76]:
# Globals read by create_q_model() and extract_feature(): number of past
# actions kept in the history, and width of each one-hot history slot.
history_size = 10
action_option = 9

# Change the model name with different parameters
model_link = 'well_train_model_max_step_20_gamma_0.1_epochs_10_trigger_threshold_0.5.h5'
# Feature extractor shared by the agent functions below (read as a global).
vgg16 = create_vgg16()
# Build the Q-network and restore the weights trained for the car class.
deep_q_car = create_q_model()
deep_q_car.load_weights(model_link)

### Person Model

In [57]:
# Same history/action configuration as the car model; these globals are read
# by create_q_model() and extract_feature().
history_size = 10
action_option = 9

person_model_link = 'person_well_train_model_max_step_20_gamma_0.1_epochs_10_trigger_threshold_0.5.h5'
# NOTE(review): this rebuilds VGG16 and rebinds the global `vgg16`; it looks
# redundant when the car cell has already run -- confirm before removing.
vgg16 = create_vgg16()
# Build a second Q-network and restore the weights trained for the person class.
deep_q_person = create_q_model()
deep_q_person.load_weights(person_model_link)

# Test

## Car 

### YOLOv5 only

In [25]:
def test_yolo(selected_image_list, selected_bounding_box_list):
    """
    Evaluate the global YOLOv5 ``model`` on single-object images.

    For every image the first YOLO detection is compared with the first
    ground-truth box; an image without any detection scores an IoU of 0.
    Each image is shown with the prediction in blue and the ground truth in
    green, then the average IoU is printed.

    Args:
        selected_image_list: BGR images as returned by ``cv2.imread``.
        selected_bounding_box_list: Per image, a list whose first entry is
            the ground-truth ``[xmin, ymin, xmax, ymax]`` box.
    """
    iou_list = []
    for i, (source_image, boxes) in enumerate(zip(selected_image_list, selected_bounding_box_list)):
        # Draw on a copy so the dataset images stay clean for later tests.
        image = source_image.copy()
        bounding_box = boxes[0]  # Assuming this is the ground truth

        # YOLOv5's hub wrapper expects RGB arrays, OpenCV images are BGR.
        results = model(image[..., ::-1])
        predictions = results.xyxy[0]  # Predictions in (x1, y1, x2, y2, conf, class)
        print(len(predictions))

        if len(predictions) > 0:
            # Taking the first predicted bounding box
            pred_box = predictions[0][:4].cpu().numpy().astype(int)
            # (255, 0, 0) is blue in OpenCV's BGR channel order.
            cv2.rectangle(image, (int(pred_box[0]), int(pred_box[1])),
                          (int(pred_box[2]), int(pred_box[3])), (255, 0, 0), 2)
            iou_list.append(compute_iou(pred_box, bounding_box))
        else:
            iou_list.append(0)

        # Draw ground truth bounding box (in green)
        cv2.rectangle(image, (int(bounding_box[0]), int(bounding_box[1])),
                      (int(bounding_box[2]), int(bounding_box[3])), (0, 255, 0), 2)

        # Plot results
        plt.figure()
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.title(f"Predicted box in blue, Ground truth in green (Image {i})")
        plt.axis('off')
        plt.show()

    # Print average IoU
    if iou_list:
        print(f"Average IoU: {sum(iou_list) / len(iou_list)}")
    else:
        print("Average IoU: n/a (no images evaluated)")
    print("Length of IOU =", len(iou_list))

In [None]:
# YOLOv5-only baseline on the selected single-car test images.
test_yolo(selected_image_list, selected_bounding_box_list)

### DQN

In [28]:
def predict_with_agent(image, bounding_box, max_steps=10):
    """
    Localise an object by letting the DQN agent move a box over ``image``.

    The agent starts from the whole image and repeatedly applies the action
    with the highest Q-value until it picks action 8 (stop) or ``max_steps``
    moves have been made. The Q-network, the feature extractor and the
    history length are read from the globals ``deep_q``, ``vgg16`` and
    ``history_size``.

    Args:
        image: H x W x 3 BGR image. Every intermediate box is drawn on it in
            place so the caller can display the search path.
        bounding_box: Ground-truth box; unused, kept for the callers.
        max_steps: Maximum number of box moves before stopping.

    Returns:
        ``(mask, step)``: the final ``[x1, y1, x2, y2]`` box and the number of
        moves performed.
    """
    print("PREDICTING WITH AGENT")
    history = [-1] * history_size
    height, width, channel = np.shape(image)

    # The agent must look at clean pixels: crops are taken from this copy,
    # while the search path is drawn on the caller's image.
    # NOTE(review): assumes the network was trained on undecorated crops.
    pristine = image.copy()

    current_mask = np.asarray([0, 0, width, height])
    feature = extract_feature(pristine, history, vgg16)
    step = 0

    while True:
        q_value = compute_q(feature, deep_q)

        action = np.argmax(q_value)
        print("ACTION = ", action)

        # Slide the fixed-length history window.
        history = history[1:] + [action]

        if action == 8 or step == max_steps:
            return current_mask, step

        current_mask = compute_mask(action, current_mask)
        feature = extract_feature(crop_image(pristine, current_mask), history, vgg16)

        cv2.rectangle(image, (int(current_mask[0]), int(current_mask[1])),
                      (int(current_mask[2]), int(current_mask[3])), (255, 0, 0), 1)
        step += 1

In [46]:
import matplotlib.patches as patches
def test_agent(selected_image_list, selected_bounding_box_list, num_imgs = 152):
    """
    Evaluate the DQN agent alone on single-object images.

    Each image is localised with ``predict_with_agent``; the result is
    displayed with the ground truth in green, and the average IoU is printed.

    Args:
        selected_image_list: BGR images.
        selected_bounding_box_list: Per image, a list whose first entry is
            the ground-truth ``[xmin, ymin, xmax, ymax]`` box.
        num_imgs: Maximum number of images to evaluate; capped at the number
            of images available.

    Returns:
        List with the number of agent steps taken for each image.
    """
    iou_list = []
    num_steps = []

    # Never index past the end of a shorter list.
    num_imgs = min(num_imgs, len(selected_image_list), len(selected_bounding_box_list))

    for i in range(num_imgs):
        # Work on a copy: the agent and the code below draw on the image.
        image = selected_image_list[i].copy()
        bounding_box = selected_bounding_box_list[i][0]  # Assuming this is the ground truth

        pred_box, step = predict_with_agent(image, bounding_box)
        num_steps.append(step)

        iou_list.append(compute_iou(pred_box.astype(int), bounding_box))

        # Draw ground truth bounding box (in green)
        cv2.rectangle(image, (int(bounding_box[0]), int(bounding_box[1])),
                      (int(bounding_box[2]), int(bounding_box[3])), (0, 255, 0), 2)
        # Draw the agent's final box
        cv2.rectangle(image, (int(pred_box[0]), int(pred_box[1])),
                      (int(pred_box[2]), int(pred_box[3])), (0, 0, 255), 2)

        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

    # Print average IoU
    if iou_list:
        print(f"Average IoU: {sum(iou_list) / len(iou_list)}")
    else:
        print("Average IoU: n/a (no images evaluated)")

    return num_steps

In [None]:
# predict_with_agent() reads the Q-network from the global `deep_q`.
deep_q = deep_q_car
# Pass the real list length rather than relying on the default of 152.
num_steps = test_agent(selected_image_list, selected_bounding_box_list,
                       num_imgs=len(selected_image_list))

### YOLO + DQN

In [None]:
import matplotlib.patches as patches
def test_yolo_agent(selected_image_list, selected_bounding_box_list, num_imgs = 152):
    """
    Evaluate YOLOv5 with the DQN agent as a fallback.

    The first YOLO detection is used when there is one; otherwise the agent
    localises the object and the result is displayed. Two averages are
    printed: the IoU over all images, then the IoU over the YOLO-handled
    images only.

    Args:
        selected_image_list: BGR images.
        selected_bounding_box_list: Per image, a list whose first entry is
            the ground-truth ``[xmin, ymin, xmax, ymax]`` box.
        num_imgs: Maximum number of images to evaluate; capped at the number
            of images available.

    Returns:
        List with the number of agent steps for each image the agent handled.
    """
    iou_list = []
    iou_list_yolo = []
    num_steps = []

    # Never index past the end of a shorter list.
    num_imgs = min(num_imgs, len(selected_image_list), len(selected_bounding_box_list))

    for i in range(num_imgs):
        # Work on a copy: the agent branch draws on the image.
        image = selected_image_list[i].copy()
        bounding_box = selected_bounding_box_list[i][0]  # Assuming this is the ground truth

        # Perform prediction using YOLOv5 (its hub wrapper expects RGB
        # arrays, OpenCV images are BGR).
        results = model(image[..., ::-1])
        predictions = results.xyxy[0]  # Predictions in (x1, y1, x2, y2, conf, class)
        print(len(predictions))

        if len(predictions) > 0:
            # Taking the first predicted bounding box
            pred_box = predictions[0][:4].cpu().numpy().astype(int)
            iou = compute_iou(pred_box, bounding_box)
            iou_list.append(iou)
            iou_list_yolo.append(iou)
            continue

        # YOLO found nothing: fall back to the agent.
        pred_box, step = predict_with_agent(image, bounding_box)
        num_steps.append(step)

        iou_list.append(compute_iou(pred_box.astype(int), bounding_box))

        # Draw ground truth bounding box (in green)
        cv2.rectangle(image, (int(bounding_box[0]), int(bounding_box[1])),
                      (int(bounding_box[2]), int(bounding_box[3])), (0, 255, 0), 2)
        # Draw the agent's final box
        cv2.rectangle(image, (int(pred_box[0]), int(pred_box[1])),
                      (int(pred_box[2]), int(pred_box[3])), (0, 0, 255), 2)

        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

    # Print average IoU: all images first, then the YOLO-handled ones.
    for scores in (iou_list, iou_list_yolo):
        if scores:
            print(f"Average IoU: {sum(scores) / len(scores)}")
        else:
            print("Average IoU: n/a (no predictions)")

    return num_steps

In [None]:
# Evaluate every selected image instead of assuming there are exactly 152.
test_yolo_agent(selected_image_list, selected_bounding_box_list,
                num_imgs=len(selected_image_list))

## Person

In [None]:
# YOLOv5-only baseline on the selected single-person test images.
test_yolo(person_selected_image_list, person_selected_bounding_box_list)

In [None]:
# predict_with_agent() reads the Q-network from the global `deep_q`.
deep_q = deep_q_person
# The person subset is smaller than test_agent's default of 152 images, so
# the number of images is passed explicitly.
num_steps = test_agent(person_selected_image_list, person_selected_bounding_box_list,
                       num_imgs=len(person_selected_image_list))

In [None]:
# The person subset is smaller than test_yolo_agent's default of 152 images,
# so the number of images is passed explicitly.
test_yolo_agent(person_selected_image_list, person_selected_bounding_box_list,
                num_imgs=len(person_selected_image_list))

## Number of Steps

In [None]:
# Count occurrences of each unique number
# Histogram of how many steps the agent needed per image. `num_steps` is the
# list returned by the most recent test_agent() call.
# Count occurrences of each unique number
unique, counts = np.unique(num_steps, return_counts=True)

# Create the bar plot
plt.bar(unique, counts)
plt.xlabel('Number of regions')
plt.ylabel('Frequency')
plt.title('Number of regions analysed per object')
plt.xticks(unique)  # Set x-ticks to the unique values
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

# Precision - Recall Curve

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve

def evaluate_precision_recall(model_link, selected_image_list, selected_bounding_box_list, num_imgs=100, target_size=(224, 224)):
    """
    Compute a precision-recall curve for the DQN agent.

    Only images on which the global YOLOv5 ``model`` detects nothing are
    scored. On those, the agent searches from the whole image until it picks
    the terminal action or reaches the step limit. The final box counts as a
    true positive when its IoU with the ground truth is at least 0.5, and the
    Q-value of the terminal action is used as the confidence score.

    Args:
        model_link: Path of the Q-network weights to load.
        selected_image_list: BGR images.
        selected_bounding_box_list: Per image, a list whose first entry is
            the ground-truth ``[xmin, ymin, xmax, ymax]`` box.
        num_imgs: Maximum number of images to evaluate; capped at the number
            of images available.
        target_size: Unused; kept for backward compatibility.

    Returns:
        ``(precision, recall)`` arrays from
        ``sklearn.metrics.precision_recall_curve``.

    Raises:
        ValueError: If no image was handled by the agent, so there is
            nothing to score.
    """
    terminal_action = 8  # index of the "stop" action in the Q-network output
    max_steps = 7

    ground_truths = []
    reward_scores = []

    vgg16 = create_vgg16()
    deep_q = create_q_model()
    deep_q.load_weights(model_link)

    # Never index past the end of a shorter list.
    num_imgs = min(num_imgs, len(selected_image_list), len(selected_bounding_box_list))

    for i in range(num_imgs):
        bounding_box = selected_bounding_box_list[i][0]  # Ground truth bounding box
        image = selected_image_list[i]  # Original image

        # Perform prediction using YOLOv5 (its hub wrapper expects RGB
        # arrays, OpenCV images are BGR).
        results = model(image[..., ::-1])
        predictions = results.xyxy[0]  # Predictions in (x1, y1, x2, y2, conf, class)
        print(len(predictions))
        if len(predictions) > 0:
            # YOLO handles this image; it does not enter the agent's curve.
            continue

        history = [-1] * history_size
        height, width, channel = np.shape(image)

        current_mask = np.asarray([0, 0, width, height])  # Initial full image mask
        feature = extract_feature(image, history, vgg16)
        step = 0

        while True:
            q_value = compute_q(feature, deep_q)
            action = np.argmax(q_value)

            # Slide the fixed-length history window.
            history = history[1:] + [action]

            if action == terminal_action or step == max_steps:
                break

            current_mask = compute_mask(action, current_mask)
            feature = extract_feature(crop_image(image, current_mask), history, vgg16)
            step += 1

        # Score the detection with the Q-value of the terminal action.
        reward_scores.append(q_value[terminal_action])

        # Classify as True Positive (1) or False Positive (0) at IoU >= 0.5
        iou = compute_iou(current_mask, bounding_box)
        ground_truths.append(1 if iou >= 0.5 else 0)

    if not ground_truths:
        raise ValueError("No image was processed by the agent; cannot build a precision-recall curve.")

    # Compute Precision and Recall using ground_truths and reward_scores
    precision, recall, _ = precision_recall_curve(ground_truths, reward_scores)

    return precision, recall


def plot_precision_recall_curve(precision, recall, model_name):
    """Show precision against recall in a new 8x6 figure labelled with ``model_name``."""
    plt.figure(figsize=(8, 6))

    curve_label = f'{model_name} Model'
    plt.plot(recall, precision, label=curve_label)

    # Axis decoration
    plt.title(f'Precision-Recall Curve: {model_name}')
    plt.ylabel('Precision')
    plt.xlabel('Recall')
    plt.grid(True)
    plt.legend()

    plt.show()


# Load the validation split (car boxes, load_data_fil's default class). This
# rebinds the test_* and selected_* globals used by the cells above.
test_image_list, test_bounding_box_list = load_data_fil("valid")

# Images and annotations are paired by position, so the lengths must match.
assert len(test_image_list) == len(test_bounding_box_list), "Image list and bounding box list must have the same length."

# Positions of the images that contain exactly one box of the class.
single_bbox_indices = [idx for idx, boxes in enumerate(test_bounding_box_list) if len(boxes) == 1]

# Use a random sample of 200 such images, or all of them when fewer than 200
# are available.
if len(single_bbox_indices) >= 200:
    selected_indices = random.sample(single_bbox_indices, 200)
else:
    print(f"Only {len(single_bbox_indices)} images with one bounding box found.")
    selected_indices = single_bbox_indices

# Materialise the chosen images and their boxes.
selected_image_list = [test_image_list[idx] for idx in selected_indices]
selected_bounding_box_list = [test_bounding_box_list[idx] for idx in selected_indices]

print(f"Selected {len(selected_image_list)} images and their corresponding bounding boxes.")

# Build and show the precision-recall curve for the weights in model_link.
precision, recall = evaluate_precision_recall(model_link, selected_image_list, selected_bounding_box_list)
plot_precision_recall_curve(precision, recall, "DQN Agent")

# Searching for Objects

In [73]:
def searching_objects_clear_boxes(model_link, selected_image_list, selected_bounding_box_list, num_imgs=100, target_size=(224, 224)):
    """
    Visualise the agent's step-by-step search on the first ``num_imgs`` images.

    For every image the agent starts from the whole frame and applies the
    action with the highest Q-value until it picks action 8 or has taken 7
    steps. A snapshot of the region seen at each step, with the next box
    drawn on it, is collected; when the final box reaches an IoU of at least
    0.2 with the ground truth, the snapshots are shown side by side.

    Args:
        model_link: Path of the Q-network weights to load.
        selected_image_list: BGR images.
        selected_bounding_box_list: Per image, a list whose first entry is
            the ground-truth ``[xmin, ymin, xmax, ymax]`` box.
        num_imgs: Number of images to process.
            NOTE(review): not capped at the list length -- a larger value
            raises IndexError.
        target_size: Size the final snapshot is resized to before storing.

    Returns:
        None; the function only displays figures.
    """
    object_number = 1
    iou = []
    vgg16 = create_vgg16()
    deep_q = create_q_model()
    deep_q.load_weights(model_link)

    for i in range(0, num_imgs):
        bounding_box = selected_bounding_box_list[i][0]  # Ground truth bounding box
        image = selected_image_list[i]  # Original image
        history = [-1] * history_size
        height, width, channel = np.shape(image)

        current_mask = np.asarray([0, 0, width, height])  # Initial full image mask

        feature = extract_feature(image, history, vgg16)
        end = False
        masks = []
        cropped_images = []  # To store the previous cropped images for each step
        step = 0

        # Initial cropped image (whole image at the beginning)
        cropped_image = image.copy()

        while not end:

            q_value = compute_q(feature, deep_q)
            action = np.argmax(q_value)

            # Slide the fixed-length action history window
            history = history[1:]
            history.append(action)

            # Draw bounding boxes on a fresh copy of the cropped image for each step
            step_image = cropped_image.copy()

            if action == 8 or step == 7:
                end = True
                new_mask = current_mask

                # Draw the final predicted bounding box on the final cropped image
                # NOTE(review): the box is in full-image coordinates while
                # step_image is a crop, and (0, 0, 255) is red in BGR order --
                # confirm the intended placement and colour.
                cv2.rectangle(step_image, (int(new_mask[0]), int(new_mask[1])),
                              (int(new_mask[2]), int(new_mask[3])), (0, 0, 255), 2)

                # Draw the ground truth bounding box (in green)
                #cv2.rectangle(step_image, (int(bounding_box[0]), int(bounding_box[1])),
                #              (int(bounding_box[2]), int(bounding_box[3])), (0, 255, 0), 2)

                cropped_images.append(cv2.resize(step_image.copy(), target_size))  # Resize before storing
                masks.append(new_mask)
                break
            else:
                # Compute new mask based on action
                new_mask = compute_mask(action, current_mask)

                # Draw the current predicted bounding box on the fresh cropped image for this step
                # NOTE(review): (255, 0, 0) is blue in BGR order, while the
                # figure title below calls the predicted box red.
                cv2.rectangle(step_image, (int(new_mask[0]), int(new_mask[1])),
                              (int(new_mask[2]), int(new_mask[3])), (255, 0, 0), 2)

                # Store this step's snapshot as-is (no resizing happens here)
                cropped_images.append(step_image.copy())

                # Crop the image for the next step
                cropped_image = crop_image(image, new_mask)

                feature = extract_feature(cropped_image, history, vgg16)
                masks.append(new_mask)
                current_mask = new_mask

            step += 1

        # Plot the cropped images, each with only the bounding box for its respective step
        # The last snapshot (the one carrying the final box) is not plotted,
        # and searches with fewer than two plotted snapshots are skipped.
        n_steps = len(cropped_images) - 1
        if n_steps <= 1:
            continue
        # Compute IoU for the final mask
        mask = masks[-1]
        iou_ = compute_iou(mask, bounding_box)
        iou.append(iou_)

        if iou_ >= 0.2:
            fig, axes = plt.subplots(1, n_steps, figsize=(n_steps * 2, 5))  # Adjust figure size
            for j in range(n_steps):
                axes[j].imshow(cv2.cvtColor(cropped_images[j], cv2.COLOR_BGR2RGB))  # Convert BGR to RGB
                axes[j].axis('off')  # Hide axis
                #axes[j].set_title(f'Step {j + 1}')  # Title with step number

            # NOTE(review): the ground-truth drawing above is commented out,
            # so no green box actually appears in the figure.
            plt.suptitle(f"Search path for Image {i} - Red: Predicted box, Green: Ground truth")
            plt.show()


In [None]:
# Visualise the agent's search path on the first 20 selected images, using
# the weights in model_link.
searching_objects_clear_boxes(model_link, selected_image_list, \
     selected_bounding_box_list, num_imgs = 20)