## 8.2 Inferencing Faster R-CNN
- Inferencing Faster R-CNN with OpenCV DNN
- Inferencing Faster R-CNN with ONNX Runtime

- Install the necessary packages

In [None]:
!pip install onnx
!pip install onnxruntime
!pip install --force-reinstall opencv-python==4.7.0.72

### 8.2.1 Inferencing Faster R-CNN with OpenCV DNN

In [None]:
import cv2
import numpy as np

def load_model(onnx_model_path):
    """Read the ONNX file at ``onnx_model_path`` with OpenCV DNN and return the network."""
    return cv2.dnn.readNetFromONNX(onnx_model_path)

def preprocess_image(image_path):
    """
    Loads an image from disk and builds the 224x224 network input blob.

    Parameters:
    - image_path: Path to the input image.

    Returns:
    A tuple (image, image_resized, blob): the image as loaded by cv2.imread,
    a copy resized to 224x224, and the blob built by cv2.dnn.blobFromImage
    (pixel values scaled by 1/255, R and B channels swapped, no cropping).

    Raises:
    - FileNotFoundError: If cv2.imread cannot read image_path.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; stop
    # here with a clear message rather than failing later inside cv2.resize.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    image_resized = cv2.resize(image, (224, 224))
    blob = cv2.dnn.blobFromImage(image_resized, 1/255.0, (224, 224), swapRB=True, crop=False)
    return image, image_resized, blob

def run_inference(net, blob):
    """
    Feeds ``blob`` to ``net`` and returns the result of one forward pass.

    NOTE(review): ``net.forward()`` is called without output names, so per the
    OpenCV documentation it returns a single output blob -- confirm that this
    is sufficient for a model that exposes several outputs.

    Parameters:
    - net: Network object exposing ``setInput`` and ``forward``.
    - blob: Input blob to run through the network.
    """
    net.setInput(blob)
    return net.forward()

def post_process(outputs, image_shape, threshold=0.5):
    """
    Converts raw detection rows into pixel boxes, labels, and scores.

    Each detection row is read as
    [_, label, confidence, x_min, y_min, x_max, y_max], with the box
    coordinates multiplied by the image width/height to obtain pixels.
    Every element of ``outputs`` is flattened to 2-D rows first, so both a
    sequence of (N, D) arrays and a single 4-D (1, 1, N, D) detection blob
    are accepted.

    Parameters:
    - outputs: Iterable of detection arrays (or one stacked array).
    - image_shape: Shape of the target image as (height, width, ...).
    - threshold: Detections whose confidence is not strictly greater than
      this value are dropped.

    Returns:
    A tuple (boxes, labels, scores): a list of integer arrays
    [x_min, y_min, x_max, y_max], a list of int labels, and a list of
    float confidences, all in detection order.
    """
    boxes, labels, scores = [], [], []

    height, width = image_shape[0], image_shape[1]
    scale = np.array([width, height, width, height])

    for output in outputs:
        output = np.asarray(output)
        if output.size == 0:
            continue

        # Collapse any leading batch/singleton axes so each row is one detection.
        rows = output.reshape(-1, output.shape[-1])
        kept = rows[rows[:, 2] > threshold]

        boxes.extend((kept[:, 3:7] * scale).astype("int"))
        labels.extend(int(value) for value in kept[:, 1])
        scores.extend(float(value) for value in kept[:, 2])

    return boxes, labels, scores

def draw_detections(image, boxes, labels, scores, class_names=None):
    """
    Draws a bounding box and a "<label>: <score>" caption for each detection.

    Parameters:
    - image: Image array passed to cv2.rectangle / cv2.putText; the same
      object is returned.
    - boxes: Iterable of [x_min, y_min, x_max, y_max] pixel coordinates.
    - labels: Iterable of integer class indices, parallel to boxes.
    - scores: Iterable of confidence scores, parallel to boxes.
    - class_names: Optional list mapping a class index to a display name.

    Returns:
    The ``image`` object that was passed in, with the detections drawn on it.
    """
    for box, label, score in zip(boxes, labels, scores):
        # Hand plain Python ints to OpenCV regardless of the box's array dtype.
        x_min, y_min, x_max, y_max = (int(value) for value in box[:4])

        # Draw the bounding box
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Use the generic caption when the label has no entry in class_names
        # instead of failing with an IndexError.
        if class_names and 0 <= label < len(class_names):
            label_text = f"{class_names[label]}: {score:.2f}"
        else:
            label_text = f"Label {label}: {score:.2f}"

        # Put the label text just above the box
        cv2.putText(image, label_text, (x_min, y_min - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    return image

# Main function to perform inference
def main(image_path, onnx_model_path):
    """
    Runs the OpenCV DNN detection pipeline on one image and shows the result.

    Parameters:
    - image_path: Path to the image to run detection on.
    - onnx_model_path: Path to the ONNX model file.
    """
    # Class index -> display name; modify according to your dataset
    class_names = ["background", "scissors"]

    # Model, input blob, raw network output
    net = load_model(onnx_model_path)
    original_image, _resized_image, blob = preprocess_image(image_path)
    raw_outputs = run_inference(net, blob)

    # Boxes are scaled with the shape of the image as loaded, not the resized copy
    boxes, labels, scores = post_process(raw_outputs, original_image.shape)

    # Annotate a copy of the loaded image
    annotated = draw_detections(original_image.copy(), boxes, labels, scores, class_names)

    # Show the window and wait for a key press before closing it
    cv2.imshow("Detections", annotated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage
# Define the model path in this cell so it does not rely on a later cell
# having been executed first.
MODEL_NAME = "fasterrcnn_resnet50_fpn_v2_scissors.onnx"
image_path = "image2.jpg"
main(image_path, MODEL_NAME)


### 8.2.2 Inferencing Faster R-CNN with ONNX Runtime

In [None]:
import onnx
import onnxruntime as ort
from PIL import Image

# Load the ONNX model and check that it is well-formed
MODEL_NAME = "fasterrcnn_resnet50_fpn_v2_scissors.onnx"
onnx_model = onnx.load(MODEL_NAME)
onnx.checker.check_model(onnx_model)

# Run inference with ONNX Runtime
ort_session = ort.InferenceSession(MODEL_NAME)

# Load your image
image_path = "image2.jpg"
image = Image.open(image_path).convert("RGB")

# Define the same transformations used during training
# NOTE(review): `T` is not imported in this cell; presumably it is
# torchvision.transforms imported earlier in the notebook -- confirm.
transform = T.Compose([
    T.Resize((224, 224)),  # Resize to match the model's input size, if needed
    T.ToTensor(),          # Convert the image to a tensor
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize using ImageNet stats
])

# Apply transformations and add batch dimension
input_tensor = transform(image).unsqueeze(0)  # Shape: [1, 3, 224, 224]

# Convert tensor to numpy format
input_image = input_tensor.numpy()

# Ask the session for its input name instead of hard-coding it, so the cell
# keeps working if the model was exported with a different input name.
input_name = ort_session.get_inputs()[0].name

# Example inference
outputs = ort_session.run(None, {input_name: input_image})
print("ONNX model outputs:", outputs)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import numpy as np

# Define a function to display the image with detected boxes and labels
def display_detections(image_path, boxes, labels, scores, threshold=0.5, class_names=None):
    """
    Displays the image with bounding boxes, labels, and confidence scores.

    The image is resized to 224x224 before plotting, so ``boxes`` are drawn
    in that coordinate frame (presumably the size the model was fed --
    confirm against the inference code).

    Parameters:
    - image_path: Path to the input image.
    - boxes: Bounding boxes from the model output, each
      [x_min, y_min, x_max, y_max].
    - labels: Class labels from the model output.
    - scores: Confidence scores from the model output.
    - threshold: Minimum confidence score to display a detection.
    - class_names: List of class names corresponding to label indices. A
      label with no entry in the list is shown as "Label <index>".
    """
    # Load the image
    image = Image.open(image_path).convert("RGB")
    image = image.resize((224, 224))
    plt.figure(figsize=(10, 10))
    plt.imshow(image)

    # Axes used as the overlay for bounding boxes
    ax = plt.gca()

    for box, label, score in zip(boxes, labels, scores):
        # Only display boxes at or above the confidence threshold
        if not score >= threshold:
            continue

        # Extract box coordinates
        x_min, y_min, x_max, y_max = box

        # Draw the bounding box
        rect = patches.Rectangle(
            (x_min, y_min), x_max - x_min, y_max - y_min,
            linewidth=2, edgecolor="red", facecolor="none"
        )
        ax.add_patch(rect)

        # Use the generic caption when the label has no entry in class_names
        # instead of failing with an IndexError.
        if class_names and 0 <= label < len(class_names):
            label_text = f"{class_names[label]}: {score:.2f}"
        else:
            label_text = f"Label {label}: {score:.2f}"

        # Add label and score just above the box
        plt.text(
            x_min, y_min - 10, label_text,
            color="red", fontsize=12, backgroundcolor="white"
        )

    # Show the plot
    plt.axis("off")
    plt.show()

# Example usage with outputs from the ONNX model
image_path = "image2.jpg"

# Class index -> display name, for example: class_names = ["background", "scissors", "other"]
class_names = ["background", "scissors"]  # Modify based on your dataset

# The first three model outputs are taken as bounding boxes, class labels,
# and confidence scores, in that order
boxes, labels, scores = outputs[0], outputs[1], outputs[2]

# Display detections
display_detections(
    image_path,
    boxes,
    labels,
    scores,
    threshold=0.5,
    class_names=class_names,
)
