### Faster R-CNN and SSD on images with multiple objects

In [None]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn, ssd300_vgg16
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.ssd import SSD300_VGG16_Weights
from torchvision.transforms import functional as F
from PIL import Image, ImageDraw
import os
import matplotlib.pyplot as plt
import json
from sklearn.metrics import accuracy_score
import numpy as np

# COCO category names indexed by the label id that torchvision's pretrained
# detection models emit. The ids follow the original 91-slot COCO numbering,
# so unused ids are kept as 'N/A' placeholders; dropping them (or adding an
# extra leading entry) shifts every name after the gap.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
    'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella',
    'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A',
    'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# Directory holding the images to run detection on
IMAGE_DIR = "C:\\Users\\chari\\Downloads\\test2017\\test2017"

# COCO annotation file used as ground truth
# NOTE(review): IMAGE_DIR is a test2017 folder while this file annotates
# val2017 - image ids from one set will presumably not be found in the other,
# leaving every ground-truth lookup empty. Confirm the intended image set.
ANNOTATION_PATH = "C:\\Users\\chari\\Downloads\\annotations_trainval2017\\annotations\\instances_val2017.json"

# Read the annotation JSON into memory
with open(ANNOTATION_PATH, 'r') as f:
    coco_annotations = json.load(f)

# Group annotations by image: image_id -> [(category_id, bbox), ...]
ground_truth = {}
for annotation in coco_annotations['annotations']:
    ground_truth.setdefault(annotation['image_id'], []).append(
        (annotation['category_id'], annotation['bbox'])
    )

# Collect the .jpg files and report how many were found
image_files = [file_name for file_name in os.listdir(IMAGE_DIR) if file_name.endswith(".jpg")]
if image_files:
    print(f"Found {len(image_files)} .jpg image(s).")
else:
    print(f"No .jpg image files found in {IMAGE_DIR}. Please check the folder.")

# Build both pretrained detectors and switch them to inference mode
faster_rcnn = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
faster_rcnn.eval()
ssd = ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT)
ssd.eval()

# Keep only the predictions whose confidence reaches the threshold
def get_filtered_predictions(predictions, scores, threshold=0.5):
    """Return the predictions whose paired score is at least `threshold`.

    `predictions` and `scores` are walked in lockstep, so iteration stops at
    the shorter of the two.
    """
    kept = []
    for candidate, confidence in zip(predictions, scores):
        if confidence >= threshold:
            kept.append(candidate)
    return kept

# Intersection over Union for two (x, y, width, height) boxes
def compute_iou(box1, box2):
    """Return the IoU of two boxes, each given as (x, y, width, height).

    Returns 0 when the union area is not positive.
    """
    left_a, top_a, width_a, height_a = box1
    left_b, top_b, width_b, height_b = box2

    # Overlap extent along each axis, clamped to zero for disjoint boxes
    overlap_w = max(0, min(left_a + width_a, left_b + width_b) - max(left_a, left_b))
    overlap_h = max(0, min(top_a + height_a, top_b + height_b) - max(top_a, top_b))
    inter_area = overlap_w * overlap_h

    # Union = both areas minus the part counted twice
    union_area = width_a * height_a + width_b * height_b - inter_area
    if union_area > 0:
        return inter_area / union_area
    return 0

# Draw the confident detections onto an image
def visualize_predictions(image, boxes, scores, labels, model_name):
    """Draw a red box and "<label> <score>" caption for each detection scoring above 0.5.

    The image is drawn on in place and also returned. A label at or beyond
    the end of COCO_INSTANCE_CATEGORY_NAMES is captioned "Unknown".
    `model_name` is accepted but not used.
    """
    canvas = ImageDraw.Draw(image)
    name_count = len(COCO_INSTANCE_CATEGORY_NAMES)
    for box, score, label in zip(boxes, scores, labels):
        if not score > 0.5:
            continue  # low-confidence detections are not drawn
        label_name = COCO_INSTANCE_CATEGORY_NAMES[label] if label < name_count else "Unknown"
        corners = [int(coord) for coord in box]
        canvas.rectangle(corners, outline="red", width=3)
        canvas.text((corners[0], corners[1]), f"{label_name} {score:.2f}", fill="red")
    return image

# Process and visualize predictions for all images in the directory
faster_rcnn_accuracies = []
ssd_accuracies = []


# Defined once, ahead of the loop, instead of being rebuilt for every image.
def calculate_accuracy(gt_data, pred_data):
    """Return the fraction of ground-truth objects matched by a prediction.

    Args:
        gt_data: list of (category_id, [x, y, width, height]) pairs taken
            from the COCO annotations.
        pred_data: iterable of (label, [x1, y1, x2, y2]) pairs as produced
            by the detectors (corner format).

    A ground-truth object counts as found when at least one prediction has
    the same category and an IoU above 0.5 with it. Each object is counted at
    most once, so the result stays within [0, 1]. Returns 0 when there is no
    ground truth for the image.
    """
    if len(gt_data) == 0:
        return 0
    pred_data = list(pred_data)
    correct_preds = 0
    for gt_category, gt_bbox in gt_data:
        for pred_category, pred_box in pred_data:
            if pred_category != gt_category:
                continue
            # compute_iou expects (x, y, w, h); the detectors return corners.
            x_min, y_min, x_max, y_max = pred_box
            pred_bbox = (x_min, y_min, x_max - x_min, y_max - y_min)
            if compute_iou(gt_bbox, pred_bbox) > 0.5:
                correct_preds += 1
                break  # one match is enough; do not count this object twice
    return correct_preds / len(gt_data)


with torch.no_grad():
    for img_name in image_files:
        img_path = os.path.join(IMAGE_DIR, img_name)
        # Close the file handle as soon as the pixels are decoded
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        print(f"Processing image: {img_name}")

        # Convert image to tensor
        img_tensor = F.to_tensor(img)

        # Faster R-CNN predictions
        faster_rcnn_output = faster_rcnn([img_tensor])[0]
        faster_rcnn_boxes = faster_rcnn_output['boxes'].cpu().numpy()
        faster_rcnn_scores = faster_rcnn_output['scores'].cpu().numpy()
        faster_rcnn_labels = faster_rcnn_output['labels'].cpu().numpy()

        # SSD predictions
        ssd_output = ssd([img_tensor])[0]
        ssd_boxes = ssd_output['boxes'].cpu().numpy()
        ssd_scores = ssd_output['scores'].cpu().numpy()
        ssd_labels = ssd_output['labels'].cpu().numpy()

        # Get ground truth for the image (file name stem is the COCO image id)
        image_id = int(img_name.split('.')[0])
        gt_data = ground_truth.get(image_id, [])

        # Keep only confident (label, box) pairs so that low-score detections
        # cannot be credited as correct
        faster_rcnn_filtered = get_filtered_predictions(
            list(zip(faster_rcnn_labels, faster_rcnn_boxes)), faster_rcnn_scores, threshold=0.5
        )
        ssd_filtered = get_filtered_predictions(
            list(zip(ssd_labels, ssd_boxes)), ssd_scores, threshold=0.5
        )

        # Accuracy for Faster R-CNN
        faster_rcnn_accuracy = calculate_accuracy(gt_data, faster_rcnn_filtered)
        faster_rcnn_accuracies.append(faster_rcnn_accuracy)

        # Accuracy for SSD
        ssd_accuracy = calculate_accuracy(gt_data, ssd_filtered)
        ssd_accuracies.append(ssd_accuracy)

        # Visualize Faster R-CNN predictions
        faster_rcnn_img = visualize_predictions(img.copy(), faster_rcnn_boxes, faster_rcnn_scores, faster_rcnn_labels, "Faster R-CNN")
        plt.figure(figsize=(8, 8))
        plt.imshow(faster_rcnn_img)
        plt.title(f"Faster R-CNN Predictions for {img_name}")
        plt.show()

        # Visualize SSD predictions
        ssd_img = visualize_predictions(img.copy(), ssd_boxes, ssd_scores, ssd_labels, "SSD")
        plt.figure(figsize=(8, 8))
        plt.imshow(ssd_img)
        plt.title(f"SSD Predictions for {img_name}")
        plt.show()

# Print average accuracy for both models
print(f"Average accuracy for Faster R-CNN: {np.mean(faster_rcnn_accuracies):.2f}")
print(f"Average accuracy for SSD: {np.mean(ssd_accuracies):.2f}")


In [None]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn, ssd300_vgg16
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.ssd import SSD300_VGG16_Weights
from torchvision.transforms import functional as F
from PIL import Image, ImageDraw
import os
import json
import random
import matplotlib.pyplot as plt
import numpy as np

# COCO category names indexed by the label id that torchvision's pretrained
# detection models emit. The ids follow the original 91-slot COCO numbering,
# so unused ids are kept as 'N/A' placeholders; dropping them (or adding an
# extra leading entry) shifts every name after the gap.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
    'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella',
    'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A',
    'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# Paths
IMAGE_DIR = "C:\\Users\\chari\\Downloads\\val2017\\val2017"  # Replace with the path to the `val2017` images
ANNOTATION_PATH = "C:\\Users\\chari\\Downloads\\annotations_trainval2017\\annotations\\instances_val2017.json"  # Replace with the path to `instances_val2017.json`

# Number of validation images to evaluate on
NUM_EVAL_IMAGES = 200

# Load COCO annotations (explicit encoding so the platform default, e.g.
# cp1252 on Windows, is not used to decode the JSON)
with open(ANNOTATION_PATH, 'r', encoding='utf-8') as f:
    coco_annotations = json.load(f)

# Group annotations by image: image_id -> [(category_id, [x, y, width, height]), ...]
ground_truth = {}
for annotation in coco_annotations['annotations']:
    ground_truth.setdefault(annotation['image_id'], []).append(
        (annotation['category_id'], annotation['bbox'])
    )

# Get the list of images in val2017. Sorted because os.listdir returns names
# in arbitrary order, and the seeded sample below is only reproducible when
# it draws from a list in a fixed order.
image_files = sorted(file_name for file_name in os.listdir(IMAGE_DIR) if file_name.endswith(".jpg"))

# Select a random subset of up to NUM_EVAL_IMAGES images
random.seed(42)  # For reproducibility
selected_images = random.sample(image_files, min(NUM_EVAL_IMAGES, len(image_files)))

# Load models in inference mode
faster_rcnn = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT).eval()
ssd = ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT).eval()

# Helper functions
def get_filtered_predictions(predictions, scores, threshold=0.5):
    """Select the predictions whose confidence score is `threshold` or higher.

    Items are paired positionally with their scores; pairing ends at the
    shorter of the two inputs.
    """
    selected = []
    for item, confidence in zip(predictions, scores):
        if confidence >= threshold:
            selected.append(item)
    return selected

def compute_iou(box1, box2):
    """Return Intersection over Union for two (x, y, width, height) boxes.

    The result is 0 when the union area is not positive.
    """
    (ax, ay, aw, ah), (bx, by, bw, bh) = box1, box2

    # Bottom-right corner of each box
    a_right, a_bottom = ax + aw, ay + ah
    b_right, b_bottom = bx + bw, by + bh

    # Width and height of the overlap, clamped to zero when the boxes miss
    inter_w = max(0, min(a_right, b_right) - max(ax, bx))
    inter_h = max(0, min(a_bottom, b_bottom) - max(ay, by))
    inter_area = inter_w * inter_h

    # Union = sum of both areas minus the shared part
    union_area = aw * ah + bw * bh - inter_area

    if union_area > 0:
        return inter_area / union_area
    return 0

# Evaluation
faster_rcnn_accuracies = []
ssd_accuracies = []


# Defined once, ahead of the loop, instead of being rebuilt for every image.
def calculate_accuracy(gt_data, pred_labels, pred_boxes):
    """Return the fraction of ground-truth objects matched by a prediction.

    Args:
        gt_data: list of (category_id, [x, y, width, height]) pairs taken
            from the COCO annotations.
        pred_labels: predicted category ids, aligned with `pred_boxes`.
        pred_boxes: predicted boxes in the detectors' corner format
            [x1, y1, x2, y2].

    A ground-truth object counts as found when at least one prediction has
    the same category and an IoU above 0.5 with it. Each object is counted at
    most once, so the result stays within [0, 1]. Returns 0 when there is no
    ground truth for the image.
    """
    if len(gt_data) == 0:
        return 0
    predictions = list(zip(pred_labels, pred_boxes))
    correct_preds = 0
    for gt_category, gt_bbox in gt_data:
        for pred_label, pred_box in predictions:
            if pred_label != gt_category:
                continue
            # compute_iou expects (x, y, w, h); the detectors return corners.
            x_min, y_min, x_max, y_max = pred_box
            pred_bbox = (x_min, y_min, x_max - x_min, y_max - y_min)
            if compute_iou(gt_bbox, pred_bbox) > 0.5:
                correct_preds += 1
                break  # one match is enough; do not count this object twice
    return correct_preds / len(gt_data)


with torch.no_grad():
    for img_name in selected_images:
        img_path = os.path.join(IMAGE_DIR, img_name)
        # Close the file handle as soon as the pixels are decoded
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        img_tensor = F.to_tensor(img)  # Convert to tensor

        print(f"Processing image: {img_name}")

        # Get ground truth for the image (file name stem is the COCO image id)
        image_id = int(img_name.split('.')[0])
        gt_data = ground_truth.get(image_id, [])

        # Faster R-CNN predictions
        faster_rcnn_output = faster_rcnn([img_tensor])[0]
        faster_rcnn_boxes = faster_rcnn_output['boxes'].cpu().numpy()
        faster_rcnn_scores = faster_rcnn_output['scores'].cpu().numpy()
        faster_rcnn_labels = faster_rcnn_output['labels'].cpu().numpy()

        # SSD predictions
        ssd_output = ssd([img_tensor])[0]
        ssd_boxes = ssd_output['boxes'].cpu().numpy()
        ssd_scores = ssd_output['scores'].cpu().numpy()
        ssd_labels = ssd_output['labels'].cpu().numpy()

        # Accuracy for Faster R-CNN, using only detections that pass the
        # confidence threshold so low-score boxes are not credited
        faster_rcnn_accuracy = calculate_accuracy(
            gt_data,
            get_filtered_predictions(faster_rcnn_labels, faster_rcnn_scores),
            get_filtered_predictions(faster_rcnn_boxes, faster_rcnn_scores),
        )
        faster_rcnn_accuracies.append(faster_rcnn_accuracy)

        # Accuracy for SSD, filtered the same way
        ssd_accuracy = calculate_accuracy(
            gt_data,
            get_filtered_predictions(ssd_labels, ssd_scores),
            get_filtered_predictions(ssd_boxes, ssd_scores),
        )
        ssd_accuracies.append(ssd_accuracy)

# Print average accuracy for both models
print(f"Average accuracy for Faster R-CNN: {np.mean(faster_rcnn_accuracies):.2f}")
print(f"Average accuracy for SSD: {np.mean(ssd_accuracies):.2f}")


### Faster R-CNN and SSD on single-object images

In [None]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn, ssd300_vgg16
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.ssd import SSD300_VGG16_Weights
from torchvision.transforms import functional as F
from PIL import Image, ImageDraw
import os
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import numpy as np

# COCO category names indexed by the label id that torchvision's pretrained
# detection models emit. The ids follow the original 91-slot COCO numbering,
# so unused ids are kept as 'N/A' placeholders; dropping them (or adding an
# extra leading entry) shifts every name after the gap.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
    'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella',
    'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A',
    'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# Folder holding the synthetic test images
IMAGE_DIR = "C:\\Users\\chari\\Downloads\\staticMetricsOutputs"

# Build both pretrained detectors and switch them to inference mode
faster_rcnn = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
faster_rcnn.eval()
ssd = ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT)
ssd.eval()

# Every PNG in the folder, as a tensor, alongside its file name
image_tensors = []
image_names = []

for img_name in os.listdir(IMAGE_DIR):
    if not img_name.endswith(".png"):
        continue  # only PNG files are treated as inputs
    img_path = os.path.join(IMAGE_DIR, img_name)
    img = Image.open(img_path).convert("RGB")
    img_tensor = F.to_tensor(img)
    image_tensors.append(img_tensor)
    image_names.append(img_name)

# Hand-assigned expected label id(s) for each test image, keyed by file name
ground_truths = {
    'giraffe.png': [25],
    'bench.png': [15],
    'elephant.png': [22],
}

# Per-image accuracy collected for each model
faster_rcnn_accuracies = []
ssd_accuracies = []

# Keep only the predictions whose confidence reaches the threshold
def get_filtered_predictions(predictions, scores, threshold=0.5):
    """Return the predictions whose score at the same index is at least `threshold`.

    `scores` is indexed by position, so it must be at least as long as
    `predictions`.
    """
    return [
        prediction
        for position, prediction in enumerate(predictions)
        if scores[position] >= threshold
    ]

# Function to calculate accuracy
def calculate_accuracy(ground_truth_labels, predicted_labels):
    """Score how well the predicted labels cover the ground-truth labels.

    Labels are matched as a multiset, independent of order: each ground-truth
    label is paired with at most one equal predicted label. The number of
    matches is divided by the larger of the two list lengths, so both missed
    objects and surplus detections lower the score, and the result stays
    within [0, 1].

    The earlier positional comparison scored 0 whenever the number of
    detections differed from the number of ground-truth labels, even if the
    expected object was among them, and depended on the detector's output
    order lining up with the ground truth.

    Returns 0 when either list is empty.
    """
    if len(predicted_labels) == 0 or len(ground_truth_labels) == 0:
        return 0

    # Predictions not yet paired with a ground-truth label
    unmatched = list(predicted_labels)
    matched = 0
    for label in ground_truth_labels:
        if label in unmatched:
            unmatched.remove(label)  # each prediction can satisfy one label only
            matched += 1

    return matched / max(len(ground_truth_labels), len(predicted_labels))

# Annotate an image with the detections a model is confident about
def visualize_predictions(image, boxes, scores, labels, model_name):
    """Outline each detection scoring above 0.5 in red and caption it "<label> <score>".

    Drawing happens directly on `image`, which is also returned. A label at
    or beyond the end of COCO_INSTANCE_CATEGORY_NAMES is captioned "Unknown".
    `model_name` is accepted but not used.
    """
    pen = ImageDraw.Draw(image)
    for box, score, label in zip(boxes, scores, labels):
        if not score > 0.5:
            continue  # below the display threshold
        if label < len(COCO_INSTANCE_CATEGORY_NAMES):
            caption = COCO_INSTANCE_CATEGORY_NAMES[label]
        else:
            caption = "Unknown"
        pixel_box = [int(coord) for coord in box]
        pen.rectangle(pixel_box, outline="red", width=3)
        pen.text((pixel_box[0], pixel_box[1]), f"{caption} {score:.2f}", fill="red")
    return image

# Run both detectors on every loaded image, score them, and plot the detections
with torch.no_grad():
    for img_name, img_tensor in zip(image_names, image_tensors):
        # A fresh copy of the image is loaded from disk for drawing
        img = Image.open(os.path.join(IMAGE_DIR, img_name)).convert("RGB")

        print(f"Processing image: {img_name}")

        # Faster R-CNN: pull boxes, scores and labels out as NumPy arrays
        faster_rcnn_output = faster_rcnn([img_tensor])[0]
        faster_rcnn_boxes, faster_rcnn_scores, faster_rcnn_labels = (
            faster_rcnn_output[key].cpu().numpy() for key in ('boxes', 'scores', 'labels')
        )

        # SSD: same three arrays
        ssd_output = ssd([img_tensor])[0]
        ssd_boxes, ssd_scores, ssd_labels = (
            ssd_output[key].cpu().numpy() for key in ('boxes', 'scores', 'labels')
        )

        # Expected labels for this file; empty when the file has no entry
        ground_truth_labels = ground_truths.get(img_name, [])

        # Labels of the detections that pass the confidence threshold
        faster_rcnn_filtered = get_filtered_predictions(faster_rcnn_labels, faster_rcnn_scores, threshold=0.5)
        ssd_filtered = get_filtered_predictions(ssd_labels, ssd_scores, threshold=0.5)

        # Per-image accuracy for each model
        faster_rcnn_accuracy = calculate_accuracy(ground_truth_labels, faster_rcnn_filtered)
        faster_rcnn_accuracies.append(faster_rcnn_accuracy)
        ssd_accuracy = calculate_accuracy(ground_truth_labels, ssd_filtered)
        ssd_accuracies.append(ssd_accuracy)

        # Show the Faster R-CNN detections
        faster_rcnn_img = visualize_predictions(
            img.copy(), faster_rcnn_boxes, faster_rcnn_scores, faster_rcnn_labels, "Faster R-CNN"
        )
        plt.figure(figsize=(8, 8))
        plt.imshow(faster_rcnn_img)
        plt.title(f"Faster R-CNN Predictions for {img_name}")
        plt.show()

        # Show the SSD detections
        ssd_img = visualize_predictions(img.copy(), ssd_boxes, ssd_scores, ssd_labels, "SSD")
        plt.figure(figsize=(8, 8))
        plt.imshow(ssd_img)
        plt.title(f"SSD Predictions for {img_name}")
        plt.show()

# Average the per-image accuracies of each model
mean_faster_rcnn_accuracy = np.mean(faster_rcnn_accuracies)
mean_ssd_accuracy = np.mean(ssd_accuracies)

# Report both means
print(f"Overall Mean Accuracy for Faster R-CNN: {mean_faster_rcnn_accuracy:.4f}")
print(f"Overall Mean Accuracy for SSD: {mean_ssd_accuracy:.4f}")

# Data for the comparison chart: one bar per model
models = ['Faster R-CNN', 'SSD']
accuracies = [mean_faster_rcnn_accuracy, mean_ssd_accuracy]

# Bar chart comparing the two mean accuracies
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(models, accuracies, color=['red', 'blue'])
ax.set_title('Comparison of Mean Accuracy between Faster R-CNN and SSD')
ax.set_ylabel('Mean Accuracy')

plt.tight_layout()
plt.show()
