In [1]:
import cv2
import os
import xml.etree.ElementTree as ET
from sklearn.metrics import average_precision_score
import numpy as np


In [2]:
# Load the pre-trained MobileNet SSD model
config_file = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'
frozen_model = 'frozen_inference_graph.pb'
model = cv2.dnn_DetectionModel(frozen_model, config_file)


In [3]:
# Load class labels
classLabels = []
file_name = 'Labels.txt'
with open(file_name, 'rt') as fpt:
    classLabels = fpt.read().rstrip('\n').split('\n')



In [4]:
# Configure the model
model.setInputSize(320, 320)
model.setInputScale(1.0/127.5)
model.setInputMean((127.5, 127.5, 127.5))
model.setInputSwapRB(True)

< cv2.dnn.Model 000001372C52DDD0>

In [5]:

# Create a directory to save XML annotation files
output_dir = 'annotations'
os.makedirs(output_dir, exist_ok=True)

font_scale = 3
font = cv2.FONT_HERSHEY_PLAIN

frame_count = 0

# Initialize video capture from a video file
video_file_path = 'videoplayback.mp4'
cap = cv2.VideoCapture(video_file_path)

if not cap.isOpened():
    print("Error: Could not open video file.")
    exit()


# Define a function to create an XML annotation for a detected object
def create_xml_annotation(xml_file, width, height, detected_objects):
    root = ET.Element("annotation")
    
    folder = ET.SubElement(root, "folder")
    folder.text = "images"
    
    filename = ET.SubElement(root, "filename")
    filename.text = os.path.basename(xml_file).replace(".xml", ".jpg")
    
    size = ET.SubElement(root, "size")
    
    width_elem = ET.SubElement(size, "width")
    width_elem.text = str(width)
    
    height_elem = ET.SubElement(size, "height")
    height_elem.text = str(height)
    
    for obj_info in detected_objects:
        class_label = obj_info['class_label']
        bbox = obj_info['bbox']
        
        object_elem = ET.SubElement(root, "object")
        
        name = ET.SubElement(object_elem, "name")
        name.text = class_label
        
        bndbox = ET.SubElement(object_elem, "bndbox")
        
        xmin = ET.SubElement(bndbox, "xmin")
        xmin.text = str(bbox[0])
        
        ymin = ET.SubElement(bndbox, "ymin")
        ymin.text = str(bbox[1])
        
        xmax = ET.SubElement(bndbox, "xmax")
        xmax.text = str(bbox[2])
        
        ymax = ET.SubElement(bndbox, "ymax")
        ymax.text = str(bbox[3])

    tree = ET.ElementTree(root)
    tree.write(xml_file)

# Define paths to the directories containing XML files
model_predictions_dir = r'D:\ORRT\annotations'
manual_annotations_dir = r'D:\ORRT\manual_annotations'

while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame")
        break
    
    # Perform object detection on the frame to obtain ClassIndex, confidence, and bbox
    ClassIndex, confidence, bbox = model.detect(frame, confThreshold=0.55)
    
    # List to store information about detected objects in the frame
    detected_objects = []
    
    if len(ClassIndex) != 0:
        for ClassInd, conf, boxes in zip(ClassIndex.flatten(), confidence.flatten(), bbox):
            if ClassInd <= 80:
                cv2.rectangle(frame, boxes, (255, 0, 0), 2)
                cv2.putText(frame, classLabels[ClassInd-1], (boxes[0]+10, boxes[1]+40), font, fontScale=font_scale, color=(0, 255, 0), thickness=1)
                
                # Append information about the detected object to the list
                detected_objects.append({
                    'class_label': classLabels[ClassInd-1],
                    'bbox': boxes.tolist()
                })

    # Save the annotation as an XML file with information about all detected objects
    filename = f"frame_{frame_count}.xml"
    create_xml_annotation(os.path.join(output_dir, filename), frame.shape[1], frame.shape[0], detected_objects)
    
    cv2.imshow('Object Detection Tutorial', frame)
    
    # Save the video frame as an image (optional)
    cv2.imwrite(f"output_images/frame_{frame_count}.jpg", frame)
    
    if cv2.waitKey(2) & 0xFF == ord('q'):
        break
        
    frame_count += 1

cap.release()
cv2.destroyAllWindows()


Error: Could not read frame


In [6]:
# Directory containing model-generated annotation files
model_annotations_dir = 'annotations'

# Directory containing manual annotation files
manual_annotations_dir = 'manual_annotations'


In [7]:
# Function to parse an XML annotation file
def parse_annotation(annotation_file):
    tree = ET.parse(annotation_file)
    root = tree.getroot()
    
    filename = root.find('filename').text
    class_label = root.find('object').find('name').text
    
    return filename, class_label


In [8]:
# Compare model-generated annotations with manual annotations
model_annotations = os.listdir(model_annotations_dir)
manual_annotations = os.listdir(manual_annotations_dir)

for model_annotation_file in model_annotations:
    if model_annotation_file not in manual_annotations:
        print(f"Missing manual annotation for {model_annotation_file}")
    else:
        model_annotation_path = os.path.join(model_annotations_dir, model_annotation_file)
        manual_annotation_path = os.path.join(manual_annotations_dir, model_annotation_file)
        
        model_filename, model_class_label = parse_annotation(model_annotation_path)
        manual_filename, manual_class_label = parse_annotation(manual_annotation_path)
        
        if model_filename != manual_filename:
            print(f"Filename mismatch: Model: {model_filename}, Manual: {manual_filename}")
        elif model_class_label != manual_class_label:
            print(f"Class label mismatch: Model: {model_class_label}, Manual: {manual_class_label}")
        else:
            print(f"Annotation match for {model_annotation_file}")


Annotation match for frame_0.xml
Annotation match for frame_1.xml
Annotation match for frame_10.xml
Annotation match for frame_100.xml
Annotation match for frame_101.xml
Annotation match for frame_102.xml
Annotation match for frame_103.xml
Annotation match for frame_104.xml
Annotation match for frame_105.xml
Annotation match for frame_106.xml
Annotation match for frame_107.xml
Annotation match for frame_108.xml
Annotation match for frame_109.xml
Annotation match for frame_11.xml
Annotation match for frame_110.xml
Annotation match for frame_111.xml
Annotation match for frame_112.xml
Annotation match for frame_113.xml
Annotation match for frame_114.xml
Annotation match for frame_115.xml
Annotation match for frame_116.xml
Annotation match for frame_117.xml
Annotation match for frame_118.xml
Annotation match for frame_119.xml
Annotation match for frame_12.xml
Annotation match for frame_120.xml
Annotation match for frame_121.xml
Annotation match for frame_122.xml
Annotation match for frame_

In [9]:
# Initialize counters for accuracy calculation
correct_matches = 0
total_annotations = 0

# Define an empty list to store class labels
class_labels = []
# Define an empty list to store true labels and predicted scores
y_true = []
y_scores = []

# Loop through manual annotations to gather ground truth
manual_annotations = os.listdir(manual_annotations_dir)
for manual_annotation_file in manual_annotations:
    manual_annotation_path = os.path.join(manual_annotations_dir, manual_annotation_file)
    _, manual_class_label = parse_annotation(manual_annotation_path)
    class_labels.append(manual_class_label)

# Loop through model-generated annotations to gather predictions and ground truth
model_annotations = os.listdir(model_annotations_dir)
for model_annotation_file in model_annotations:
    model_annotation_path = os.path.join(model_annotations_dir, model_annotation_file)
    model_filename, model_class_label = parse_annotation(model_annotation_path)
    

# Check if manual annotation file exists for this frame
    if model_annotation_file in manual_annotations:
        # Load ground truth and prediction data
        y_true.append(1 if model_class_label == class_labels[0] else 0)  # Set 1 for correct class, else 0
        y_scores.append(confidence)  # Replace 'confidence' with the actual confidence score

    


In [10]:
# Directory containing ground truth annotation files
ground_truth_dir = 'manual_annotations'

# Directory containing model output annotation files
model_output_dir = 'annotations'

# Function to parse an XML annotation file
def parse_annotation(annotation_file):
    tree = ET.parse(annotation_file)
    root = tree.getroot()

    # Extract bounding boxes, class labels, and other relevant information
    # Modify this function according to your XML structure
    bounding_boxes = [...]  # Extract bounding box coordinates
    class_labels = [...]     # Extract class labels
    confidence_scores = [...]  # Extract confidence scores

    return bounding_boxes, class_labels, confidence_scores
# Initialize lists to store ground truth and model predictions for all classes
all_ground_truth = []  # List of ground truth annotations for all classes
all_model_predictions = []  # List of model predictions for all classes


In [11]:
# Function to calculate IoU between two bounding boxes
def calculate_iou(box1, box2):
    # Check if the boxes are identical
    if box1 == box2:
        return 1.0  # IoU is 1 when boxes are identical
    
    # Calculate intersection coordinates
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # Calculate intersection area
    intersection_area = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)

    # Calculate area of both bounding boxes
    area_box1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
    area_box2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)

    # Check if either bounding box has zero area
    if area_box1 == 0 or area_box2 == 0:
        return 0.0  # IoU is zero

    # Calculate IoU
    iou = intersection_area / float(area_box1 + area_box2 - intersection_area)

    return iou

# Function to parse the first bounding box from XML annotation
def parse_first_bbox(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    
    first_bbox = None
    
    # Find the first object
    first_object = root.find('object')
    if first_object is not None:
        bbox = first_object.find('bndbox')
        xmin = int(bbox.find('xmin').text)
        ymin = int(bbox.find('ymin').text)
        xmax = int(bbox.find('xmax').text)
        ymax = int(bbox.find('ymax').text)
        
        first_bbox = (xmin, ymin, xmax, ymax)
    
    return first_bbox

# Directory paths for ground truth and model output XML files
ground_truth_dir = 'manual_annotations'
model_output_dir = 'annotations'

# List to store IoU values
iou_values = []

# Iterate over XML files in both directories
for filename in os.listdir(ground_truth_dir):
    ground_truth_file = os.path.join(ground_truth_dir, filename)
    model_output_file = os.path.join(model_output_dir, filename)

    # Parse the first bounding box information from XML files
    gt_bbox = parse_first_bbox(ground_truth_file)
    model_bbox = parse_first_bbox(model_output_file)

    if gt_bbox is not None and model_bbox is not None:
        # Calculate IoU between Ground Truth and Model Output
        iou = calculate_iou(gt_bbox, model_bbox)
        iou_values.append(iou)

# Calculate the average IoU
average_iou = sum(iou_values) / len(iou_values)
print(f"Average IoU: {average_iou:.2f}")


Average IoU: 0.91
