In [3]:
pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.1-cp310-cp310-win_amd64.whl (11.0 MB)
     ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
     --------------------------------------- 0.0/11.0 MB 991.0 kB/s eta 0:00:12
     --------------------------------------- 0.0/11.0 MB 991.0 kB/s eta 0:00:12
     --------------------------------------- 0.1/11.0 MB 491.5 kB/s eta 0:00:23
     --------------------------------------- 0.1/11.0 MB 581.0 kB/s eta 0:00:19
     --------------------------------------- 0.1/11.0 MB 420.8 kB/s eta 0:00:26
      -------------------------------------- 0.1/11.0 MB 500.5 kB/s eta 0:00:22
      -------------------------------------- 0.2/11.0 MB 523.5 kB/s eta 0:00:21
      -------------------------------------- 0.2/11.0 MB 523.5 kB/s eta 0:00:21
      -------------------------------------- 0.2/11.0 MB 479.2 kB/s eta 0:00:23
      -------------------------------------- 0.2/11.0 MB 480.3 kB/s eta 0:00:23
      -----------------------------


[notice] A new release of pip is available: 23.0.1 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


### 1. Import Necessary Libraries


In [4]:
import os
import xml.etree.ElementTree as ET
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Flatten, Dense
from tensorflow.keras.models import Model
import cv2
import numpy as np
from sklearn.model_selection import train_test_split


### 2. Read and Convert YOLOv8 Annotations to SSD Format


In [5]:
def read_yolo_annotation(file_path, img_width, img_height):
    annotations = []
    with open(file_path, 'r') as file:
        lines = file.readlines()
        for line in lines:
            parts = line.strip().split()
            class_id = int(parts[0])
            center_x = float(parts[1]) * img_width
            center_y = float(parts[2]) * img_height
            width = float(parts[3]) * img_width
            height = float(parts[4]) * img_height
            xmin = int(center_x - width / 2)
            ymin = int(center_y - height / 2)
            xmax = int(center_x + width / 2)
            ymax = int(center_y + height / 2)
            annotations.append([class_id, xmin, ymin, xmax, ymax])
    return annotations

def create_xml_annotation(img_filename, img_width, img_height, annotations, output_dir):
    annotation = ET.Element("annotation")
    ET.SubElement(annotation, "filename").text = img_filename

    size = ET.SubElement(annotation, "size")
    ET.SubElement(size, "width").text = str(img_width)
    ET.SubElement(size, "height").text = str(img_height)
    ET.SubElement(size, "depth").text = "3"

    for ann in annotations:
        class_id, xmin, ymin, xmax, ymax = ann
        obj = ET.SubElement(annotation, "object")
        ET.SubElement(obj, "name").text = str(class_id)
        ET.SubElement(obj, "pose").text = "Unspecified"
        ET.SubElement(obj, "truncated").text = "0"
        ET.SubElement(obj, "difficult").text = "0"

        bndbox = ET.SubElement(obj, "bndbox")
        ET.SubElement(bndbox, "xmin").text = str(xmin)
        ET.SubElement(bndbox, "ymin").text = str(ymin)
        ET.SubElement(bndbox, "xmax").text = str(xmax)
        ET.SubElement(bndbox, "ymax").text = str(ymax)

    tree = ET.ElementTree(annotation)
    output_path = os.path.join(output_dir, img_filename.replace('.jpg', '.xml'))
    tree.write(output_path)

def convert_dataset(yolo_dir, img_dir, output_dir, img_width, img_height):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    for yolo_file in os.listdir(yolo_dir):
        if yolo_file.endswith('.txt'):
            img_filename = yolo_file.replace('.txt', '.jpg')
            yolo_path = os.path.join(yolo_dir, yolo_file)
            annotations = read_yolo_annotation(yolo_path, img_width, img_height)
            create_xml_annotation(img_filename, img_width, img_height, annotations, output_dir)

# Example usage for converting train and val sets
convert_dataset('Annotated_clean/Annotated/labels/train', 'Annotated_clean/Annotated/images/train', 'Annotated_clean/Annotated/annotations/train', 1280, 720)
convert_dataset('Annotated_clean/Annotated/labels/val', 'images/val', 'Annotated_clean/Annotated/annotations/val', 1280, 720)



### 3. Load and Prepare Dataset


In [13]:
MAX_OBJECTS_PER_IMAGE = 20  # Adjust based on your dataset
NUM_CLASSES = 20  # Adjust based on your dataset


In [6]:
def load_data(img_dir, ann_dir, img_width, img_height):
    images = []
    annotations = []
    for img_file in os.listdir(img_dir):
        if img_file.endswith('.jpg'):
            img_path = os.path.join(img_dir, img_file)
            image = cv2.imread(img_path)
            image = cv2.resize(image, (img_width, img_height))
            images.append(image)
            
            ann_file = img_file.replace('.jpg', '.xml')
            ann_path = os.path.join(ann_dir, ann_file)
            tree = ET.parse(ann_path)
            root = tree.getroot()
            annots = []
            for obj in root.findall('object'):
                class_id = int(obj.find('name').text)
                bndbox = obj.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)
                annots.append([class_id, xmin, ymin, xmax, ymax])
            annotations.append(annots)
    return np.array(images), annotations

img_width, img_height = 300, 300  # Example input shape
train_images, train_annotations = load_data('Annotated_clean/Annotated/images/train', 'Annotated_clean/Annotated/annotations/train', img_width, img_height)
val_images, val_annotations = load_data('Annotated_clean/Annotated/images/val', 'Annotated_clean/Annotated/annotations/val', img_width, img_height)


### 4. Define SSD Model


In [14]:
def ssd_model(input_shape, num_classes, max_objects):
    input_tensor = Input(shape=input_shape)
    
    # Example feature extractor (simplified)
    x = Conv2D(32, (3, 3), activation='relu')(input_tensor)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    class_predictions = Dense(max_objects * num_classes, activation='softmax', name='class_predictions')(x)
    bbox_predictions = Dense(max_objects * 4, activation='sigmoid', name='bbox_predictions')(x)

    model = Model(inputs=input_tensor, outputs=[class_predictions, bbox_predictions])
    return model

input_shape = (img_width, img_height, 3)  # Example input shape

model = ssd_model(input_shape, NUM_CLASSES, MAX_OBJECTS_PER_IMAGE)
model.compile(optimizer='adam', 
              loss={'class_predictions': 'sparse_categorical_crossentropy', 'bbox_predictions': 'mse'},
              metrics={'class_predictions': 'accuracy', 'bbox_predictions': 'mse'})


### 5. Train the SSD Model


In [10]:
# MAX_OBJECTS_PER_IMAGE = 20  # Adjust based on your dataset
# NUM_CLASSES = 20  # Adjust based on your dataset


In [15]:
model.fit(train_images, {'class_predictions': train_class_targets, 'bbox_predictions': train_bbox_targets},
          validation_data=(val_images, {'class_predictions': val_class_targets, 'bbox_predictions': val_bbox_targets}),
          epochs=50)


Epoch 1/50


ValueError: Argument `output` must have rank (ndim) `target.ndim - 1`. Received: target.shape=(None, 20), output.shape=(None, 400)

In [11]:
def preprocess_annotations(annotations, img_width, img_height, max_objects, num_classes):
    class_targets = np.zeros((len(annotations), max_objects), dtype=np.int32)
    bbox_targets = np.zeros((len(annotations), max_objects, 4), dtype=np.float32)
    
    for i, ann in enumerate(annotations):
        classes = np.zeros((max_objects,), dtype=np.int32)
        bboxes = np.zeros((max_objects, 4), dtype=np.float32)
        
        for j, obj in enumerate(ann[:max_objects]):
            class_id, xmin, ymin, xmax, ymax = obj
            classes[j] = class_id
            bboxes[j] = [xmin / img_width, ymin / img_height, xmax / img_width, ymax / img_height]
        
        class_targets[i] = classes
        bbox_targets[i] = bboxes
    
    return class_targets, bbox_targets

train_class_targets, train_bbox_targets = preprocess_annotations(train_annotations, img_width, img_height, MAX_OBJECTS_PER_IMAGE, NUM_CLASSES)
val_class_targets, val_bbox_targets = preprocess_annotations(val_annotations, img_width, img_height, MAX_OBJECTS_PER_IMAGE, NUM_CLASSES)


### 6. Validate and Output Performance Data


In [12]:
performance = model.evaluate(val_images, {'class_predictions': val_class_targets, 'bbox_predictions': val_bbox_targets})
print(f"Validation Loss: {performance[0]}")
print(f"Validation Class Accuracy: {performance[1]}")
print(f"Validation BBox MSE: {performance[2]}")


ValueError: Argument `output` must have rank (ndim) `target.ndim - 1`. Received: target.shape=(None, 20), output.shape=(None, 20)

In [17]:
import os
import xml.etree.ElementTree as ET
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Flatten, Dense
from tensorflow.keras.models import Model
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Constants
MAX_OBJECTS_PER_IMAGE = 20  # Adjust based on your dataset
NUM_CLASSES = 20  # Adjust based on your dataset
img_width, img_height = 300, 300  # Example input shape

# Read and convert YOLOv8 annotations to SSD format
def read_yolo_annotation(file_path, img_width, img_height):
    annotations = []
    with open(file_path, 'r') as file:
        lines = file.readlines()
        for line in lines:
            parts = line.strip().split()
            class_id = int(parts[0])
            center_x = float(parts[1]) * img_width
            center_y = float(parts[2]) * img_height
            width = float(parts[3]) * img_width
            height = float(parts[4]) * img_height
            xmin = int(center_x - width / 2)
            ymin = int(center_y - height / 2)
            xmax = int(center_x + width / 2)
            ymax = int(center_y + height / 2)
            annotations.append([class_id, xmin, ymin, xmax, ymax])
    return annotations

def create_xml_annotation(img_filename, img_width, img_height, annotations, output_dir):
    annotation = ET.Element("annotation")
    ET.SubElement(annotation, "filename").text = img_filename

    size = ET.SubElement(annotation, "size")
    ET.SubElement(size, "width").text = str(img_width)
    ET.SubElement(size, "height").text = str(img_height)
    ET.SubElement(size, "depth").text = "3"

    for ann in annotations:
        class_id, xmin, ymin, xmax, ymax = ann
        obj = ET.SubElement(annotation, "object")
        ET.SubElement(obj, "name").text = str(class_id)
        ET.SubElement(obj, "pose").text = "Unspecified"
        ET.SubElement(obj, "truncated").text = "0"
        ET.SubElement(obj, "difficult").text = "0"

        bndbox = ET.SubElement(obj, "bndbox")
        ET.SubElement(bndbox, "xmin").text = str(xmin)
        ET.SubElement(bndbox, "ymin").text = str(ymin)
        ET.SubElement(bndbox, "xmax").text = str(xmax)
        ET.SubElement(bndbox, "ymax").text = str(ymax)

    tree = ET.ElementTree(annotation)
    output_path = os.path.join(output_dir, img_filename.replace('.jpg', '.xml'))
    tree.write(output_path)

def convert_dataset(yolo_dir, img_dir, output_dir, img_width, img_height):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    for yolo_file in os.listdir(yolo_dir):
        if yolo_file.endswith('.txt'):
            img_filename = yolo_file.replace('.txt', '.jpg')
            yolo_path = os.path.join(yolo_dir, yolo_file)
            annotations = read_yolo_annotation(yolo_path, img_width, img_height)
            create_xml_annotation(img_filename, img_width, img_height, annotations, output_dir)

# Convert train and val sets
convert_dataset('Annotated_clean/Annotated/labels/train', 'Annotated_clean/Annotated/images/train', 'Annotated_clean/Annotated/annotations/train', img_width, img_height)
convert_dataset('Annotated_clean/Annotated/labels/val', 'Annotated_clean/Annotated/images/val', 'Annotated_clean/Annotated/annotations/val', img_width, img_height)

# Load and preprocess dataset
def load_data(img_dir, ann_dir, img_width, img_height):
    images = []
    annotations = []
    for img_file in os.listdir(img_dir):
        if img_file.endswith('.jpg'):
            img_path = os.path.join(img_dir, img_file)
            image = cv2.imread(img_path)
            image = cv2.resize(image, (img_width, img_height))
            images.append(image)
            
            ann_file = img_file.replace('.jpg', '.xml')
            ann_path = os.path.join(ann_dir, ann_file)
            tree = ET.parse(ann_path)
            root = tree.getroot()
            annots = []
            for obj in root.findall('object'):
                class_id = int(obj.find('name').text)
                bndbox = obj.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)
                annots.append([class_id, xmin, ymin, xmax, ymax])
            annotations.append(annots)
    return np.array(images), annotations

train_images, train_annotations = load_data('Annotated_clean/Annotated/images/train', 'Annotated_clean/Annotated/annotations/train', img_width, img_height)
val_images, val_annotations = load_data('Annotated_clean/Annotated/images/val', 'Annotated_clean/Annotated/annotations/val', img_width, img_height)

# Preprocess annotations
def preprocess_annotations(annotations, img_width, img_height, max_objects, num_classes):
    class_targets = np.zeros((len(annotations), max_objects), dtype=np.int32)
    bbox_targets = np.zeros((len(annotations), max_objects, 4), dtype=np.float32)
    
    for i, ann in enumerate(annotations):
        classes = np.zeros((max_objects,), dtype=np.int32)
        bboxes = np.zeros((max_objects, 4), dtype=np.float32)
        
        for j, obj in enumerate(ann[:max_objects]):
            class_id, xmin, ymin, xmax, ymax = obj
            classes[j] = class_id
            bboxes[j] = [xmin / img_width, ymin / img_height, xmax / img_width, ymax / img_height]
        
        class_targets[i] = classes
        bbox_targets[i] = bboxes
    
    return class_targets, bbox_targets

train_class_targets, train_bbox_targets = preprocess_annotations(train_annotations, img_width, img_height, MAX_OBJECTS_PER_IMAGE, NUM_CLASSES)
val_class_targets, val_bbox_targets = preprocess_annotations(val_annotations, img_width, img_height, MAX_OBJECTS_PER_IMAGE, NUM_CLASSES)

# Define SSD model
def ssd_model(input_shape, num_classes, max_objects):
    input_tensor = Input(shape=input_shape)
    
    # Example feature extractor (simplified)
    x = Conv2D(32, (3, 3), activation='relu')(input_tensor)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    class_predictions = Dense(max_objects * num_classes, activation='softmax', name='class_predictions')(x)
    bbox_predictions = Dense(max_objects * 4, activation='sigmoid', name='bbox_predictions')(x)

    model = Model(inputs=input_tensor, outputs=[class_predictions, bbox_predictions])
    return model

input_shape = (img_width, img_height, 3)  # Example input shape

model = ssd_model(input_shape, NUM_CLASSES, MAX_OBJECTS_PER_IMAGE)
model.compile(optimizer='adam', 
              loss={'class_predictions': 'sparse_categorical_crossentropy', 'bbox_predictions': 'mse'},
              metrics={'class_predictions': 'accuracy', 'bbox_predictions': 'mse'})

# Train the model
model.fit(train_images, {'class_predictions': train_class_targets, 'bbox_predictions': train_bbox_targets},
          validation_data=(val_images, {'class_predictions': val_class_targets, 'bbox_predictions': val_bbox_targets}),
          epochs=50)

# Validate and output performance data
performance = model.evaluate(val_images, {'class_predictions': val_class_targets, 'bbox_predictions': val_bbox_targets})
print(f"Validation Loss: {performance[0]}")
print(f"Validation Class Accuracy: {performance[1]}")
print(f"Validation BBox MSE: {performance[2]}")


Epoch 1/50


ValueError: Argument `output` must have rank (ndim) `target.ndim - 1`. Received: target.shape=(None, 20), output.shape=(None, 400)

In [20]:
import os
import xml.etree.ElementTree as ET
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Flatten, Dense, Reshape
from tensorflow.keras.models import Model
import cv2
import numpy as np

# Constants
MAX_OBJECTS_PER_IMAGE = 20  # Adjust based on your dataset
NUM_CLASSES = 20  # Adjust based on your dataset
img_width, img_height = 300, 300  # Example input shape

# Read and convert YOLOv8 annotations to SSD format
def read_yolo_annotation(file_path, img_width, img_height):
    annotations = []
    with open(file_path, 'r') as file:
        lines = file.readlines()
        for line in lines:
            parts = line.strip().split()
            class_id = int(parts[0])
            center_x = float(parts[1]) * img_width
            center_y = float(parts[2]) * img_height
            width = float(parts[3]) * img_width
            height = float(parts[4]) * img_height
            xmin = int(center_x - width / 2)
            ymin = int(center_y - height / 2)
            xmax = int(center_x + width / 2)
            ymax = int(center_y + height / 2)
            annotations.append([class_id, xmin, ymin, xmax, ymax])
    return annotations

def create_xml_annotation(img_filename, img_width, img_height, annotations, output_dir):
    annotation = ET.Element("annotation")
    ET.SubElement(annotation, "filename").text = img_filename

    size = ET.SubElement(annotation, "size")
    ET.SubElement(size, "width").text = str(img_width)
    ET.SubElement(size, "height").text = str(img_height)
    ET.SubElement(size, "depth").text = "3"

    for ann in annotations:
        class_id, xmin, ymin, xmax, ymax = ann
        obj = ET.SubElement(annotation, "object")
        ET.SubElement(obj, "name").text = str(class_id)
        ET.SubElement(obj, "pose").text = "Unspecified"
        ET.SubElement(obj, "truncated").text = "0"
        ET.SubElement(obj, "difficult").text = "0"

        bndbox = ET.SubElement(obj, "bndbox")
        ET.SubElement(bndbox, "xmin").text = str(xmin)
        ET.SubElement(bndbox, "ymin").text = str(ymin)
        ET.SubElement(bndbox, "xmax").text = str(xmax)
        ET.SubElement(bndbox, "ymax").text = str(ymax)

    tree = ET.ElementTree(annotation)
    output_path = os.path.join(output_dir, img_filename.replace('.jpg', '.xml'))
    tree.write(output_path)

def convert_dataset(yolo_dir, img_dir, output_dir, img_width, img_height):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    for yolo_file in os.listdir(yolo_dir):
        if yolo_file.endswith('.txt'):
            img_filename = yolo_file.replace('.txt', '.jpg')
            yolo_path = os.path.join(yolo_dir, yolo_file)
            annotations = read_yolo_annotation(yolo_path, img_width, img_height)
            create_xml_annotation(img_filename, img_width, img_height, annotations, output_dir)

# Convert train and val sets
convert_dataset('Annotated_clean/Annotated/labels/train', 'Annotated_clean/Annotated/images/train', 'Annotated_clean/Annotated/annotations/train', img_width, img_height)
convert_dataset('Annotated_clean/Annotated/labels/val', 'Annotated_clean/Annotated/images/val', 'Annotated_clean/Annotated/annotations/val', img_width, img_height)

# Load and preprocess dataset
def load_data(img_dir, ann_dir, img_width, img_height):
    images = []
    annotations = []
    for img_file in os.listdir(img_dir):
        if img_file.endswith('.jpg'):
            img_path = os.path.join(img_dir, img_file)
            image = cv2.imread(img_path)
            image = cv2.resize(image, (img_width, img_height))
            images.append(image)
            
            ann_file = img_file.replace('.jpg', '.xml')
            ann_path = os.path.join(ann_dir, ann_file)
            tree = ET.parse(ann_path)
            root = tree.getroot()
            annots = []
            for obj in root.findall('object'):
                class_id = int(obj.find('name').text)
                bndbox = obj.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)
                annots.append([class_id, xmin, ymin, xmax, ymax])
            annotations.append(annots)
    return np.array(images), annotations

train_images, train_annotations = load_data('Annotated_clean/Annotated/images/train', 'Annotated_clean/Annotated/annotations/train', img_width, img_height)
val_images, val_annotations = load_data('Annotated_clean/Annotated/images/val', 'Annotated_clean/Annotated/annotations/val', img_width, img_height)

# Preprocess annotations
def preprocess_annotations(annotations, img_width, img_height, max_objects, num_classes):
    class_targets = np.zeros((len(annotations), max_objects), dtype=np.int32)
    bbox_targets = np.zeros((len(annotations), max_objects, 4), dtype=np.float32)
    
    for i, ann in enumerate(annotations):
        classes = np.zeros((max_objects,), dtype=np.int32)
        bboxes = np.zeros((max_objects, 4), dtype=np.float32)
        
        for j, obj in enumerate(ann[:max_objects]):
            class_id, xmin, ymin, xmax, ymax = obj
            classes[j] = class_id
            bboxes[j] = [xmin / img_width, ymin / img_height, xmax / img_width, ymax / img_height]
        
        class_targets[i] = classes
        bbox_targets[i] = bboxes
    
    return class_targets, bbox_targets

train_class_targets, train_bbox_targets = preprocess_annotations(train_annotations, img_width, img_height, MAX_OBJECTS_PER_IMAGE, NUM_CLASSES)
val_class_targets, val_bbox_targets = preprocess_annotations(val_annotations, img_width, img_height, MAX_OBJECTS_PER_IMAGE, NUM_CLASSES)

# Define SSD model
def ssd_model(input_shape, num_classes, max_objects):
    input_tensor = Input(shape=input_shape)
    
    # Example feature extractor (simplified)
    x = Conv2D(32, (3, 3), activation='relu')(input_tensor)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    
    class_predictions = Dense(max_objects * num_classes, activation='softmax')(x)
    class_predictions = Reshape((max_objects, num_classes), name='class_predictions')(class_predictions)
    
    bbox_predictions = Dense(max_objects * 4, activation='sigmoid')(x)
    bbox_predictions = Reshape((max_objects, 4), name='bbox_predictions')(bbox_predictions)

    model = Model(inputs=input_tensor, outputs=[class_predictions, bbox_predictions])
    return model

input_shape = (img_width, img_height, 3)  # Example input shape

model = ssd_model(input_shape, NUM_CLASSES, MAX_OBJECTS_PER_IMAGE)
model.compile(optimizer='adam', 
              loss={'class_predictions': 'sparse_categorical_crossentropy', 'bbox_predictions': 'mse'},
              metrics={'class_predictions': 'accuracy', 'bbox_predictions': 'mse'})

# Train the model
model.fit(train_images, {'class_predictions': train_class_targets, 'bbox_predictions': train_bbox_targets},
          validation_adata=(val_images, {'class_predictions': val_class_targets, 'bbox_predictions': val_bbox_targets}),
          epochs=2)

# Validate and output performance data
performance = model.evaluate(val_images, {'class_predictions': val_class_targets, 'bbox_predictions': val_bbox_targets})
print(f"Validation Loss: {performance[0]}")
print(f"Validation Class Accuracy: {performance[2]}")
print(f"Validation BBox MSE: {performance[1])


Epoch 1/2
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m281s[0m 4s/step - bbox_predictions_mse: 0.3136 - class_predictions_accuracy: 0.8773 - loss: 4.0662 - val_bbox_predictions_mse: 0.2339 - val_class_predictions_accuracy: 0.9500 - val_loss: 3.8858
Epoch 2/2
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 4s/step - bbox_predictions_mse: 0.2271 - class_predictions_accuracy: 0.9500 - loss: 3.8790 - val_bbox_predictions_mse: 0.2337 - val_class_predictions_accuracy: 0.9500 - val_loss: 3.8856
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 464ms/step - bbox_predictions_mse: 0.2344 - class_predictions_accuracy: 0.9500 - loss: 3.8862
Validation Loss: 3.885592222213745
Validation Class Accuracy: 0.23374228179454803
Validation BBox MSE: 0.9499998688697815


In [21]:
performance

[3.885592222213745, 0.23374228179454803, 0.9499998688697815]

In [24]:
import os
import xml.etree.ElementTree as ET
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Flatten, Dense, Reshape
from tensorflow.keras.models import Model
import cv2
import numpy as np

# Constants
MAX_OBJECTS_PER_IMAGE = 20  # Adjust based on your dataset
NUM_CLASSES = 20  # Adjust based on your dataset
img_width, img_height = 300, 300  # Example input shape

# Read and convert YOLOv8 annotations to SSD format
def read_yolo_annotation(file_path, img_width, img_height):
    annotations = []
    with open(file_path, 'r') as file:
        lines = file.readlines()
        for line in lines:
            parts = line.strip().split()
            class_id = int(parts[0])
            center_x = float(parts[1]) * img_width
            center_y = float(parts[2]) * img_height
            width = float(parts[3]) * img_width
            height = float(parts[4]) * img_height
            xmin = int(center_x - width / 2)
            ymin = int(center_y - height / 2)
            xmax = int(center_x + width / 2)
            ymax = int(center_y + height / 2)
            annotations.append([class_id, xmin, ymin, xmax, ymax])
    return annotations

def create_xml_annotation(img_filename, img_width, img_height, annotations, output_dir):
    annotation = ET.Element("annotation")
    ET.SubElement(annotation, "filename").text = img_filename

    size = ET.SubElement(annotation, "size")
    ET.SubElement(size, "width").text = str(img_width)
    ET.SubElement(size, "height").text = str(img_height)
    ET.SubElement(size, "depth").text = "3"

    for ann in annotations:
        class_id, xmin, ymin, xmax, ymax = ann
        obj = ET.SubElement(annotation, "object")
        ET.SubElement(obj, "name").text = str(class_id)
        ET.SubElement(obj, "pose").text = "Unspecified"
        ET.SubElement(obj, "truncated").text = "0"
        ET.SubElement(obj, "difficult").text = "0"

        bndbox = ET.SubElement(obj, "bndbox")
        ET.SubElement(bndbox, "xmin").text = str(xmin)
        ET.SubElement(bndbox, "ymin").text = str(ymin)
        ET.SubElement(bndbox, "xmax").text = str(xmax)
        ET.SubElement(bndbox, "ymax").text = str(ymax)

    tree = ET.ElementTree(annotation)
    output_path = os.path.join(output_dir, img_filename.replace('.jpg', '.xml'))
    tree.write(output_path)

def convert_dataset(yolo_dir, img_dir, output_dir, img_width, img_height):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    for yolo_file in os.listdir(yolo_dir):
        if yolo_file.endswith('.txt'):
            img_filename = yolo_file.replace('.txt', '.jpg')
            yolo_path = os.path.join(yolo_dir, yolo_file)
            annotations = read_yolo_annotation(yolo_path, img_width, img_height)
            create_xml_annotation(img_filename, img_width, img_height, annotations, output_dir)

# Convert train and val sets
convert_dataset('Annotated_clean/Annotated/labels/train', 'Annotated_clean/Annotated/images/train', 'Annotated_clean/Annotated/annotations/train', img_width, img_height)
convert_dataset('Annotated_clean/Annotated/labels/val', 'Annotated_clean/Annotated/images/val', 'Annotated_clean/Annotated/annotations/val', img_width, img_height)

# Load and preprocess dataset
def load_data(img_dir, ann_dir, img_width, img_height):
    images = []
    annotations = []
    for img_file in os.listdir(img_dir):
        if img_file.endswith('.jpg'):
            img_path = os.path.join(img_dir, img_file)
            image = cv2.imread(img_path)
            image = cv2.resize(image, (img_width, img_height))
            images.append(image)
            
            ann_file = img_file.replace('.jpg', '.xml')
            ann_path = os.path.join(ann_dir, ann_file)
            tree = ET.parse(ann_path)
            root = tree.getroot()
            annots = []
            for obj in root.findall('object'):
                class_id = int(obj.find('name').text)
                bndbox = obj.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)
                annots.append([class_id, xmin, ymin, xmax, ymax])
            annotations.append(annots)
    return np.array(images), annotations

train_images, train_annotations = load_data('Annotated_clean/Annotated/images/train', 'Annotated_clean/Annotated/annotations/train', img_width, img_height)
val_images, val_annotations = load_data('Annotated_clean/Annotated/images/val', 'Annotated_clean/Annotated/annotations/val', img_width, img_height)

# Preprocess annotations
def preprocess_annotations(annotations, img_width, img_height, max_objects, num_classes):
    class_targets = np.zeros((len(annotations), max_objects), dtype=np.int32)
    bbox_targets = np.zeros((len(annotations), max_objects, 4), dtype=np.float32)
    
    for i, ann in enumerate(annotations):
        classes = np.zeros((max_objects,), dtype=np.int32)
        bboxes = np.zeros((max_objects, 4), dtype=np.float32)
        
        for j, obj in enumerate(ann[:max_objects]):
            class_id, xmin, ymin, xmax, ymax = obj
            classes[j] = class_id
            bboxes[j] = [xmin / img_width, ymin / img_height, xmax / img_width, ymax / img_height]
        
        class_targets[i] = classes
        bbox_targets[i] = bboxes
    
    return class_targets, bbox_targets

train_class_targets, train_bbox_targets = preprocess_annotations(train_annotations, img_width, img_height, MAX_OBJECTS_PER_IMAGE, NUM_CLASSES)
val_class_targets, val_bbox_targets = preprocess_annotations(val_annotations, img_width, img_height, MAX_OBJECTS_PER_IMAGE, NUM_CLASSES)

# Define SSD model
def ssd_model(input_shape, num_classes, max_objects):
    input_tensor = Input(shape=input_shape)
    
    # Example feature extractor (simplified)
    x = Conv2D(32, (3, 3), activation='relu')(input_tensor)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    
    class_predictions = Dense(max_objects * num_classes, activation='softmax')(x)
    class_predictions = Reshape((max_objects, num_classes), name='class_predictions')(class_predictions)
    
    bbox_predictions = Dense(max_objects * 4, activation='sigmoid')(x)
    bbox_predictions = Reshape((max_objects, 4), name='bbox_predictions')(bbox_predictions)

    model = Model(inputs=input_tensor, outputs=[class_predictions, bbox_predictions])
    return model

input_shape = (img_width, img_height, 3)  # Example input shape

model = ssd_model(input_shape, NUM_CLASSES, MAX_OBJECTS_PER_IMAGE)
model.compile(optimizer='adam', 
              loss={'class_predictions': 'sparse_categorical_crossentropy', 'bbox_predictions': 'mse'},
              metrics={'class_predictions': 'accuracy', 'bbox_predictions': 'mse'})

# Train the model
model.fit(train_images, {'class_predictions': train_class_targets, 'bbox_predictions': train_bbox_targets},
          validation_data=(val_images, {'class_predictions': val_class_targets, 'bbox_predictions': val_bbox_targets}),
          epochs=5)


Epoch 1/5
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m499s[0m 8s/step - bbox_predictions_mse: 0.3673 - class_predictions_accuracy: 0.9255 - loss: 3.3263 - val_bbox_predictions_mse: 0.2950 - val_class_predictions_accuracy: 1.0000 - val_loss: 3.1409
Epoch 2/5
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m327s[0m 6s/step - bbox_predictions_mse: 0.3019 - class_predictions_accuracy: 1.0000 - loss: 3.1478 - val_bbox_predictions_mse: 0.2949 - val_class_predictions_accuracy: 1.0000 - val_loss: 3.1409
Epoch 3/5
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 6s/step - bbox_predictions_mse: 0.3012 - class_predictions_accuracy: 1.0000 - loss: 3.1471 - val_bbox_predictions_mse: 0.2949 - val_class_predictions_accuracy: 1.0000 - val_loss: 3.1409
Epoch 4/5
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m282s[0m 5s/step - bbox_predictions_mse: 0.3017 - class_predictions_accuracy: 1.0000 - loss: 3.1477 - val_bbox_predictions_mse: 0.2949 - val_class_

<keras.src.callbacks.history.History at 0x26a15ccb220>

In [25]:
# Validate and output performance data
performance = model.evaluate(val_images, {'class_predictions': val_class_targets, 'bbox_predictions': val_bbox_targets})
print(f"Validation Loss: {performance[0]}")
print(f"Validation Class Accuracy: {performance[2]}")
print(f"Validation BBox MSE: {performance[1]}")


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 494ms/step - bbox_predictions_mse: 0.2913 - class_predictions_accuracy: 1.0000 - loss: 3.1372
Validation Loss: 3.1408536434173584
Validation Class Accuracy: 1.0
Validation BBox MSE: 0.2949081361293793
