In [1]:
import os
import xml.etree.ElementTree as ET
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Lambda, Conv2D, MaxPooling2D, BatchNormalization as BN,
    GlobalMaxPooling2D, Activation, Flatten, Dense
)
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
import tensorflow.keras.backend as K

2024-10-26 22:02:25.620086: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-26 22:02:25.632510: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-26 22:02:25.635976: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-26 22:02:25.645215: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def parse_xml_label(xml_file, class_name="weed"):
    """Parse a single Pascal VOC XML file to extract bounding boxes."""
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Get image dimensions
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    boxes = []
    for obj in root.iter('object'):
        name = obj.find('name').text
        if name == class_name:
            # Get bounding box coordinates
            xmlbox = obj.find('bndbox')
            xmin = int(xmlbox.find('xmin').text) / width
            ymin = int(xmlbox.find('ymin').text) / height
            xmax = int(xmlbox.find('xmax').text) / width
            ymax = int(xmlbox.find('ymax').text) / height

            # Store in YOLO format: (x_center, y_center, width, height)
            x_center = (xmin + xmax) / 2
            y_center = (ymin + ymax) / 2
            box_width = xmax - xmin
            box_height = ymax - ymin
            boxes.append([x_center, y_center, box_width, box_height])

    return np.array(boxes)


In [3]:
def load_data(image_folder, xml_folder, input_size=(416, 416)):
    X = []  # Images
    y = []  # Labels (bounding boxes)

    for xml_file in os.listdir(xml_folder):
        if xml_file.endswith('.xml'):
            xml_path = os.path.join(xml_folder, xml_file)

            # Parse the XML for bounding boxes
            boxes = parse_xml_label(xml_path)

            # Load the corresponding image
            img_file = xml_file.replace('.xml', '.jpg')
            img_path = os.path.join(image_folder, img_file)
            img = cv2.imread(img_path)

            if img is None:
                print(f"Warning: Could not load image {img_path}")
                continue

            # Resize and normalize the image
            img = cv2.resize(img, input_size)
            img = img / 255.0

            X.append(img)
            y.append(boxes)

    return np.array(X), np.array(y, dtype=object)

# Load dataset

X_train, y_train = load_data('images', 'processed_images')


In [4]:
def add_class_score_to_labels(labels, class_score=1.0):
    """Add a class score to each bounding box."""
    new_labels = []
    for boxes in labels:
        # Add the class score (e.g., 1.0) to each box
        boxes_with_score = np.hstack([boxes, np.full((boxes.shape[0], 1), class_score)])
        new_labels.append(boxes_with_score)
    return new_labels

# Add class scores to the labels
y_train_with_scores = add_class_score_to_labels(y_train)

# Pad the labels so all images have exactly 15 boxes
num_boxes = 15  # Maximum number of boxes per image
y_train_padded = pad_sequences(y_train_with_scores, maxlen=num_boxes, padding='post', dtype='float32')

print("Padded labels shape:", y_train_padded.shape)  # Should be (num_images, 15, 5)

Padded labels shape: (278, 15, 5)


In [None]:
def build_model(input_shape=(416, 416, 3), num_boxes=15, lr=1e-4):
    input_img = Input(shape=input_shape)

    # Preprocessing: Normalize input
    x = Lambda(lambda x: K.cast_to_floatx(x))(input_img)
    x = Lambda(lambda x: x / 255.0)(x)

    # Convolutional Layers
    for filters in [16, 32, 32, 16]:
        x = BN()(x)
        x = Conv2D(filters, 3, padding='valid', activation='selu', kernel_initializer='lecun_normal')(x)
        x = Conv2D(filters, 3, padding='valid', activation='selu', kernel_initializer='lecun_normal')(x)
        x = MaxPooling2D(2)(x)

    # Flatten the output
    x = Flatten()(x)

    # Fully connected layer to predict bounding boxes (x, y, width, height) and a class score for each box
    output = Dense(num_boxes * 5)(x)  # 5 values per box: (x, y, width, height, class)

    # Reshape the output to match (batch_size, num_boxes, 5)
    output = tf.keras.layers.Reshape((num_boxes, 5))(output)

    # Build and compile the model
    model = Model(inputs=input_img, outputs=output)
    optimizer = Adam(learning_rate=lr)
    model.compile(optimizer=optimizer, loss='mse', metrics=['accuracy'])

    return model

# Create the model
model = build_model()

# Train the model
history = model.fit(X_train, y_train_padded, epochs=10000, batch_size=8, validation_split=0.2)

I0000 00:00:1730005394.397840 2347707 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730005394.413879 2347707 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730005394.414101 2347707 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730005394.415633 2347707 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730005394.415900 2347707 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

Epoch 1/10000


I0000 00:00:1730005398.230558 2348080 service.cc:146] XLA service 0x7f78280996c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730005398.230597 2348080 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3080, Compute Capability 8.6
2024-10-26 22:03:18.284011: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-10-26 22:03:18.543954: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907




[1m 3/28[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 59ms/step - accuracy: 0.2361 - loss: 2.5292

I0000 00:00:1730005406.780386 2348080 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m27/28[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 24ms/step - accuracy: 0.2442 - loss: 1.5377





[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 339ms/step - accuracy: 0.2447 - loss: 1.5078 - val_accuracy: 0.1488 - val_loss: 0.2662
Epoch 2/10000
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.2907 - loss: 0.5276 - val_accuracy: 0.1167 - val_loss: 0.3421
Epoch 3/10000
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.2963 - loss: 0.3172 - val_accuracy: 0.1310 - val_loss: 0.3316
Epoch 4/10000
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.3509 - loss: 0.2307 - val_accuracy: 0.2107 - val_loss: 0.2638
Epoch 5/10000
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.3625 - loss: 0.1781 - val_accuracy: 0.2643 - val_loss: 0.2327
Epoch 6/10000
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.3771 - loss: 0.1432 - val_accuracy: 0.3381 - val_loss: 0.2007
Epoch 7/10000
[1m28/28[0m 

In [6]:
model.save('weed_detector_model.keras')