In [5]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras import layers, models
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
import cv2

In [6]:
# Paths to data
train_tfrecord = "D:/Onedrive/experiments/experiments/Objectron/bounding_box_regression/apple_dataset/train/Apple.tfrecord"
valid_tfrecord = "D:/Onedrive/experiments/experiments/Objectron/bounding_box_regression/apple_dataset/valid/Apple.tfrecord"
test_tfrecord = "D:/Onedrive/experiments/experiments/Objectron/bounding_box_regression/apple_dataset/test/Apple.tfrecord"
label_map_path = 'D:/Onedrive/experiments/experiments/Objectron/bounding_box_regression/apple_dataset/test/Apple_label_map.pbtxt'

In [7]:
# Define function to parse TFRecords
def parse_tfrecord_fn(tfrecord):
    feature_description = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'image/object/class/label': tf.io.VarLenFeature(tf.int64),
    }
    example = tf.io.parse_single_example(tfrecord, feature_description)

    image = tf.io.decode_jpeg(example['image/encoded'], channels=3)
    bboxes = tf.stack([
        tf.sparse.to_dense(example['image/object/bbox/xmin']),
        tf.sparse.to_dense(example['image/object/bbox/ymin']),
        tf.sparse.to_dense(example['image/object/bbox/xmax']),
        tf.sparse.to_dense(example['image/object/bbox/ymax'])
    ], axis=-1)
    labels = tf.sparse.to_dense(example['image/object/class/label'])

    return image, bboxes, labels

# Load TFRecord datasets
def load_dataset(tfrecord_path):
    raw_dataset = tf.data.TFRecordDataset(tfrecord_path)
    return raw_dataset.map(parse_tfrecord_fn)

In [8]:
# Build CNN model for feature extraction
def build_cnn():
    model = models.Sequential([
        layers.Conv2D(64, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
    ])
    return model

# Helper function to apply non-maximum suppression
def non_max_suppression(bboxes, scores, threshold=0.3):
    selected_indices = tf.image.non_max_suppression(
        bboxes, scores, max_output_size=10, iou_threshold=threshold)
    return tf.gather(bboxes, selected_indices)

In [9]:
cnn = build_cnn()

In [None]:
# Corrected extract_features function
def extract_features(dataset):
    features = []
    labels = []
    bboxes = []
    for image, boxes, lbls in dataset:
        # Process each bounding box and label associated with the image
        for i in range(len(lbls)):
            if i < len(boxes):  # Ensure we have a corresponding bounding box for each label
                # Resize and preprocess each region proposal
                img_resized = tf.image.resize(image, (224, 224))
                img_resized = tf.expand_dims(img_resized, axis=0)
                # Extract features from CNN
                feature_vector = cnn.predict(img_resized)
                features.append(feature_vector[0])
                labels.append(lbls[i].numpy())
                bboxes.append(boxes[i].numpy())
    return np.array(features), np.array(labels), np.array(bboxes)

In [26]:
train_dataset = load_dataset(train_tfrecord)
valid_dataset = load_dataset(valid_tfrecord)
test_dataset = load_dataset(test_tfrecord)

# Extract features for training
train_features, train_labels, train_bboxes = extract_features(train_dataset.batch(1))
valid_features, valid_labels, valid_bboxes = extract_features(valid_dataset.batch(1))



  return np.array(features), np.array(labels), np.array(bboxes)




In [32]:
# Flatten train_labels before calling np.unique
train_labels_flattened = np.concatenate(train_labels)

# Debugging: Print shapes of train_features and train_labels_flattened to ensure they match
print("Shape of train_features:", train_features.shape)
print("Length of train_labels_flattened:", len(train_labels_flattened))

# Check for unique labels
unique_labels = np.unique(train_labels_flattened)

# Initialize SVM classifiers for each class
svms = {}
for label in unique_labels:
    svm = SVC(kernel='linear', probability=True)
    # Generate binary labels for each class
    binary_labels = (train_labels_flattened == label).astype(int)
    
    # Debugging: Print the shape of binary_labels to ensure it matches train_features
    print(f"Training SVM for label {label}:")
    print("Binary labels shape:", binary_labels.shape)
    
    # Fit the SVM with consistent-sized inputs
    svm.fit(train_features, binary_labels)
    svms[label] = svm

# Train Bounding Box Regressor for each class
bbox_regressors = {}
for label in unique_labels:
    regressor = LinearRegression()
    # Get indices of current label to filter features and bounding boxes
    label_indices = (train_labels_flattened == label)
    
    # Debugging: Print shapes to ensure consistency
    print(f"Training Bounding Box Regressor for label {label}:")
    print("train_features[label_indices] shape:", train_features[label_indices].shape)
    print("train_bboxes[label_indices] shape:", train_bboxes[label_indices].shape)
    
    # Fit the regressor with consistent-sized inputs
    regressor.fit(train_features[label_indices], train_bboxes[label_indices])
    bbox_regressors[label] = regressor


Shape of train_features: (221, 256)
Length of train_labels_flattened: 339
Training SVM for label 1:
Binary labels shape: (339,)


ValueError: Found input variables with inconsistent numbers of samples: [221, 339]

In [None]:
# Perform inference on test dataset
def test_model(dataset):
    for image, boxes, lbls in dataset:
        img_resized = tf.image.resize(image, (224, 224))
        feature_vector = cnn.predict(tf.expand_dims(img_resized, axis=0))[0]
        
        # Classification
        class_scores = {label: svm.predict_proba([feature_vector])[0, 1] for label, svm in svms.items()}
        predicted_class = max(class_scores, key=class_scores.get)

        # Bounding Box Regression
        bbox_regressor = bbox_regressors[predicted_class]
        bbox_adjustment = bbox_regressor.predict([feature_vector])[0]
        
        # Original bounding box
        original_bbox = boxes[0].numpy()
        adjusted_bbox = [
            original_bbox[0] + bbox_adjustment[0] * original_bbox[2],
            original_bbox[1] + bbox_adjustment[1] * original_bbox[3],
            original_bbox[2] * np.exp(bbox_adjustment[2]),
            original_bbox[3] * np.exp(bbox_adjustment[3])
        ]
        
        # Non-Maximum Suppression
        final_bboxes = non_max_suppression([adjusted_bbox], [class_scores[predicted_class]])
        
        # Display result
        plt.imshow(image[0].numpy())
        plt.gca().add_patch(plt.Rectangle(
            (final_bboxes[0, 0], final_bboxes[0, 1]), 
            final_bboxes[0, 2] - final_bboxes[0, 0], 
            final_bboxes[0, 3] - final_bboxes[0, 1], 
            edgecolor='red', facecolor='none'))
        plt.title(f'Predicted Class: {predicted_class}')
        plt.show()


In [None]:
# Run testing on a few test samples
test_model(test_dataset)