In [1]:
import tensorflow as tf

from akida_models import fetch_file

# Download the TFRecord test set from the Brainchip data server.
# NOTE(review): cache_subdir/extract semantics are those of akida_models.fetch_file - confirm
# that extract=True is meaningful for a plain .tfrecord file.
VOC_TFRECORD_URL = "https://data.brainchip.com/dataset-mirror/voc/test_20_classes.tfrecord"
VOC_TFRECORD_NAME = "voc_test_20_classes.tfrecord"

data_path = fetch_file(origin=VOC_TFRECORD_URL,
                       fname=VOC_TFRECORD_NAME,
                       cache_subdir='datasets/voc',
                       extract=True)



# Helper function to load and parse the TFRecord file.
def load_tf_dataset(tf_record_file_path):
    """Load a TFRecord file and return the parsed dataset together with its size.

    Args:
        tf_record_file_path (str): path of the TFRecord file to read.

    Returns:
        tuple: ``(parsed_dataset, len_dataset)``. ``parsed_dataset`` is a
        ``tf.data.Dataset`` whose elements are dicts with two keys:
        ``'image'`` (the JPEG decoded with 3 channels) and ``'objects'``
        (a dict holding ``'label'`` and ``'bbox'``, the latter reshaped to
        ``(number_of_labels, 4)``). ``len_dataset`` is the number of records
        found in the file.
    """
    tfrecord_files = [tf_record_file_path]

    # Feature description for parsing the TFRecord
    feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'objects/bbox': tf.io.VarLenFeature(tf.float32),
        'objects/label': tf.io.VarLenFeature(tf.int64),
    }

    def _count_tfrecord_examples(dataset):
        # Stream over the records instead of materialising them all in a list:
        # only the count is needed, not the serialized payloads.
        return sum(1 for _ in dataset)

    def _parse_tfrecord_fn(example_proto):
        example = tf.io.parse_single_example(example_proto, feature_description)

        # Decode the image from bytes
        image = tf.io.decode_jpeg(example['image'], channels=3)

        # Convert the VarLenFeature to a dense tensor
        label = tf.sparse.to_dense(example['objects/label'], default_value=0)

        # Boxes were flattened when serialized, so restore one row of 4 values
        # per label.
        bbox = tf.sparse.to_dense(example['objects/bbox'])
        bbox = tf.reshape(bbox, (tf.shape(label)[0], 4))

        # Return the nested structure directly rather than mutating the parsed
        # dict in place.
        return {
            'image': image,
            'objects': {
                'label': label,
                'bbox': bbox,
            },
        }

    # Create a TFRecordDataset
    dataset = tf.data.TFRecordDataset(tfrecord_files)
    len_dataset = _count_tfrecord_examples(dataset)
    parsed_dataset = dataset.map(_parse_tfrecord_fn)

    return parsed_dataset, len_dataset


# Class names; the position in this list is the class index used when
# printing per-class scores and when labelling predicted boxes.
labels = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

# Parse the downloaded TFRecord and report how many records it contains.
val_dataset, len_val_dataset = load_tf_dataset(data_path)
print(f"Loaded VOC2007 sample test data: {len_val_dataset} images.")

Loaded VOC2007 sample test data: 100 images.


In [2]:
from akida_models.detection.generate_anchors import generate_anchors

# Number of anchors to generate and the grid size handed to generate_anchors.
# Both values are reused later to build the final YOLO Reshape layer.
num_anchors = 5
grid_size = (7, 7)
# NOTE: `anchors` is rebound in the next cell by yolo_voc_pretrained(), so the
# value computed here is not the one used for evaluation and inference.
anchors = generate_anchors(val_dataset, num_anchors, grid_size)


Average IOU for 5 anchors: 0.70
Anchors:  [[1.12454, 1.84751], [1.93628, 2.82636], [3.16509, 3.61125], [4.55423, 5.11091], [5.43139, 5.86134]]


In [3]:
from timeit import default_timer as timer
from akida_models import yolo_voc_pretrained
from akida_models.detection.map_evaluation import MapEvaluation

# Load the pretrained model along with anchors.
# NOTE: this rebinds `anchors`, replacing the value returned by
# generate_anchors in the previous cell.
model_keras, anchors = yolo_voc_pretrained()
model_keras.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 224, 224, 3)]     0         
                                                                 
 rescaling (QuantizedRescali  (None, 224, 224, 3)      0         
 ng)                                                             
                                                                 
 conv_0 (QuantizedConv2D)    (None, 112, 112, 16)      448       
                                                                 
 conv_0/relu (QuantizedReLU)  (None, 112, 112, 16)     32        
                                                                 
 conv_1 (QuantizedConv2D)    (None, 112, 112, 32)      4640      
                                                                 
 conv_1/relu (QuantizedReLU)  (None, 112, 112, 32)     64        
                                                           

In [4]:
# Define QuantizationParams explicitly (every bit width set to 8) so that the quantization
# settings applied to the model below are visible.

import quantizeml
import json
from quantizeml.models import quantize, QuantizationParams,dump_config
# All configurable bit widths are set to 8 in this variant.
qparams = QuantizationParams(
    input_weight_bits=8,
    weight_bits=8,
    activation_bits=8,
    output_bits=8,
    per_tensor_activations=True,
    buffer_bits=24,
)

# Quantize the model.
# NOTE: every quantization cell rebinds `quantized_model`; later cells use
# whichever one was executed last.
quantized_model = quantize(model_keras, qparams=qparams)
quantized_model.summary()


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 224, 224, 3)]     0         
                                                                 
 rescaling (QuantizedRescali  (None, 224, 224, 3)      0         
 ng)                                                             
                                                                 
 conv_0 (QuantizedConv2D)    (None, 112, 112, 16)      448       
                                                                 
 conv_0/relu (QuantizedReLU)  (None, 112, 112, 16)     32        
                                                                 
 conv_1 (QuantizedConv2D)    (None, 112, 112, 32)      4640      
                                                                 
 conv_1/relu (QuantizedReLU)  (None, 112, 112, 32)     64        
                                                           

In [None]:
# Define QuantizationParams explicitly, this time with 4-bit weights and activations
# (input_weight_bits stays at 8), so that the settings applied to the model below are visible.

import quantizeml
import json
from quantizeml.models import quantize, QuantizationParams,dump_config
# 4-bit weights and activations.
# NOTE(review): unlike the sibling quantization cells, output_bits is not set
# here, so the library default applies - confirm this is intended.
qparams = QuantizationParams(
    input_weight_bits=8,
    weight_bits=4,
    activation_bits=4,
    per_tensor_activations=True,
    buffer_bits=24,
)

# Quantize the model.
# NOTE: every quantization cell rebinds `quantized_model`; later cells use
# whichever one was executed last.
quantized_model = quantize(model_keras, qparams=qparams)
quantized_model.summary()


In [None]:
# Define QuantizationParams explicitly, this time with 2-bit weights, activations and outputs
# (input_weight_bits stays at 8), so that the settings applied to the model below are visible.

import quantizeml
import json
from quantizeml.models import quantize, QuantizationParams,dump_config
# 2-bit weights, activations and outputs.
qparams = QuantizationParams(
    input_weight_bits=8,
    weight_bits=2,
    activation_bits=2,
    output_bits=2,
    per_tensor_activations=True,
    buffer_bits=24,
)

# Quantize the model.
# NOTE: every quantization cell rebinds `quantized_model`; later cells use
# whichever one was executed last.
quantized_model = quantize(model_keras, qparams=qparams)
quantized_model.summary()

In [6]:
import numpy
from tensorflow.keras import Model
from tensorflow.keras.layers import Reshape
from akida_models.detection.map_evaluation import MapEvaluation
from timeit import default_timer as timer


# Append the final reshape to the quantized model so that its output is laid
# out as (grid rows, grid cols, anchors, 4 box values + 1 + class scores).
classes = len(labels)
model_keras = quantized_model
yolo_output_shape = (grid_size[1], grid_size[0], num_anchors, 4 + 1 + classes)
output = Reshape(yolo_output_shape, name="YOLO_output")(model_keras.output)
model_keras = Model(model_keras.input, output)

# Create the mAP evaluator object
map_evaluator = MapEvaluation(
    model_keras,
    val_dataset,
    len_val_dataset,
    labels,
    anchors,
)

# Compute the scores for all validation images; the timed section covers the
# evaluation and the averaging of the returned mAP values.
start = timer()

map_dict, average_precisions = map_evaluator.evaluate_map()
mAP = sum(map_dict.values()) / len(map_dict)
end = timer()

# Per-class scores first (keys of average_precisions index into `labels`),
# then the aggregated metrics.
for label, average_precision in average_precisions.items():
    print(labels[label], '{:.4f}'.format(average_precision))
print('mAP 50: {:.4f}'.format(map_dict[0.5]))
print('mAP 75: {:.4f}'.format(map_dict[0.75]))
print('mAP: {:.4f}'.format(mAP))
print(f'Keras inference on {len_val_dataset} images took {end-start:.2f} s.\n')

                                                                                         

aeroplane 0.7300
bicycle 0.4417
bird 0.4833
boat 0.3070
bottle 0.2627
bus 0.7147
car 0.6889
cat 0.7670
chair 0.2726
cow 0.3700
diningtable 0.4711
dog 0.5736
horse 0.6147
motorbike 0.5083
person 0.4021
pottedplant 0.1094
sheep 0.2976
sofa 0.6283
train 0.6042
tvmonitor 0.5643
mAP 50: 0.8409
mAP 75: 0.5086
mAP: 0.4906
Keras inference on 100 images took 18.83 s.





In [None]:
# Rebuild a model without the last layer. model_keras.layers[-1] is the
# "YOLO_output" Reshape appended for the mAP evaluation, so the model must end
# at layers[-2]; using layers[-1] would keep the Reshape in place.
compatible_model = Model(model_keras.input, model_keras.layers[-2].output)

In [None]:
from cnn2snn import convert

# Convert the model rebuilt in the previous cell with cnn2snn and print a
# summary of the converted model.
model_akida = convert(compatible_model)
model_akida.summary()

In [None]:
import time
class Timer:
    """Minimal stopwatch built on ``time.perf_counter``.

    Call ``start()``, run the code to measure, then call ``stop()`` to get the
    elapsed time in seconds. The raw readings are kept in ``start_time`` and
    ``end_time`` (both ``None`` until set).
    """

    def __init__(self):
        self.start_time = None
        self.end_time = None

    def start(self):
        """Record the current ``time.perf_counter()`` reading as the start time."""
        self.start_time = time.perf_counter()

    def stop(self):
        """Record the stop time and return the seconds elapsed since ``start()``.

        Returns:
            float: ``end_time - start_time``.

        Raises:
            RuntimeError: if ``start()`` has not been called yet.
        """
        # Fail with an explicit message instead of an opaque
        # "unsupported operand" TypeError on `float - None`.
        if self.start_time is None:
            raise RuntimeError("Timer.stop() called before Timer.start()")
        self.end_time = time.perf_counter()
        return self.end_time - self.start_time

In [None]:
# Fetch the first element of the validation set. next() raises StopIteration
# right here if the dataset is empty, instead of leaving `first_image` unset
# (or stale from a previous run) for a later cell to trip over.
element = next(iter(val_dataset.take(1)))
first_image = element['image']

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import time

from akida_models.detection.processing import preprocess_image, decode_output

# Shuffle the data to take a random test image
#val_dataset = val_dataset.shuffle(buffer_size=1)


input_shape = model_akida.layers[0].input_dims

# Use the image extracted from the validation set in the previous cell
raw_image = first_image

# Keep the original image size for later bounding boxes rescaling
raw_height, raw_width, _ = raw_image.shape

# Pre-process the image
image = preprocess_image(raw_image, input_shape)
input_image = image[np.newaxis, :].astype(np.uint8)

# Time a single prediction. A dedicated name is used so that the `timer`
# function imported from timeit in earlier cells is not overwritten by a
# Timer instance.
inference_timer = Timer()
inference_timer.start()
pots = model_akida.predict(input_image)[0]
prediction_time = inference_timer.stop()
print(f"Prediction took {prediction_time:.4f} seconds")

# Reshape the potentials to prepare for decoding
h, w, c = pots.shape
pots = pots.reshape((h, w, len(anchors), 4 + 1 + len(labels)))

# Decode potentials into bounding boxes
raw_boxes = decode_output(pots, anchors, len(labels))

# Rescale boxes to the original image size; each row is
# [x1, y1, x2, y2, label index, score].
pred_boxes = np.array([[
    box.x1 * raw_width, box.y1 * raw_height, box.x2 * raw_width,
    box.y2 * raw_height,
    box.get_label(),
    box.get_score()
] for box in raw_boxes])

fig = plt.figure(num='VOC detection by Akida')
ax = fig.subplots(1)
img_plot = ax.imshow(np.zeros(raw_image.shape, dtype=np.uint8))
img_plot.set_data(raw_image)

# Draw one rectangle and one "<label> - <score>" caption per predicted box
for box in pred_boxes:
    rect = patches.Rectangle((box[0], box[1]),
                             box[2] - box[0],
                             box[3] - box[1],
                             linewidth=1,
                             edgecolor='r',
                             facecolor='none')
    ax.add_patch(rect)
    ax.text(box[0],
            box[1] - 5,
            f"{labels[int(box[4])]} - {box[5]:.2f}",
            color='red')

plt.axis('off')
plt.show()

In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Labels for the PASCAL VOC dataset
labels = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
          'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
          'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
          'train', 'tvmonitor']

# Initialize video capture from the inbuilt camera
video_capture = cv2.VideoCapture(0)

# `anchors` is the value returned with the pretrained model earlier in the
# notebook; it is not redefined here.

# Loop-invariant setup, done once instead of on every frame.
# The input shape is read the same way as in the single-image cell.
# NOTE(review): the previous `model_akida.input_shape[1:3]` slice assumed a
# leading batch dimension - confirm against the Akida Model API.
input_shape = model_akida.layers[0].input_dims
# Dedicated name so the `timer` function imported from timeit is not shadowed.
inference_timer = Timer()

try:
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break

        # Keep the original image size for later bounding boxes rescaling
        raw_height, raw_width, _ = frame.shape

        # OpenCV delivers frames in BGR order, whereas the evaluation images
        # are decoded as RGB, so convert before pre-processing. The BGR
        # `frame` is kept for drawing and display with OpenCV.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Pre-process the image
        image = preprocess_image(rgb_frame, input_shape)
        input_image = image[np.newaxis, :].astype(np.uint8)

        # Run and time the prediction
        inference_timer.start()
        pots = model_akida.predict(input_image)[0]
        prediction_time = inference_timer.stop()
        print(f"Prediction took {prediction_time:.4f} seconds")

        # Reshape the potentials to prepare for decoding
        h, w, c = pots.shape
        pots = pots.reshape((h, w, len(anchors), 4 + 1 + len(labels)))

        # Decode potentials into bounding boxes
        raw_boxes = decode_output(pots, anchors, len(labels))

        # Rescale boxes to the original image size; each row is
        # [x1, y1, x2, y2, label index, score].
        pred_boxes = np.array([[
            box.x1 * raw_width, box.y1 * raw_height, box.x2 * raw_width,
            box.y2 * raw_height,
            box.get_label(),
            box.get_score()
        ] for box in raw_boxes])

        # Draw bounding boxes on the frame
        for box in pred_boxes:
            cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0), 2)
            label = f"{labels[int(box[4])]} - {box[5]:.2f}"
            cv2.putText(frame, label, (int(box[0]), int(box[1]) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow('YOLO Video Detection', frame)

        # Press 'q' to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close windows even if a frame fails to
    # process, so the camera is not left locked by the kernel.
    video_capture.release()
    cv2.destroyAllWindows()