In [9]:
import numpy as np
import cv2
from object_detection.builders import model_builder
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import config_util
from object_detection.utils import label_map_util
import tensorflow as tf
import os

import six
from playsound import playsound
import collections
import math
import matplotlib.pyplot as plt
import tqdm
%matplotlib inline

In [10]:
# Label map that ships next to the exported inference graph.
PATH_TO_LABELS = os.path.join("inference_graph/labelmap.pbtxt")
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

# Quieten TensorFlow: native (C++) log level via the environment, Python logger via the API.
# NOTE(review): tensorflow is already imported at this point, so the environment
# variable may be read too late to take effect -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.get_logger().setLevel('ERROR')

Načtení modelu

In [11]:

# Parse the exported pipeline configuration and build the detector described by
# its 'model' section, in inference (non-training) mode.
configs = config_util.get_configs_from_pipeline_file("inference_graph/pipeline.config")
model_config = configs['model']
detection_model = model_builder.build(model_config=model_config, is_training=False)

Načtení natrénovaných hodnot do modelu.

In [12]:
# Load the trained weights from checkpoint 'ckpt-0' into the model built above.
# expect_partial() is called because only the `model` object is tracked here.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(
    os.path.join('inference_graph/checkpoint/', 'ckpt-0')
).expect_partial()

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f49bc44d410>

In [13]:

@tf.function
def detect_fn(image):
    """Run the detection model end to end on `image`.

    The input goes through the model's ``preprocess``, ``predict`` and
    ``postprocess`` stages in that order.

    Args:
        image: Input handed to ``detection_model.preprocess``
            (callers in this notebook pass a batched float32 image tensor).

    Returns:
        A 3-tuple ``(detections, prediction_dict, shapes)`` where
        ``detections`` is the post-processed output, ``prediction_dict`` is the
        raw output of ``predict`` and ``shapes`` is the shape tensor returned by
        ``preprocess``, flattened to one dimension.
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    postprocessed = detection_model.postprocess(raw_predictions, true_shapes)

    flat_shapes = tf.reshape(true_shapes, [-1])
    return postprocessed, raw_predictions, flat_shapes


# Rebuild the class-id -> category lookup from the label map
# (same call, same arguments as the one made right after PATH_TO_LABELS is defined).
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

### Predikce

In [14]:
def get_image_metadata(image_path, cnt, dataset_id=1):
    """Collect the per-image metadata needed for the COCO export.

    The image is read with OpenCV only to obtain its pixel dimensions.

    Args:
        image_path: Path of the image file.
        cnt: Identifier stored under the ``'id'`` key.
        dataset_id: Identifier stored under the ``'dataset_id'`` key.

    Returns:
        dict with the keys ``'id'``, ``'dataset_id'``, ``'path'``,
        ``'height'``, ``'width'`` and ``'file_name'``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path`` (missing file,
            unsupported format, or not an image at all).
    """
    image_np = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image_np is None:
        raise ValueError("Could not read image: {!r}".format(image_path))

    height, width = image_np.shape[:2]
    return {
        'id': cnt,
        'dataset_id': dataset_id,
        'path': image_path,
        'height': height,
        'width': width,
        'file_name': os.path.basename(image_path),
    }

In [15]:
def predict_elephants(image_path, min_score_thresh=0.40):
    """Detect objects in one image and draw the accepted boxes on a copy of it.

    A detection is accepted when its score is strictly greater than
    ``min_score_thresh`` and its class id (after adding the label offset of 1)
    is present in ``category_index``.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        min_score_thresh: Score a detection must exceed to be kept.

    Returns:
        A 3-tuple ``(image_np_with_detections, number_of_items, detections)``:
        the annotated copy of the image, the number of accepted detections
        whose label contains "Elephant", and the unmodified ``detections``
        output of ``detect_fn``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image_np = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image_np is None:
        raise ValueError("Could not read image: {!r}".format(image_path))

    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections, _, _ = detect_fn(input_tensor)

    # Pull the first batch entry out of each tensor once, instead of converting
    # the same tensors again on every loop iteration.
    label_id_offset = 1
    boxes = detections['detection_boxes'][0].numpy()
    scores = detections['detection_scores'][0].numpy()
    class_ids = (detections['detection_classes'][0].numpy() + label_id_offset).astype(int)

    # Keyed by the box tuple, so identical boxes share one entry and are drawn once.
    box_to_display_str_map = collections.defaultdict(list)
    number_of_items = 0

    for raw_box, score, class_id in zip(boxes, scores, class_ids):
        if not (score > min_score_thresh):
            continue
        if class_id not in category_index:
            continue

        box = tuple(raw_box.tolist())
        class_name = category_index[class_id]['name']
        box_to_display_str_map[box].append(
            '{}: {}%'.format(class_name, round(100 * score)))

        # Counting looks at the first label recorded for this box.
        if "Elephant" in box_to_display_str_map[box][0]:
            number_of_items += 1

    image_np_with_detections = image_np.copy()
    im_height, im_width = image_np.shape[:2]

    for ymin, xmin, ymax, xmax in box_to_display_str_map:
        # Box coordinates are scaled by the image size to get pixel positions.
        x = xmin * im_width
        y = ymin * im_height
        w = xmax * im_width - x
        h = ymax * im_height - y

        top_left = (int(x), int(y))
        bottom_right = (int(x) + int(w), int(y) + int(h))

        # OpenCV colours are BGR: red rectangle, blue caption.
        cv2.rectangle(image_np_with_detections, top_left, bottom_right, (0, 0, 255), 4)
        cv2.putText(image_np_with_detections, 'Elephant', (int(x), int(y) - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

    return image_np_with_detections, number_of_items, detections



In [16]:
# Run the detector over every file in `directory`, keeping the raw detections
# and the matching image metadata in two parallel lists.
images_metadata = []
tf_detections = []

directory = '../SLONI_label'
# os.listdir returns entries in arbitrary order; sorting makes the image ids
# (i + 1) reproducible from one run to the next.
for i, filename in enumerate(tqdm.tqdm(sorted(os.listdir(directory)))):
    image_path = os.path.join(directory, filename)
    with tf.device('/GPU:0'):
        image, count, detections = predict_elephants(image_path)

    tf_detections.append(detections)
    metadata = get_image_metadata(image_path, i+1)
    images_metadata.append(metadata)

    

In [17]:
import json

# `tf_detections` is a list where each item holds the detection results for one image,
# and `images_metadata` is the parallel list of metadata dictionaries for those images.

# Module-level export state: convert_to_coco() appends to `coco_output` and
# continues numbering from `annotation_id`, so several calls accumulate into
# one dataset. Re-running these assignments starts a fresh export.
coco_output = {
    "images": [],
    "categories": [
        {"id": 1, "name": "Elephant", "supercategory": "", "color": "#3ab7dd", "metadata": {}, "keypoint_colors": []}
    ],
    "annotations": []
}
annotation_id = 1


def convert_to_coco(tf_detections, images_metadata, min_score_thresh=0.4):
    """Append images and their detections to the module-level COCO structure.

    Args:
        tf_detections: One detection dict per image. The first batch entry of
            ``'detection_boxes'`` (rows of ``ymin, xmin, ymax, xmax``) and of
            ``'detection_scores'`` is used; anything ``np.asarray`` can convert
            is accepted.
        images_metadata: One metadata dict per image, in the same order, with
            the keys ``'id'``, ``'dataset_id'``, ``'path'``, ``'width'``,
            ``'height'`` and ``'file_name'``.
        min_score_thresh: Detections scoring below this value are skipped.

    Returns:
        The module-level ``coco_output`` dict, after the new entries were added.
        Every annotation is written with ``category_id`` 1.
    """
    global annotation_id
    for image_metadata, detection in zip(images_metadata, tf_detections):
        img_width = image_metadata["width"]
        img_height = image_metadata["height"]

        # Add image info
        coco_output["images"].append({
            "id": image_metadata["id"],
            "dataset_id": image_metadata["dataset_id"],
            "category_ids": [],
            "path": os.path.join('datasets', image_metadata["path"]),
            "width": img_width,
            "height": img_height,
            "file_name": image_metadata["file_name"],
            "annotated": False,
            "annotating": [],
            "num_annotations": 0,
            "metadata": {},
            "deleted": False,
            "milliseconds": 0,
            "events": [],
            "regenerate_thumbnail": False
        })

        # Convert once per image rather than once per box.
        boxes = np.asarray(detection['detection_boxes'][0])
        scores = np.asarray(detection['detection_scores'][0])

        for box, score in zip(boxes, scores):
            if score < min_score_thresh:
                continue

            # Plain Python floats keep the values JSON-serialisable no matter
            # which dtype the detector produced.
            ymin, xmin, ymax, xmax = (float(v) for v in box)

            # Scale the box coordinates by the image size, one decimal place.
            xmin = round(xmin * img_width, 1)
            ymin = round(ymin * img_height, 1)
            xmax = round(xmax * img_width, 1)
            ymax = round(ymax * img_height, 1)

            # COCO bounding boxes are [x, y, width, height].
            x, y = xmin, ymin
            w = round(xmax - xmin, 1)
            h = round(ymax - ymin, 1)

            # The "segmentation" is just the rectangle's four corners.
            segmentation_points = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]

            coco_output["annotations"].append({
                "id": annotation_id,
                "image_id": image_metadata["id"],
                "category_id": 1,
                "segmentation": [segmentation_points],
                "area": round(w * h, 0),
                "bbox": [x, y, w, h],
                "iscrowd": False,
                "isbbox": True,
                "color": "#8eb517",
                "metadata": {}
            })
            annotation_id += 1

    return coco_output

# Convert everything collected so far and write it out as pretty-printed UTF-8 JSON.
coco_dataset = convert_to_coco(tf_detections, images_metadata)
with open('annotations.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(coco_dataset, ensure_ascii=False, indent=4))

In [18]:
# images_metadata = []
# tf_detections = []

# directory = '../SLONI_label'
# for i, filename in enumerate(tqdm.tqdm(os.listdir(directory))):
#     image_path = os.path.join(directory, filename)
#     with tf.device('/GPU:0'):
#         image, count, detections = predict_elephants(image_path)
    
#     tf_detections.append(detections)
#     metadata = get_image_metadata(image_path, i+1)
#     images_metadata.append(metadata)

#     if i == len(os.listdir(directory))//2:
#         coco_output = convert_to_coco(tf_detections, images_metadata)
#         assert len(tf_detections) == len(images_metadata)
#         tf_detections.clear()
#         images_metadata.clear()

# coco_dataset = convert_to_coco(tf_detections, images_metadata) 
# with open('annotations.json', 'w', encoding='utf-8') as f:
#     json.dump(coco_dataset, f, ensure_ascii=False, indent=4)

  0%|          | 0/4671 [00:00<?, ?it/s]2024-02-08 23:02:28.929703: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8600
100%|██████████| 4671/4671 [16:30<00:00,  4.72it/s]   
