In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1): env var, set before tensorflow is imported below
import pathlib
import tensorflow as tf

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2): raise the TensorFlow logger's level to ERROR

In [2]:
# Enable GPU dynamic memory allocation
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as error:
    # Memory growth has to be configured before the GPUs are initialized;
    # if this cell runs too late, report it instead of aborting the notebook.
    print('Could not enable GPU memory growth: {}'.format(error))

In [3]:
# Directory of the exported model to run. Point this at another export to
# switch models - '../exported-models/robo6-3-1' is recommended.
PATH_TO_MODEL_DIR = '../exported-models/robo6-3-1'

# Label map file from which the category index for the detections is built.
PATH_TO_LABELS = '../annotations/label_map.pbtxt'

In [4]:
import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

# The SavedModel lives in the "saved_model" sub-directory of the export.
PATH_TO_SAVED_MODEL = PATH_TO_MODEL_DIR + "/saved_model"

# flush=True: the message has no trailing newline, so force it out before
# the blocking load starts.
print('Loading model...', end='', flush=True)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()

# Load saved model and build the detection function
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)

end_time = time.perf_counter()
elapsed_time = end_time - start_time
print('Done! Took {} seconds'.format(elapsed_time))

Loading model...Done! Took 8.620329856872559 seconds


In [5]:
# Load the label map into the category index that is later handed to the
# visualizer when boxes are drawn.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

In [8]:
import numpy as np
import warnings
import ipywidgets as widgets
from IPython.display import display
from PIL import ImageGrab
from PIL import Image
import win32gui
import cv2
warnings.filterwarnings('ignore')   # Suppress Matplotlib warnings

# Image widget that shows the latest annotated frame; perform_inference()
# overwrites its `value` with freshly encoded PNG bytes.
live_view = widgets.Image()

def perform_inference(image, max_boxes_to_draw=13, min_score_thresh=.30):
    """Run the detector on one frame and show the annotated result in `live_view`.

    Args:
        image: Frame to analyse; it is converted with `np.array`.
            NOTE(review): the channel reordering before encoding assumes an
            RGB image with at least three channels - confirm for new sources.
        max_boxes_to_draw: Forwarded to the visualizer as `max_boxes_to_draw`.
            Defaults to the previously hard-coded 13.
        min_score_thresh: Forwarded to the visualizer as `min_score_thresh`.
            Defaults to the previously hard-coded 0.30.

    Returns:
        None. The PNG-encoded annotated frame is written to `live_view.value`.
    """
    image_np = np.array(image)

    # The input needs to be a tensor, and the model expects a batch of images,
    # so convert it and add a leading axis with `tf.newaxis`.
    input_tensor = tf.convert_to_tensor(image_np)[tf.newaxis, ...]

    detections = detect_fn(input_tensor)

    # All outputs are batched tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    # Draw on a copy so the captured frame itself is left untouched.
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        detections['detection_boxes'],
        detections['detection_classes'],
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        agnostic_mode=False)

    # Reorder channels 0,1,2 -> 2,1,0 before encoding (OpenCV works in BGR
    # order; this assumes the frame is RGB).
    encoded_ok, encoded_image = cv2.imencode('.png', image_np_with_detections[:, :, [2, 1, 0]])
    # Only replace the displayed frame when the encoder reports success, so a
    # failed encode keeps the previous frame on screen.
    if encoded_ok:
        live_view.value = encoded_image.tobytes()


# Show the widget once; the loop below keeps replacing its contents.
display(live_view)

# Title of the window to capture.
WINDOW_TITLE = r'SimSpark'
# GetWindowRect includes a border around the window; these are the amounts
# (in pixels) trimmed from each edge before capturing.
BORDER_LEFT, BORDER_TOP, BORDER_RIGHT, BORDER_BOTTOM = 8, 31, 8, 8

try:
    while True:
        hwnd = win32gui.FindWindow(None, WINDOW_TITLE)
        if not hwnd:
            # A null handle means no window with that title exists; stop with
            # a clear message instead of failing inside GetWindowRect.
            print('Window {!r} not found'.format(WINDOW_TITLE))
            break

        left, top, right, bottom = win32gui.GetWindowRect(hwnd)

        # the dimensions have a border around the window, we subtract that
        x1 = left + BORDER_LEFT
        y1 = top + BORDER_TOP
        x2 = right - BORDER_RIGHT
        y2 = bottom - BORDER_BOTTOM

        dimensions = (x1, y1, x2, y2)

        # A minimized or very small window leaves an empty or inverted
        # rectangle after trimming; skip the frame and retry shortly.
        if x2 <= x1 or y2 <= y1:
            time.sleep(0.1)
            continue

        # capture the screen
        image = ImageGrab.grab(dimensions)

        perform_inference(image)

except KeyboardInterrupt:
    print('Stopped')

Image(value=b'')

Stopped
