In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import tempfile
import numpy as np

from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

import cv2
from tqdm import tqdm
import time
# Report the TensorFlow version in use and the GPU device name TensorFlow returns.
print(tf.__version__)
print("The following GPU devices are available: %s" % (tf.test.gpu_device_name(),))

2.5.0
The following GPU devices are available: /device:GPU:0


In [3]:
# Load the detector from TensorFlow Hub; per the handle this is a Faster R-CNN
# model with a ResNet-152 backbone at 640x640.
model = hub.load(
    "https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_640x640/1"
)

In [2]:
import pandas as pd

# Location of the label CSV, relative to the notebook's working directory.
coco_labels_path = './../data/coco_labels.csv'

# The CSV has no header row; its first column becomes the index.
coco_labels = pd.read_csv(coco_labels_path, index_col=0, header=None)

# Column 1 as a plain dict keyed by the index values
# (presumably class id -> class name; verify against the CSV).
labels_dict = coco_labels[1].to_dict()

In [None]:
# Display the label mapping built from the COCO labels CSV as this cell's output.
labels_dict

In [None]:
import imageio
path = 'data/images/2.jpg'

# Read the image and add a leading axis so the detector receives a batch of one.
image_tensor = np.expand_dims(imageio.imread(path), axis=0)

# Run the detector and keep the per-detection class ids for inspection.
detector_output = model(image_tensor)
class_ids = detector_output["detection_classes"]

In [None]:
### DRAWING UTILS

def display_image(image):
    """Show `image` with matplotlib on a new 20x15 figure with the grid disabled."""
    figure_size = (20, 15)
    plt.figure(figsize=figure_size)
    plt.grid(False)
    plt.imshow(image)

def download_and_resize_image(url, new_width=256, new_height=256, display=False):
    """Download an image, fit it to the requested size and save it as a temporary JPEG.

    Args:
        url: Address of the image to download.
        new_width: Target width in pixels passed to `ImageOps.fit`.
        new_height: Target height in pixels passed to `ImageOps.fit`.
        display: When true, show the resized image with `display_image`.

    Returns:
        Path of the temporary ".jpg" file that was written. The file is not
        removed here; the caller is responsible for deleting it.
    """
    # Imported locally: the notebook's import cell does not provide these names,
    # so the function previously failed with NameError on first use.
    import os
    from io import BytesIO
    from urllib.request import urlopen

    fd, filename = tempfile.mkstemp(suffix=".jpg")
    # mkstemp hands back an already-open OS-level descriptor; close it so it is
    # not leaked (the file is re-opened by name when the image is saved).
    os.close(fd)

    # Close the HTTP response as soon as the payload has been read.
    with urlopen(url) as response:
        image_data = BytesIO(response.read())

    pil_image = Image.open(image_data)
    # Image.LANCZOS is the same filter formerly exposed as Image.ANTIALIAS,
    # which newer Pillow releases no longer provide.
    pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.LANCZOS)
    pil_image_rgb = pil_image.convert("RGB")
    pil_image_rgb.save(filename, format="JPEG", quality=90)
    print("Image downloaded to %s." % filename)
    if display:
        display_image(pil_image)
    return filename

def _text_size(font, text):
    """Return the (width, height) of `text` when rendered with `font`."""
    # Newer Pillow releases dropped `getsize`; the right/bottom edges returned
    # by `getbbox` carry the same width/height information.
    if hasattr(font, "getbbox"):
        bbox = font.getbbox(text)
        return bbox[2], bbox[3]
    return font.getsize(text)


def draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax, color, font, thickness=4, display_str_list=()):
    """Draw one bounding box, with optional stacked text labels, onto a PIL image.

    The box coordinates are fractions of the image size: they are multiplied
    by the image width/height to obtain pixel positions. The image is modified
    in place.

    Args:
        image: PIL image to draw on.
        ymin, xmin, ymax, xmax: Box edges as fractions of image height/width.
        color: Colour used for the box outline and the label background.
        font: PIL font used to measure and draw the labels.
        thickness: Line width of the box outline.
        display_str_list: Label strings; the last entry is drawn closest to
            the box and earlier entries are stacked above it.
    """
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                  ymin * im_height, ymax * im_height)
    draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)],
              width=thickness, fill=color)

    # Total label height including a 5% margin above and below each string.
    display_str_heights = [_text_size(font, ds)[1] for ds in display_str_list]
    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

    # Put the labels above the box when they fit; otherwise place them just
    # inside its top edge.
    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = top + total_display_str_height

    # Draw from the bottom label upwards.
    for display_str in display_str_list[::-1]:
        text_width, text_height = _text_size(font, display_str)
        margin = np.ceil(0.05 * text_height)
        draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                        (left + text_width, text_bottom)],
                       fill=color)
        draw.text((left + margin, text_bottom - text_height - margin),
                  display_str,
                  fill="black",
                  font=font)
        # Each label background is text_height + 2 * margin tall, so move up by
        # exactly that amount; subtracting the margins instead made consecutive
        # labels overlap.
        text_bottom -= text_height + 2 * margin

def draw_boxes(image, boxes, class_names, scores, max_boxes=100, min_score=0.3):
    """Overlay labelled bounding boxes on `image` for sufficiently confident detections.

    Only the first `max_boxes` entries are considered, and of those only the
    ones whose score is at least `min_score` are drawn. `image` is updated in
    place (via `np.copyto`) and also returned.

    Args:
        image: Writable array holding the picture to annotate.
        boxes: Array whose rows are (ymin, xmin, ymax, xmax), interpreted by
            `draw_bounding_box_on_image` as fractions of the image size.
        class_names: Per-detection class ids, looked up in `labels_dict`.
        scores: Per-detection confidence scores.
        max_boxes: Upper bound on the number of leading detections examined.
        min_score: Minimum score a detection needs in order to be drawn.

    Returns:
        The same `image` object, with the boxes drawn onto it.
    """
    colors = list(ImageColor.colormap.values())
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf", 25)
    except IOError:
        print("Font not found, using default font.")
        font = ImageFont.load_default()

    # The array -> PIL conversion does not depend on the individual box, so it
    # is done once (lazily, on the first box that qualifies) instead of being
    # repeated, together with the copy back, for every box.
    image_pil = None
    for i in range(min(boxes.shape[0], max_boxes)):
        if scores[i] >= min_score:
            if image_pil is None:
                image_pil = Image.fromarray(np.uint8(image)).convert("RGB")
            ymin, xmin, ymax, xmax = tuple(boxes[i])
            class_id = class_names[i]
            # Fall back to the raw id when the label table has no entry, rather
            # than aborting the whole drawing pass with a KeyError.
            label = labels_dict.get(class_id, class_id)
            display_str = "{}: {}%".format(label, int(100 * scores[i]))
            color = colors[hash(class_id) % len(colors)]
            draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, font,
                                       display_str_list=[display_str])

    # Leave `image` untouched when nothing was drawn.
    if image_pil is not None:
        np.copyto(image, np.array(image_pil))
    return image


In [None]:
def load_img(path):
    """Read the file at `path` and decode it with `tf.image.decode_jpeg` (3 channels).

    The type of the decoded object is printed before it is returned.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    print(type(decoded))
    return decoded

def run_detector(detector, path):
    """Run `detector` on the image stored at `path` and display the annotated result.

    The image is loaded with `load_img`, passed to the detector as a batch of
    one, and the boxes/classes/scores of that single batch entry are drawn
    with `draw_boxes` and shown with `display_image`. The image shape, the
    number of returned detections and the inference time are printed.

    Args:
        detector: Callable taking a batched image and returning a dict whose
            values expose `.numpy()`, with the keys "detection_boxes",
            "detection_classes" and "detection_scores".
        path: Location of the image file to analyse.
    """
    img = load_img(path)
    print(img.shape)
    start_time = time.time()
    result = detector(img[np.newaxis, ...])
    end_time = time.time()

    result = {key: value.numpy() for key, value in result.items()}
    # The outputs carry a leading batch axis (a batch of one here), so the
    # detections must be counted on the first batch entry; taking len() of the
    # batched array always reported a single object.
    scores = result["detection_scores"][0]
    print("Found %d objects." % len(scores))
    print("Inference time: ", end_time - start_time)
    image_with_boxes = draw_boxes(img.numpy(), result["detection_boxes"][0], result["detection_classes"][0], scores)
    display_image(image_with_boxes)
    
def run_detector_from_camera(detector, num_frames=10, camera_index=0):
    """Run `detector` on frames grabbed from a camera and show the annotated frames.

    Up to `num_frames` frames are read from the capture device. Each frame is
    passed to the detector as a batch of one, the detections of that frame are
    printed and drawn with `draw_boxes`, and the annotated frame is shown in
    an OpenCV window. The loop stops early when "q" is pressed or when a frame
    cannot be read. The capture device and the window are always released.

    Args:
        detector: Callable taking a batched uint8 image tensor and returning a
            dict whose values expose `.numpy()`, with the keys
            "detection_boxes", "detection_classes" and "detection_scores".
        num_frames: Maximum number of frames to process.
        camera_index: Device index handed to `cv2.VideoCapture`.
    """
    cap1 = cv2.VideoCapture(camera_index)
    try:
        for _ in tqdm(range(num_frames)):
            ret, frame_bgr = cap1.read()
            if not ret:
                # No frame was delivered (camera missing/busy); stop instead of
                # feeding None into the detector.
                print("Failed to read a frame from the camera.")
                break

            # OpenCV delivers frames in BGR order, while draw_boxes treats the
            # array as RGB; presumably the detector also expects RGB like the
            # decoded JPEGs used in run_detector -- confirm for the model used.
            frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

            batch = tf.expand_dims(tf.convert_to_tensor(frame_rgb, dtype=tf.uint8), 0)
            # Convert outputs to numpy: draw_boxes needs hashable class ids and
            # a writable image array, neither of which tensors provide.
            result = {key: value.numpy() for key, value in detector(batch).items()}
            boxes = result["detection_boxes"][0]
            classes = result["detection_classes"][0]
            scores = result["detection_scores"][0]
            print("detection_boxes", boxes)
            print("detection_classes", classes)
            print("detection_scores", scores)

            image_with_boxes = draw_boxes(frame_rgb, boxes, classes, scores)
            # Show the annotated frame (previously the raw input was shown),
            # converted back to the BGR order that cv2.imshow expects.
            cv2.imshow("dsaf", cv2.cvtColor(image_with_boxes, cv2.COLOR_RGB2BGR))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap1.release()
        cv2.destroyAllWindows()

In [None]:
# Run the loaded detector on frames from the default camera (device 0).
run_detector_from_camera(model)