In [1]:
import cv2
import time
import pyttsx3
import threading
import webcolors
import numpy as np
import numexpr as ne
import face_recognition

from tqdm import tqdm
from imutils import paths
from collections import Counter
from sklearn.cluster import KMeans
from darkflow.net.build import TFNet
from multiprocessing.dummy import Pool as ThreadPool
from webcolors import rgb_percent_to_hex, hex_to_name, hex_to_rgb, rgb_to_hex, css3_hex_to_names

DATASET_FILE_NAME = 'model_encodings.pickle'
FONT = cv2.FONT_HERSHEY_SIMPLEX

pool = ThreadPool(processes=10)
engine = pyttsx3.init()

%config InlineBackend.figure_format = 'svg'

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Extract color

In [2]:
def color(image):
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image = image.reshape((image.shape[1] * image.shape[0], 3))

    clt = KMeans(n_clusters = 8)
    clt.fit(image)
    hist = centroid_histogram(clt)
    cluster_centers = list(clt.cluster_centers_)
    labs = ['#' + ''.join(["%0.2X"%(j) for j in i.astype("uint8").tolist()]) for i in clt.cluster_centers_]

    INDEX = list(hist).index(max(hist))

    return (labs[INDEX],  clt.cluster_centers_[INDEX][::-1])

def _hex_to_name(requested_colour):
    try:
        closest_name = actual_name = hex_to_name(requested_colour)
    except ValueError:
        closest_name = closest_colour(requested_colour)
        actual_name = None
    return actual_name, closest_name

def closest_colour(requested_colour):
    min_colours = {}
    try:
        requested_colour = hex_to_rgb(requested_colour)
    except:
        pass
    for key, name in css3_hex_to_names.items():
        r_c, g_c, b_c = hex_to_rgb(key)
        rd = (r_c - requested_colour[0]) ** 2
        gd = (g_c - requested_colour[1]) ** 2
        bd = (b_c - requested_colour[2]) ** 2
        min_colours[(rd + gd + bd)] = name
    return min_colours[min(min_colours.keys())]

### Remove background

In [3]:
def rm_bg(img):
  hMin = 29  # Hue minimum
  sMin = 30  # Saturation minimum
  vMin = 0   # Value minimum (Also referred to as brightness)
  hMax = 179 # Hue maximum
  sMax = 255 # Saturation maximum
  vMax = 255 # Value maximum
  # Set the minimum and max HSV values to display in the output image using numpys' array function. We need the numpy array since OpenCVs' inRange function will use those.
  lower = np.array([hMin, sMin, vMin])
  upper = np.array([hMax, sMax, vMax])
  # Create HSV Image and threshold it into the proper range.
  hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # Converting color space from BGR to HSV
  mask = cv2.inRange(hsv, lower, upper) # Create a mask based on the lower and upper range, using the new HSV image
  # Create the output image, using the mask created above. This will perform the removal of all unneeded colors, but will keep a black background.
  output = cv2.bitwise_and(img, img, mask=mask)
  # Add an alpha channel, and update the output image variable
  *_, alpha = cv2.split(output)
  dst = cv2.merge((output, alpha))
  return output

### TTS

In [4]:
def say(label):
    try:
        engine.say(label)
        engine.runAndWait()
        engine.stop()
    except: # Exception as e:
        pass

In [5]:
object_with_colors = ['car', 'motorbike', "chair", "bird", "cat", "cat", "dog", "horse", "cow", "umbrella","handbag", "tie","suitcase","frisbee","kite","skateboard","bottle","cup","fork","knife","spoon""cell phone","knife","sofa","mouse","cake","clock","toothbrush",'mouse','bottle','laptop']

### Recognize

In [34]:
colors = [tuple(255 * np.random.rand(3)) for i in range(7)]

def predict(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return tfnet.return_predict(img)

def fetchMetaInformation(result, image):
    label = result['label']

    tl = (result['topleft']['x'],result['topleft']['y'])
    br = (result['bottomright']['x'],result['bottomright']['y'])
    center = ((tl[1]+br[1]) // 2 , (tl[0]+br[0]) // 2)
    shape = image.shape
    
    h1_3 = shape[0] // 3
    w1_3 = shape[1] // 3

    pos = [
        ((0, 0), "top left"),
        ((0, w1_3), "top center"),
        ((0, 2 * w1_3), "top right"),
        
        ((h1_3, 0), "middle left"),
        ((h1_3, w1_3), "middle center"),
        ((h1_3, 2 * w1_3), "middle right"),

        ((2 * h1_3, 0), "bottom left"),
        ((2 * h1_3, w1_3), "bottom center"),
        ((2 * h1_3, 2 * w1_3), "bottom right")
    ]

    for i in pos:
        p = i[0]

        if (
            (p[0] < center[0] and p[1] < center[1]) and
            ((center[0] < (p[0] + h1_3)) and (center[1] < (p[1] + w1_3)))
        ):
            label += " in %s" % (i[1])
            break
        
    if label in object_with_colors:
        try:

            color = cv2.resize(clipped, (1,1))[0][0]

#             clipped = rm_bg(clipped)
            actual =_hex_to_name(rgb_to_hex(color))[1]
            return "%s %s" % (actual, label)
        except:
            actual = ''
            pass

    return label

def recz(img, prev_output):
    label=""

    stime = time.time()
    results = predict(img)

    out_string = "There is "
    c = 0

    for clr, result in zip(colors, results):
        tl = (result['topleft']['x'], result['topleft']['y'])
        br = (result['bottomright']['x'], result['bottomright']['y'])
        label = fetchMetaInformation(result, img)

        out_string += "a %s, " % (label)
        
        img = cv2.rectangle(img, tl, br, clr, 7)
        img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        c += 1

    out_string = out_string[:-2]

    if(c > 0 and prev_output != out_string):
        threading.Thread(target=say, args=(out_string + ".", )).start()
        prev_output = out_string

    fps = 1 / (time.time() - stime)

    img = cv2.putText(
        img,
        'FPS {:.1f}'.format(fps),
        (10, 40),
        cv2.FONT_HERSHEY_COMPLEX,
        .8,
        (255, 255, 255),
        2
    )
    
    print('\rFPS %.1f: %s.%s' % (fps, out_string, " " * 30), end="\r")

    return (img, prev_output)

"Ready!"

'Ready!'

# Operation

In [7]:
accuracy = .5
options = {
    'model': 'cfg/yolo.cfg',
    'load': 'bin/yolo.weights',
    'threshold': accuracy,
    'gpu': 0.22
}
tfnet = TFNet(options)

Parsing ./cfg/yolo.cfg
Parsing cfg/yolo.cfg
Loading bin/yolo.weights ...
Successfully identified 203934260 bytes
Finished in 0.023951053619384766s
Model has a coco model name, loading coco labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 608, 608, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 608, 608, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 304, 304, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 304, 304, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 76, 76, 128)
 Load  |  Yep

## Capture from camera

In [35]:
cap = cv2.VideoCapture(0)

pool = ThreadPool(processes=10)
prev_output = ""

engine.say("Started")
while(True):
    ret, frame = cap.read()

    img, prev_output  = recz(frame, prev_output)

    cv2.imshow('ImageWindow', img)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        cap.release()
        break

FPS 12.1: There is a person in middle right, a person in middle center.                                                             

## Capture from window

In [42]:
import win32gui
from PIL import ImageGrab

# hwnd = win32gui.FindWindow(None, r'192.168.43.56:4747/video')
# hwnd = win32gui.FindWindow(None, r'DroidCam Video Feed')
# hwnd = win32gui.FindWindow(None, r'video - VLC Media Player')
hwnd = win32gui.FindWindow(None, r'DroidCam Video')
dimensions = win32gui.GetWindowRect(hwnd)
# win32gui.SetForegroundWindow(hwnd)

pool = ThreadPool(processes=3)
prev_output = ""

engine.say("Started")

while(True):
    image = ImageGrab.grab(dimensions)
    img = np.array(image)
    img = img[:, :, ::-1].copy()
    
    img = img[110: img.shape[0]-10, 10:img.shape[1]-10]

    img, prev_output  = recz(img, prev_output)

    cv2.imshow('ImageWindow', img)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break

FPS 7.5: There i.                                                                                                                                      