In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np  
import pyttsx3
import threading

from darkflow.net.build import TFNet
from multiprocessing.dummy import Pool as ThreadPool
from time import time
from tqdm import tqdm
from sklearn.cluster import KMeans
from webcolors import rgb_percent_to_hex, hex_to_name, hex_to_rgb, css3_hex_to_names

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Color Extraction

In [2]:
def centroid_histogram(clt):
    """Return the fraction of samples assigned to each cluster label.

    Parameters
    ----------
    clt : object
        Fitted clustering estimator exposing an integer ``labels_`` array
        with one label per sample (``get_color`` passes a fitted ``KMeans``).

    Returns
    -------
    numpy.ndarray
        Float array that sums to one in which entry ``k`` is the share of
        samples labelled ``k``. Its length is ``max(label) + 1`` so that an
        index into the result is always a valid cluster id, even when some
        cluster id has no samples. An empty ``labels_`` yields an empty array.
    """
    labels = np.asarray(clt.labels_)
    if labels.size == 0:
        return np.zeros(0, dtype="float")

    # One unit-wide bin per label id 0..max. Sizing the bins by the number of
    # *distinct* labels (as before) shifted counts into the wrong slot whenever
    # a label id was unused, so callers indexing cluster_centers_ with the
    # histogram position could pick the wrong centre.
    edges = np.arange(0, int(labels.max()) + 2)
    (hist, _) = np.histogram(labels, bins=edges)

    # normalize the histogram, such that it sums to one
    hist = hist.astype("float")
    total = hist.sum()
    if total > 0:
        hist /= total

    return hist

def get_color(image):
    """Estimate the dominant colour of an image patch with 2-means clustering.

    Parameters
    ----------
    image : numpy.ndarray
        3-channel image patch. NOTE(review): callers pass crops of OpenCV
        frames, so the channel order is presumably BGR -- confirm.

    Returns
    -------
    tuple
        ``(hex_label, centre)`` where ``hex_label`` is ``'#XXYYZZ'`` built from
        the dominant cluster centre in channel-swapped order (RRGGBB if the
        input is BGR) and ``centre`` is that same cluster centre as a float
        array in the input's own channel order.
    """
    # Swap the first and third channels. OpenCV defines COLOR_RGB2BGR and
    # COLOR_BGR2RGB as the same operation; the BGR2RGB spelling is used here
    # because the hex string below is meant to read as RGB.
    pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).reshape((-1, 3))

    clt = KMeans(n_clusters = 2)
    clt.fit(pixels)
    hist = centroid_histogram(clt)

    # Index of the most populated cluster (first one on ties).
    dominant = int(np.argmax(hist))
    centre = clt.cluster_centers_[dominant]

    # Centres are floats; casting to uint8 truncates them to 0-255 integers
    # before they are rendered as two upper-case hex digits per channel.
    hex_label = '#' + ''.join("%02X" % channel for channel in centre.astype("uint8").tolist())

    # Reverse the channels again so the returned colour matches the input order.
    return (hex_label, centre[::-1])

### Hex to name

In [3]:
def _hex_to_name(requested_colour):
    """Resolve a hex colour to ``(actual_name, closest_name)``.

    ``actual_name`` is the exact name reported by ``hex_to_name`` or ``None``
    when that lookup raises ``ValueError``; in that case ``closest_name`` comes
    from ``closest_colour``, otherwise both entries are the exact name.
    """
    try:
        exact = hex_to_name(requested_colour)
    except ValueError:
        # No exact match: fall back to the nearest named colour.
        return None, closest_colour(requested_colour)
    return exact, exact

def closest_colour(requested_colour):
    """Return the CSS3 colour name nearest to ``requested_colour``.

    Parameters
    ----------
    requested_colour : str or sequence
        Either a hex colour string (decoded with ``hex_to_rgb``) or an
        already-decoded sequence whose first three items are the red, green
        and blue components.

    Returns
    -------
    str
        Name from ``css3_hex_to_names`` with the smallest squared Euclidean
        distance in RGB space. When several names are equally close, the one
        iterated last wins (same rule as the previous dict-based version).

    Raises
    ------
    ValueError
        If a string is given that ``hex_to_rgb`` rejects. Previously such
        input was silently kept and failed later with an unrelated TypeError.
    """
    # Only strings need decoding. The former bare ``except: pass`` also
    # swallowed KeyboardInterrupt and hid genuinely malformed hex strings.
    if isinstance(requested_colour, str):
        requested_colour = hex_to_rgb(requested_colour)
    red, green, blue = requested_colour[0], requested_colour[1], requested_colour[2]

    best_name = None
    best_distance = None
    for key, name in css3_hex_to_names.items():
        r_c, g_c, b_c = hex_to_rgb(key)
        distance = (r_c - red) ** 2 + (g_c - green) ** 2 + (b_c - blue) ** 2
        # ``<=`` keeps the last of several equally distant names.
        if best_distance is None or distance <= best_distance:
            best_name, best_distance = name, distance
    return best_name

### Recognize Object

In [4]:
# Detector labels for which fillInfo prefixes a colour name.
# Fixes: the missing commas after "spoon" and 'laptop' silently concatenated
# neighbouring strings ("spooncell phone", "laptopteady bear"), "teady bear"
# was misspelled, and several entries were listed twice.
# NOTE(review): entries must match the label strings emitted by the loaded
# model exactly -- confirm against its labels file.
object_with_colors = [
    "car",
    "motorbike",
    "bird",
    "cat",
    "dog",
    "horse",
    "cow",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "kite",
    "skateboard",
    "bottle",
    "cup",
    "fork",
    "knife",
    "spoon",
    "cell phone",
    "sofa",
    "mouse",
    "cake",
    "clock",
    "toothbrush",
    "laptop",
    "teddy bear",
]
# Font passed to the cv2.getTextSize / cv2.putText calls in fillInfo and recz.
font = cv2.FONT_HERSHEY_COMPLEX

In [5]:
def say(label):
    """Text-to-speech hook for ``label``; currently a deliberate no-op.

    The speech calls are disabled, so the function ignores its argument and
    returns ``None``.
    """
    # Disabled text-to-speech calls, kept for reference:
    #     engine.say(label)
    #     engine.runAndWait()
    #     engine.stop()
    return None

In [6]:
def fillInfo(image, result):
    """Draw one detection (box and caption) on ``image`` and build its label.

    Parameters
    ----------
    image : numpy.ndarray
        Frame to annotate; it is drawn on in place.
    result : dict
        One detection with keys ``'label'``, ``'confidence'``, ``'topleft'``
        and ``'bottomright'`` (the last two holding ``'x'`` and ``'y'``).

    Returns
    -------
    tuple
        ``(image, label)`` where ``label`` is the detection label, prefixed
        with a colour name when the label is listed in ``object_with_colors``.
    """
    label = result['label']
    # NOTE(review): confidence is assumed to be a 0-1 fraction (the notebook's
    # detection threshold is 0.45), so it is scaled before the '%' sign is
    # appended; previously 0.87 was rendered as "0.87%".
    image_label = "{} {:.2f}%".format(label, result['confidence'] * 100)

    tl = (result['topleft']['x'], result['topleft']['y'])
    br = (result['bottomright']['x'], result['bottomright']['y'])

    clipped = image[tl[1]:br[1], tl[0]:br[0]]

    if clipped.shape[0] * clipped.shape[1] >= 2:
        # The dominant colour doubles as the box colour for every detection.
        hex_color, colr_set = get_color(clipped)
        if label in object_with_colors:
            color = _hex_to_name(hex_color)[1]
            image_label = color + " " + image_label
            label = color + " " + label
    else:
        # get_color clusters the crop into two groups and cannot run on fewer
        # than two pixels; fall back to a black box and skip the colour name.
        colr_set = (0, 0, 0)

    (text_width, text_height) = cv2.getTextSize(image_label, font, fontScale=1, thickness=1)[0]
    # Filled caption background sitting just above the box's top-left corner.
    caption_origin = (tl[0], tl[1] - 10)
    box_coords = ((tl[0], tl[1]), (tl[0] + text_width + 2, tl[1] - 10 - text_height - 2))
    cv2.rectangle(image, box_coords[0], box_coords[1], colr_set, cv2.FILLED)
    cv2.putText(image, image_label, caption_origin, font, fontScale=1, color=(255,255,255), thickness=1)

    image = cv2.rectangle(image, tl, br, colr_set, 2)

    return image, label

def predict(image):
    """Run the module-level ``tfnet`` on ``image`` and return its predictions."""
    detections = tfnet.return_predict(image)
    return detections

def recz(image, prev_output):
    """Detect objects in ``image``, annotate it and build a summary sentence.

    Parameters
    ----------
    image : numpy.ndarray
        Frame to analyse; annotations are drawn onto it.
    prev_output : str
        Sentence returned by the previous call, used to avoid announcing the
        same sentence twice in a row.

    Returns
    -------
    tuple
        ``(image, out_string)``. ``out_string`` is ``"There is a, b."`` when
        something was detected and ``"There is "`` otherwise.
    """
    # The prediction is submitted to the module-level pool and awaited at once.
    async_result = pool.apply_async(predict, (image, ))
    results = async_result.get()

    labels = []
    for result in results:
        image, label = fillInfo(image, result)
        labels.append(label)

    if labels:
        out_string = "There is " + ", ".join(labels) + "."
        # Compare the *finished* sentence. The earlier code compared the
        # untrimmed draft ("..., ") with the previous finished sentence, so
        # they never matched and say() was triggered on every frame.
        if prev_output != out_string:
            threading.Thread(target=say, args=(out_string, )).start()
    else:
        out_string = "There is "

    # White strip along the bottom edge carrying the sentence in black text.
    (text_width, text_height) = cv2.getTextSize(out_string, font, fontScale=1, thickness=1)[0]
    box_coords = ((0, image.shape[0] - text_height - 4), (text_width + 2, image.shape[0]))
    cv2.rectangle(image, box_coords[0], box_coords[1], (255,255,255), cv2.FILLED)
    cv2.putText(image, out_string, (1, image.shape[0] - 2 ), font, 1, (0, 0, 0), 1)

    return image, out_string

In [7]:
# Settings handed to the TFNet constructor: config file, weights file,
# detection threshold and the 'gpu' value.
threshold = 0.45
options = dict(
    model='cfg/yolo.cfg',
    load='bin/yolo.weights',
    threshold=threshold,
    gpu=0.85,
)

tfnet = TFNet(options)

Parsing ./cfg/yolo.cfg
Parsing cfg/yolo.cfg
Loading bin/yolo.weights ...
Successfully identified 203934260 bytes
Finished in 0.07779097557067871s
Model has a coco model name, loading coco labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 608, 608, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 608, 608, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 304, 304, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 304, 304, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 76, 76, 128)
 Load  |  Yep!

KeyboardInterrupt: 

In [None]:
# Live detection on capture device 0; press 'q' in the window to stop.
cap = cv2.VideoCapture(0)

pool = ThreadPool(processes=10)
prev_output = ""
engine = pyttsx3.init()
# NOTE(review): the utterance is only queued here; nothing in this cell calls
# engine.runAndWait(), so it may never be spoken -- confirm intent.
engine.say("Started")

try:
    while True:
        ret, frame = cap.read()
        # A failed read yields no frame; stop instead of passing None to recz.
        if not ret or frame is None:
            break

        image, prev_output  = recz(frame, prev_output)

        print(prev_output)

        cv2.imshow('ImageWindow', image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window on error or interrupt as well.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
import win32gui
from PIL import ImageGrab

# Detection on screenshots of another application's window (Windows only).
# Other window titles that were tried:
#   r'192.168.43.56:4747/video'
#   r'a.mp4 - VLC Media Player'
#   r'DroidCam Video'
WINDOW_TITLE = r'DroidCam Video Feed'
hwnd = win32gui.FindWindow(None, WINDOW_TITLE)
if not hwnd:
    # Fail with a readable message rather than an invalid-handle error below.
    raise RuntimeError("Window not found: " + WINDOW_TITLE)
# The rectangle is read once; moving or resizing the window afterwards is not tracked.
dimensions = win32gui.GetWindowRect(hwnd)
# win32gui.SetForegroundWindow(hwnd)

pool = ThreadPool(processes=3)
prev_output = ""

# Relies on `engine` created by the webcam cell having been run first.
engine.say("Started")

try:
    while True:
        image = ImageGrab.grab(dimensions)
        img = np.array(image)
        # Reverse the channel axis (PIL order -> OpenCV order) into a contiguous copy.
        img = img[:, :, ::-1].copy()

        img, prev_output  = recz(img, prev_output)

        cv2.imshow('ImageWindow', img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Close the preview window on error or interrupt as well.
    cv2.destroyAllWindows()

### Make Video

In [68]:
# Numeric suffix for the next output video file name; the video-writing cell
# below reads it to build OUTPUT_FILE and increments it after each run.
output_count = 0

In [77]:
import os
import sys

# Was misspelled NPUT_FILE, leaving INPUT_FILE (used below) undefined.
INPUT_FILE = "./VID_20200501_102816.mp4"
OUTPUT_FILE = "./outputs/vid_" + str(output_count) + ".avi"

pool = ThreadPool(processes=3)
prev_output = ""

ORGINAL_WIDTH = 3840
ORGINAL_HEIGHT = 2160

RESZIE_WIDTH = int(ORGINAL_WIDTH * 0.5)
RESIZE_HEIGHT = int(ORGINAL_HEIGHT * 0.5) #int(9 / 16 * RESZIE_WIDTH)

# Make sure the output folder exists before the writer is created.
os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)

vid = cv2.VideoCapture(INPUT_FILE)
if not vid.isOpened():
    vid.release()
    raise IOError("Cannot open input video: " + INPUT_FILE)

# Frames are rotated by 90 degrees below, so the written frame is
# RESIZE_HEIGHT wide and RESZIE_WIDTH tall.
out = cv2.VideoWriter(OUTPUT_FILE, cv2.VideoWriter_fourcc(*'DIVX'), 25, ( RESIZE_HEIGHT, RESZIE_WIDTH))
# total = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
total = 280
try:
    for i in range(0, total + 1):
        s = time()
        sys.stdout.flush()
        check, frame = vid.read()
        # Stop at end of stream *before* touching the frame; the previous
        # None check came after cv2.resize, which fails on a missing frame.
        if not check or frame is None:
            break
        frame = cv2.resize(frame, (RESZIE_WIDTH, RESIZE_HEIGHT))
        frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

        image, prev_output  = recz(frame, prev_output)
        out.write(image)

        e = time()
        print("\r{:d}/{:d} {:.2f}s".format(i, total, e-s), end="\r")
finally:
    # Always finalize the files, even when a frame fails mid-run.
    vid.release()
    out.release()
    cv2.destroyAllWindows()

output_count += 1

280/280 9.57ss