#### Imports

In [5]:
import cv2
import time
import pyttsx3
import threading
import webcolors
import numpy as np  
import numexpr as ne
import face_recognition

from imutils import paths
from collections import Counter
from sklearn.cluster import KMeans
from multiprocessing.dummy import Pool as ThreadPool

# Pickle file holding the known face encodings and names; it is written by the
# training cell and read back by the cells that recognise faces.
DATASET_FILE_NAME = 'model_encodings.pickle'
# Font used for the name labels drawn in the face-detection cell.
FONT = cv2.FONT_HERSHEY_SIMPLEX

%config InlineBackend.figure_format = 'svg'

### Extract color

In [None]:
def centroid_histogram(clt):
    """Return the share of samples assigned to each cluster of ``clt``.

    Args:
        clt: A fitted clustering object exposing ``labels_`` (one integer
            cluster index per sample) and, optionally, ``n_clusters``.

    Returns:
        A float array whose ``i``-th entry is the fraction of samples that
        carry label ``i``. The entries sum to one when there is at least one
        sample; with no samples an all-zero array is returned.
    """
    labels = np.asarray(clt.labels_)

    n_clusters = getattr(clt, "n_clusters", None)
    if n_clusters is None:
        n_clusters = int(labels.max()) + 1 if labels.size else 0

    if labels.size == 0:
        return np.zeros(n_clusters, dtype="float")

    # Count per label *value* (not per number of distinct labels) so that
    # entry i always belongs to cluster i, even when some cluster is empty.
    hist = np.bincount(labels, minlength=n_clusters).astype("float")

    # normalize the histogram, such that it sums to one
    hist /= hist.sum()

    return hist

def color(image):
    """Find the dominant colour of ``image`` with an 8-cluster k-means.

    The image goes through ``cv2.COLOR_RGB2BGR``, is flattened to one row per
    pixel and clustered; the cluster with the largest share of pixels wins.

    Args:
        image: Three-channel image array.

    Returns:
        ``(hex_label, centre)`` where ``hex_label`` is ``'#'`` followed by the
        winning centre's three channels (cast to uint8) in upper-case hex, in
        the post-conversion channel order, and ``centre`` is the same cluster
        centre with its channel order reversed.
    """
    swapped = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    pixels = swapped.reshape((swapped.shape[1] * swapped.shape[0], 3))

    model = KMeans(n_clusters = 8)
    model.fit(pixels)
    shares = centroid_histogram(model)

    hex_labels = []
    for centre in model.cluster_centers_:
        channels = centre.astype("uint8").tolist()
        hex_labels.append('#' + ''.join(["%0.2X"%(value) for value in channels]))

    # Position of the first cluster holding the largest share of pixels.
    dominant = list(shares).index(max(shares))

    return (hex_labels[dominant], model.cluster_centers_[dominant][::-1])


def _hex_to_name(requested_colour):
    """Name a hex colour, falling back to the nearest known colour.

    Returns:
        ``(actual_name, closest_name)``. Both hold the exact name when
        ``hex_to_name`` recognises the value; when it raises ``ValueError``,
        ``actual_name`` is ``None`` and ``closest_name`` comes from
        ``closest_colour``.
    """
    try:
        exact = hex_to_name(requested_colour)
    except ValueError:
        return None, closest_colour(requested_colour)
    return exact, exact

def closest_colour(requested_colour):
    """Return the CSS3 colour name nearest to ``requested_colour``.

    Args:
        requested_colour: Either a hex string accepted by
            ``webcolors.hex_to_rgb`` or an indexable ``(r, g, b)`` triple.

    Returns:
        The name from ``webcolors.css3_hex_to_names`` whose RGB value has the
        smallest squared Euclidean distance to the requested colour (the later
        table entry wins a tie), or ``None`` if the table is empty.
    """
    try:
        requested_colour = webcolors.hex_to_rgb(requested_colour)
    except (TypeError, ValueError):
        # Not a parsable hex string: treat the argument as an (r, g, b) triple.
        pass

    # Plain floats, so fixed-width NumPy integers cannot wrap around in the
    # subtractions below.
    want_r = float(requested_colour[0])
    want_g = float(requested_colour[1])
    want_b = float(requested_colour[2])

    best_name = None
    best_distance = None
    # Qualified through the imported ``webcolors`` module; the bare names
    # ``css3_hex_to_names``/``hex_to_rgb`` are not bound when this cell runs.
    for key, name in webcolors.css3_hex_to_names.items():
        r_c, g_c, b_c = webcolors.hex_to_rgb(key)
        distance = (r_c - want_r) ** 2 + (g_c - want_g) ** 2 + (b_c - want_b) ** 2
        # "<=" keeps the previous tie-break: the later entry replaces an
        # earlier one at the same distance.
        if best_distance is None or distance <= best_distance:
            best_name, best_distance = name, distance
    return best_name

In [None]:
import pickle

# Worker threads; recz() submits predict() to this pool.
pool = ThreadPool(processes=10)

# Shared text-to-speech engine used by say().
engine = pyttsx3.init()
# Seven random colour triples, one per bounding box drawn in a frame.
colors = [tuple(255 * np.random.rand(3)) for _ in range(7)]

# NOTE: unpickling can execute arbitrary code; only point DATASET_FILE_NAME at
# a file you produced yourself (e.g. with the training cell).
with open(DATASET_FILE_NAME, "rb") as encodings_file:
    data = pickle.load(encodings_file)

def predict(img):
    """Run the detector on a frame.

    The frame is converted with ``cv2.COLOR_BGR2RGB`` and handed to
    ``tfnet.return_predict``, whose result is returned unchanged.
    """
    return tfnet.return_predict(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

def say(label):
    """Speak ``label`` through the shared ``engine`` on a best-effort basis.

    Any ``Exception`` raised by the engine is reported on stdout and then
    dropped, so a speech failure never stops the caller. ``KeyboardInterrupt``
    and ``SystemExit`` are no longer swallowed.

    Args:
        label: Text to speak.
    """
    try:
        engine.say(label)
        engine.runAndWait()
        engine.stop()
    except Exception as err:
        # Speech is optional: make the failure visible instead of hiding it.
        print("[WARN] speech failed: %s" % (err,))

def fetchMetaInformation(result, image):
    """Return the label of one detection.

    The detection's box is also cropped out of ``image``, but the crop is not
    consumed: the colour-naming and person-naming steps that would use it are
    currently disabled.

    Args:
        result: Detection dict with ``'label'``, ``'topleft'`` and
            ``'bottomright'`` (the latter two holding ``'x'`` and ``'y'``).
        image: Array indexed as ``image[y0:y1, x0:x1]``.

    Returns:
        ``result['label']`` unchanged.
    """
    label = result['label']

    x0, y0 = result['topleft']['x'], result['topleft']['y']
    x1, y1 = result['bottomright']['x'], result['bottomright']['y']

    clipped = image[y0:y1, x0:x1]

    if label in object_with_colors:
        try:
            # Crop kept for the (disabled) colour lookup of coloured objects.
            clipped = image[y0:y1, x0:x1]
        except:
            pass

    return label

def recz(img, prev_output):
    """Detect objects in ``img``, annotate the frame and announce the result.

    Args:
        img: Frame to analyse; boxes, labels and an FPS counter are drawn on it.
        prev_output: Sentence built for the previously announced frame. Speech
            is only started when the new sentence differs from it.

    Returns:
        ``(img, prev_output)``: the annotated frame and the sentence to pass
        in on the next call.
    """
    from itertools import cycle

    async_result = pool.apply_async(predict, (img, ))

    stime = time.time()
    results = async_result.get()

    out_string = "There is "
    c = 0

    print(results)

    # cycle() reuses the palette, so detections beyond len(colors) are still
    # drawn and announced instead of being silently dropped by zip().
    for clr, result in zip(cycle(colors), results):
        tl = (result['topleft']['x'], result['topleft']['y'])
        br = (result['bottomright']['x'], result['bottomright']['y'])
        label = fetchMetaInformation(result, img)

        out_string += "a %s, " % (label)

        img = cv2.rectangle(img, tl, br, clr, 7)
        img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        c += 1

    if c > 0 and prev_output != out_string:
        # Speak on a separate thread so the frame loop is not blocked.
        threading.Thread(target=say, args=(out_string + ".", )).start()
        prev_output = out_string

    # Guard against a zero interval on coarse clocks.
    elapsed = time.time() - stime
    fps = 1 / elapsed if elapsed > 0 else float("inf")

    img = cv2.putText(
        img,
        'FPS {:.1f}'.format(fps),
        (10, 40),
        cv2.FONT_HERSHEY_COMPLEX,
        .8,
        (255, 255, 255),
        2
    )

    print('\rFPS {:.1f}'.format(fps), end="\r")

    return (img, prev_output)

In [10]:
# NOTE(review): the six tuning values below are not referenced by any code in
# this notebook as shown — presumably left over from a background-removal
# experiment; confirm before removing.
BLUR = 24
CANNY_THRESH_1 = 10
CANNY_THRESH_2 = 200
MASK_DILATE_ITER = 10
MASK_ERODE_ITER = 10
MASK_COLOR = (0.0, 0.0, 1.0)  # In BGR format

# Detection labels for which fetchMetaInformation() takes a crop for colour
# lookup. "spoon" and "cell phone" used to be fused into "spooncell phone" by a
# missing comma; duplicate entries were removed.
object_with_colors = [
    "car",
    "motorbike",
    "bird",
    "cat",
    "dog",
    "horse",
    "cow",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "kite",
    "skateboard",
    "bottle",
    "cup",
    "fork",
    "knife",
    "spoon",
    "cell phone",
    "sofa",
    "mouse",
    "cake",
    "clock",
    "toothbrush",
    "laptop",
]

def get_name(image):
    """Identify the first face found in ``image``.

    Args:
        image: Image array handed straight to ``face_recognition``.

    Returns:
        The best-matching known name when more than 85% of that person's
        stored encodings match, ``"Unknown"`` for a face that does not reach
        that ratio, or ``None`` when no face is detected.
    """
    boxes = face_recognition.face_locations(image, model='hog')
    encodings = face_recognition.face_encodings(image, boxes)
    if not encodings:
        return None

    # Only the first detected face is considered, as before.
    matches = face_recognition.compare_faces(data["encodings"], encodings[0])

    # Number of matching stored encodings per known name.
    counts = Counter(
        known for known, matched in zip(data["names"], matches) if matched
    )
    if not counts:
        return "Unknown"

    name = max(counts, key=counts.get)
    # Computed here so the function does not depend on a global set up by
    # another cell.
    stored_for_name = sum(1 for known in data["names"] if known == name)
    num_prop = counts[name] / stored_for_name

    print("[INFO] detected %s with '%f' accuracy ..." %
          (name, num_prop))

    return name if num_prop > 0.85 else "Unknown"

def fetchMetaInformation(result, image):
    """Look up the label of a single detection.

    A crop of the detection's box is taken from ``image`` as well, although
    nothing reads it at the moment (the colour and person naming that would
    use it is switched off).

    Args:
        result: Detection dict providing ``'label'`` plus ``'topleft'`` and
            ``'bottomright'`` corner dicts with ``'x'`` and ``'y'``.
        image: Array that supports ``image[rows, cols]`` slicing.

    Returns:
        The value of ``result['label']``.
    """
    label = result['label']

    tl = (result['topleft']['x'], result['topleft']['y'])
    br = (result['bottomright']['x'], result['bottomright']['y'])

    # Row range first, then column range.
    box = (slice(tl[1], br[1]), slice(tl[0], br[0]))
    clipped = image[box]

    if label in object_with_colors:
        try:
            clipped = image[box]
        except:
            pass

    return label

def recz(img, prev_output):
    """Run detection on ``img``, draw the results and speak a summary.

    Args:
        img: Frame to analyse. Bounding boxes, labels and an FPS counter are
            drawn onto it.
        prev_output: Summary sentence from the last announcement; a new
            announcement is only made when the sentence changes.

    Returns:
        ``(img, prev_output)``: the annotated frame and the summary sentence
        to feed into the next call.
    """
    from itertools import cycle

    async_result = pool.apply_async(predict, (img, ))

    stime = time.time()
    results = async_result.get()

    print(results)

    out_string = "There is "
    detected = 0

    # The palette is cycled so that every detection is handled; a plain
    # zip(colors, results) stopped after len(colors) detections.
    for clr, result in zip(cycle(colors), results):
        tl = (result['topleft']['x'], result['topleft']['y'])
        br = (result['bottomright']['x'], result['bottomright']['y'])
        label = fetchMetaInformation(result, img)

        out_string += "a %s, " % (label)

        img = cv2.rectangle(img, tl, br, clr, 7)
        img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        detected += 1

    if detected > 0 and prev_output != out_string:
        # Speech runs on its own thread to keep the frame loop responsive.
        threading.Thread(target=say, args=(out_string + ".", )).start()
        prev_output = out_string

    # A coarse clock can report a zero interval; avoid dividing by it.
    elapsed = time.time() - stime
    fps = 1 / elapsed if elapsed > 0 else float("inf")

    img = cv2.putText(
        img,
        'FPS {:.1f}'.format(fps),
        (10, 40),
        cv2.FONT_HERSHEY_COMPLEX,
        .8,
        (255, 255, 255),
        2
    )

    print('\rFPS {:.1f}'.format(fps), end="\r")

    return (img, prev_output)

def rm_bg(img):
    """Black out the pixels of ``img`` that fall outside a fixed HSV range.

    Args:
        img: Image that is converted with ``cv2.COLOR_BGR2HSV``.

    Returns:
        ``img`` combined with itself under the in-range mask; per the comment
        in the original code, masked-out pixels are left as a black background.
    """
    # Lower/upper bounds as (hue, saturation, value); cv2.inRange takes them
    # as NumPy arrays.
    lower = np.array([29, 30, 0])
    upper = np.array([179, 255, 255])

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)

    # The former alpha-channel split/merge was computed and then discarded on
    # every call; the returned image is, and was, the three-channel masked one.
    return cv2.bitwise_and(img, img, mask=mask)

def color(img):
    """Summarise ``img`` as one colour: its hex code and nearest colour name.

    The pixels are clustered with a single-cluster ``cv2.kmeans`` and the
    resulting centre is truncated to integers.

    Args:
        img: Image array reshaped here to rows of three channels.
            NOTE(review): the channels are written into the hex string in the
            order given, so an RGB image is presumably expected — confirm
            against the callers.

    Returns:
        ``(color_hex, closest_name)``: a ``'#rrggbb'``-style string built from
        the centre and the second element of ``_hex_to_name(color_hex)``.
    """
    pixels = np.float32(np.reshape(img, (-1, 3)))

    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 1, 1.0)
    flags = cv2.KMEANS_RANDOM_CENTERS
    _, _, centers = cv2.kmeans(pixels, 1, None, criteria, 10, flags)

    channels = tuple(int(value) for value in centers[0])
    color_hex = '#%02x%02x%02x' % channels

    # The previous call target, get_colour_name, is not defined anywhere in
    # this notebook; _hex_to_name is the helper that returns the matching
    # (actual_name, closest_name) pair.
    return (color_hex, _hex_to_name(color_hex)[1])

def _hex_to_name(requested_colour):
    """Resolve a hex colour to a name.

    Returns:
        A pair ``(actual_name, closest_name)``. If ``hex_to_name`` knows the
        value, both entries are that name. If it raises ``ValueError``, the
        first entry is ``None`` and the second is ``closest_colour``'s answer.
    """
    try:
        actual_name = hex_to_name(requested_colour)
    except ValueError:
        actual_name = None
        closest_name = closest_colour(requested_colour)
    else:
        closest_name = actual_name
    return actual_name, closest_name

def closest_colour(requested_colour):
    """Find the CSS3 colour name closest to ``requested_colour``.

    Args:
        requested_colour: A hex string that ``webcolors.hex_to_rgb`` can parse,
            or an indexable ``(r, g, b)`` triple.

    Returns:
        The entry of ``webcolors.css3_hex_to_names`` with the smallest squared
        Euclidean RGB distance to the request (on a tie the later table entry
        wins), or ``None`` when the table is empty.
    """
    try:
        requested_colour = webcolors.hex_to_rgb(requested_colour)
    except (TypeError, ValueError):
        # Anything that is not a valid hex string is taken to be a triple.
        pass

    # Converted to plain floats so NumPy fixed-width integers cannot overflow
    # when subtracted from the table values.
    target = (
        float(requested_colour[0]),
        float(requested_colour[1]),
        float(requested_colour[2]),
    )

    closest_name = None
    smallest = None
    # Accessed through the imported ``webcolors`` module: the unqualified
    # names used before are not bound when this cell runs.
    for key, name in webcolors.css3_hex_to_names.items():
        r_c, g_c, b_c = webcolors.hex_to_rgb(key)
        rd = (r_c - target[0]) ** 2
        gd = (g_c - target[1]) ** 2
        bd = (b_c - target[2]) ** 2
        total = rd + gd + bd
        # "<=" preserves the earlier behaviour of letting a later entry
        # replace an equally distant earlier one.
        if smallest is None or total <= smallest:
            closest_name, smallest = name, total
    return closest_name

In [6]:
# Detection threshold; it is also embedded in the output image file name.
accuracy = .5

# Settings passed to the detector.
options = dict(
    model='cfg/yolo.cfg',
    load='bin/yolo.weights',
    threshold=accuracy,
    gpu=0.82,
)

# NOTE(review): TFNet is not imported anywhere in the notebook as shown —
# confirm where it is brought into scope.
tfnet = TFNet(options)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Parsing ./cfg/yolo.cfg
Parsing cfg/yolo.cfg
Loading bin/yolo.weights ...
Successfully identified 203934260 bytes
Finished in 0.019945621490478516s
Model has a coco model name, loading coco labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 608, 608, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 608, 608, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 304, 304, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 304, 304, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 76, 76, 128)
 Load  |  Yep

## Capture from camera

In [3]:
cap = cv2.VideoCapture(0)

pool = ThreadPool(processes=10)
prev_output = ""

engine.say("Started")
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; recz() cannot work without one.
            print("[WARN] could not read a frame from the camera, stopping.")
            break

        img, prev_output = recz(frame, prev_output)

        cv2.imshow('ImageWindow', img)

        # Quit on "q".
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Free the camera and close the window on every exit path, including
    # errors and interrupts.
    cap.release()
    cv2.destroyAllWindows()

## Capture from window

In [None]:
import win32gui
from PIL import ImageGrab

# Window to mirror, looked up by its exact title. Titles tried earlier:
#   r'192.168.43.56:4747/video', r'DroidCam Video Feed',
#   r'a.mp4 - VLC Media Player'
hwnd = win32gui.FindWindow(None, r'DroidCam Video')
# Screen rectangle of that window, read once before the loop starts.
dimensions = win32gui.GetWindowRect(hwnd)

pool = ThreadPool(processes=3)
prev_output = ""

engine.say("Started")

while True:
    image = ImageGrab.grab(dimensions)
    # Reverse the channel axis and take an independent copy of the result.
    img = np.array(image)[:, :, ::-1].copy()

    img, prev_output = recz(img, prev_output)

    cv2.imshow('ImageWindow', img)

    # "q" ends the loop; the window is closed right after it.
    if (cv2.waitKey(1) & 0xFF) == ord('q'):
        break

cv2.destroyAllWindows()

### Make Images

In [None]:
import os

frame = cv2.imread("./sample.jpeg")
if frame is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("could not read ./sample.jpeg")

prev_output = ""
img, prev_output = recz(frame, prev_output)

output_path = "outputs/accuracy_" + str(accuracy) + ".jpeg"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# cv2.imwrite reports failure through its return value only.
if not cv2.imwrite(output_path, img):
    raise IOError("could not write " + output_path)

### Make video

In [None]:
import os
from collections import Counter

import cv2
import face_recognition
from IPython.display import clear_output
import pickle

cv2.destroyAllWindows()
INPUT_FILE = "a.mp4"
OUTPUT_FILE = "out_a.avi"
RESZIE_WIDTH = 1920
RESIZE_HEIGHT = int(9 / 16 * RESZIE_WIDTH)

vid = cv2.VideoCapture(INPUT_FILE)
if not vid.isOpened():
    raise IOError("could not open " + INPUT_FILE)

present_name = list()
pool = ThreadPool(processes=10)

out = cv2.VideoWriter(OUTPUT_FILE, cv2.VideoWriter_fourcc(*'DIVX'), 30, (RESZIE_WIDTH, RESIZE_HEIGHT))
prev_output = ""
try:
    # Frames are read only inside the loop: the earlier priming read consumed
    # the first frame without ever writing it to the output.
    while True:
        check, org = vid.read()

        if not check or org is None:
            break
        k = cv2.waitKey(1)

        # 27 is the Escape key.
        if k % 256 == 27:
            print("\nEscape hit, closing...")
            break
        clear_output(wait=True)
        image = cv2.resize(org, (RESZIE_WIDTH, RESIZE_HEIGHT))

        img, prev_output = recz(image, prev_output)

        out.write(img)
        cv2.imshow("a", img)
finally:
    # Release the reader, finalise the output file and close the preview on
    # every exit path.
    vid.release()
    out.release()

    cv2.destroyAllWindows()

### Sample

In [None]:
import matplotlib.pyplot as plt
from webcolors import rgb_percent_to_hex, hex_to_name,hex_to_rgb

# Load the sample picture in colour and convert it from BGR to RGB.
img = cv2.cvtColor(cv2.imread('aaaa.jpg', cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)

def centroid_histogram(clt):
    """Compute how large a share of the samples each cluster received.

    Args:
        clt: Fitted clustering object with a ``labels_`` sequence of integer
            cluster indices; ``n_clusters`` is used when present.

    Returns:
        Float array in which position ``i`` holds the fraction of samples
        labelled ``i``. It sums to one unless there are no samples, in which
        case every entry is zero.
    """
    labels = np.asarray(clt.labels_)

    n_clusters = getattr(clt, "n_clusters", None)
    if n_clusters is None:
        n_clusters = int(labels.max()) + 1 if labels.size else 0

    if labels.size == 0:
        return np.zeros(n_clusters, dtype="float")

    # Counting by label value keeps position i tied to cluster i even if a
    # cluster ended up without samples; binning by the number of distinct
    # labels shifted later clusters onto the wrong positions in that case.
    counts = np.bincount(labels, minlength=n_clusters)

    # normalize the histogram, such that it sums to one
    hist = counts.astype("float")
    hist /= hist.sum()

    # return the histogram
    return hist

def plot_colors(hist, centroids):
    """Draw the cluster shares as a 50x300 horizontal colour bar.

    Args:
        hist: Share of each cluster; each value is scaled by the bar width.
        centroids: One colour per cluster, paired with ``hist`` by position.

    Returns:
        A ``(50, 300, 3)`` uint8 array with one filled rectangle per cluster,
        laid out left to right.
    """
    bar = np.zeros((50, 300, 3), dtype = "uint8")
    left = 0

    for (share, centre) in zip(hist, centroids):
        # Each segment begins where the previous one ended.
        right = left + (share * 300)
        fill = centre.astype("uint8").tolist()
        cv2.rectangle(bar, (int(left), 0), (int(right), 50), fill, -1)
        left = right

    return bar

def _hex_to_name(requested_colour):
    """Translate a hex colour into ``(actual_name, closest_name)``.

    When ``hex_to_name`` recognises the value, both entries are that name.
    When it raises ``ValueError``, ``actual_name`` is ``None`` and
    ``closest_name`` is whatever ``closest_colour`` returns.
    """
    try:
        exact = hex_to_name(requested_colour)
        names = (exact, exact)
    except ValueError:
        names = (None, closest_colour(requested_colour))
    return names

def closest_colour(requested_colour):
    """Return the CSS3 colour name with the smallest squared RGB distance.

    Args:
        requested_colour: A value ``hex_to_rgb`` can convert, or anything
            indexable at positions 0, 1 and 2; if the conversion raises, the
            argument is used as given.

    Returns:
        The name stored under the smallest summed squared difference between
        the requested colour and each entry of
        ``webcolors.css3_hex_to_names``.
    """
    try:
        requested_colour = hex_to_rgb(requested_colour)
    except:
        pass

    # Maps each distance to a name; an equal distance overwrites the earlier
    # entry.
    name_by_distance = {}
    for hex_value, name in webcolors.css3_hex_to_names.items():
        red, green, blue = webcolors.hex_to_rgb(hex_value)
        red_part = (red - requested_colour[0]) ** 2
        green_part = (green - requested_colour[1]) ** 2
        blue_part = (blue - requested_colour[2]) ** 2
        name_by_distance[(red_part + green_part + blue_part)] = name

    nearest = min(name_by_distance)
    return name_by_distance[nearest]
    
def color2(image):
    """Cluster ``image`` into three colours and plot their names and shares.

    The function only plots and prints; it returns ``None``.

    Args:
        image: Three-channel image array; it is flattened to one row per pixel.
    """
    pixels = image.reshape((image.shape[0] * image.shape[1], 3))
    clt = KMeans(n_clusters = 3)
    clt.fit(pixels)

    # These definitions had been commented out although the plotting code
    # below still uses hist, bar and ax, which made the function fail with a
    # NameError.
    hist = centroid_histogram(clt)
    bar = plot_colors(hist, clt.cluster_centers_)

    # Closest colour name for each cluster centre.
    labs = [_hex_to_name('#' + ''.join(["%0.2X"%(j) for j in i.astype("uint8").tolist()]))[1] for i in clt.cluster_centers_]

    fig = plt.figure()
    ax = fig.add_axes([0,0,1,1])

    ax.bar(labs, hist)
    plt.axis("off")
    plt.hist(hist)
    print(hist)
    plt.show()
    plt.imshow(bar)
    print(list(zip(hist, clt.cluster_centers_)))

from itertools import cycle

results = tfnet.return_predict(img)
cv2.destroyAllWindows()

# The palette is cycled so detections beyond len(colors) are not dropped.
for clr, result in zip(cycle(colors), results):
    tl = (result['topleft']['x'], result['topleft']['y'])
    br = (result['bottomright']['x'], result['bottomright']['y'])
    label = result['label']
    clipped = img[tl[1]:br[1], tl[0]:br[0]]

    # An empty crop cannot be converted or clustered; skip the colour print
    # for it but still draw the box.
    if clipped.size:
        clipped = cv2.cvtColor(clipped, cv2.COLOR_RGB2BGR)
        print(color(clipped))

    img = cv2.rectangle(img, tl, br, clr, 7)
    img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

# img was converted to RGB when it was loaded, whereas cv2.imshow expects BGR,
# so convert a copy for display. The former post-loop conversion of `clipped`
# was unused and raised NameError when nothing was detected.
cv2.imshow("f", cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
cv2.waitKey(100000)
cv2.destroyAllWindows()

### Face detection

In [None]:
from IPython.display import clear_output
import pickle

print("[INFO] loading encodings...")
# NOTE: unpickling can execute arbitrary code; only load a file you produced
# yourself (e.g. with the training cell).
with open(DATASET_FILE_NAME, "rb") as encodings_file:
    data = pickle.load(encodings_file)

print("[INFO] recognizing faces...")

cam = cv2.VideoCapture(0)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 480)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)

cv2.namedWindow("recognize faces")

# Number of stored encodings per known name; the denominator of the match
# ratio below.
num_names = Counter(data['names'])
present_name = list()

try:
    while True:
        ret, image = cam.read()
        if not ret:
            break
        k = cv2.waitKey(1)

        # 27 is the Escape key.
        if k % 256 == 27:
            print("\nEscape hit, closing...")
            break

        clear_output(wait=True)

        # The training cell encodes faces from RGB images, so convert the BGR
        # camera frame before detection; drawing still happens on `image`.
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        boxes = face_recognition.face_locations(rgb, model='hog')
        encodings = face_recognition.face_encodings(rgb, boxes)
        names, present_name = [], []

        # find faces
        for encoding in encodings:
            matches = face_recognition.compare_faces(data["encodings"], encoding)
            name = "Unknown"

            # Number of matching stored encodings per known name.
            counts = Counter(
                known for known, matched in zip(data["names"], matches) if matched
            )

            if counts:
                best = max(counts, key=counts.get)
                num_prop = counts[best] / num_names[best]

                print("[INFO] detected %s with '%f' accuracy ..." %
                      (best, num_prop))

                # Accept only when more than 85% of that person's stored
                # encodings match.
                if num_prop > 0.85:
                    print("[INFO] Found %s (%f)." % (best, num_prop))
                    present_name.append(best)
                    name = best

            names.append(name)

        # draw rectangle over found faces
        for ((top, right, bottom, left), name) in zip(boxes, names):
            # Grow the box around the detected face before drawing it.
            left, top, right, bottom = left - 40, top - 80, right + 40, bottom + 20

            cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)

            s = cv2.getTextSize(name, FONT, 0.75, 1)

            # Filled banner behind the name, sized from the text extent.
            cv2.rectangle(image, (left, top), (left + s[0][0] + 40, top + s[0][1] + 20), (255, 0, 0), -1)
            name = name.replace("_", " ")
            name = name.title()

            cv2.putText(image, name, (left + 20, top + 10 + s[0][1]), FONT, 0.75, (255, 255, 255), 2)

        cv2.imshow("recognize faces", image)
finally:
    # Free the camera and close the window on every exit path.
    cam.release()

    cv2.destroyAllWindows()

## Testing

In [None]:
import cv2


def bincount_numexpr_app(a):
    """Return the most frequent colour triple in ``a``.

    ``a`` is flattened to rows along its last axis, the first three values of
    each row are packed into one integer code, the most common code is found
    with ``np.bincount`` and unpacked again.

    Args:
        a: Array whose last axis holds at least three channel values.

    Returns:
        The 3-tuple of indices produced by ``np.unravel_index`` for the most
        common code over a ``(256, 256, 256)`` range.
    """
    rows = a.reshape(-1, a.shape[-1])
    col_range = (256, 256, 256) # generically : rows.max(0)+1

    operands = dict(
        a0=rows[:, 0],
        a1=rows[:, 1],
        a2=rows[:, 2],
        s0=col_range[0],
        s1=col_range[1],
    )
    codes = ne.evaluate('a0*s0*s1+a1*s0+a2', operands)

    most_common = np.bincount(codes).argmax()
    return np.unravel_index(most_common, col_range)

### Training Facial data

In [None]:
import os
import pickle
from imutils import paths

TRAINING_DATASET = 'dataset_training'
TESTING_DATASET = 'dataset_testing'

print("[INFO] fetching images...")
image_paths = list(paths.list_images(TRAINING_DATASET))

knownEncodings = []
knownNames = []

num_images = len(image_paths)

for (i, image_path) in enumerate(image_paths):
    # The person's name is the directory that contains the image.
    name = image_path.split(os.path.sep)[-2]
    print("[INFO] processing image (%s) %d/%d..." % (name, i + 1, num_images), end="\r")

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable files; skip them rather than
        # crash in cvtColor.
        print("\n[WARN] could not read %s, skipping." % (image_path,))
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    boxes = face_recognition.face_locations(image, model='cnn')

    encodings = face_recognition.face_encodings(image, boxes)

    # One name entry per encoding, so both lists stay index-aligned.
    knownEncodings.extend(encodings)
    knownNames.extend([name] * len(encodings))

print("\n[INFO] serializing encodings...")
data = {"encodings": knownEncodings, "names": knownNames}

# The context manager closes the file even if serialisation fails.
with open(DATASET_FILE_NAME, "wb") as f:
    f.write(pickle.dumps(data))

print("[INFO] DONE.")