In [5]:
import numpy as np
import cv2
import tensorflow as tf
import os
import sys

import matplotlib.pyplot as plt

# Disable tensorflow logs
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

MNIST_MODEL = "./trained_models/mnist/"

H, W = 28, 28
EPSILON = 0.25
MIN_AREA = 750

def load_trained_model():

    sess=tf.Session()    
    #First let's load meta graph and restore weights
    saver = tf.train.import_meta_graph(os.path.join(MNIST_MODEL, "model.ckpt.meta"))
    saver.restore(sess, os.path.join(MNIST_MODEL, "model.ckpt"))

    graph = tf.get_default_graph()

    x = graph.get_tensor_by_name("x:0")
    out = graph.get_tensor_by_name("out/BiasAdd:0")

    return sess, x, out


def find_digits(img, reader):
    img_copy = img.copy()

    img_h, img_w, _ = img_copy.shape

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (15, 15), 0)
    thresh = cv2.adaptiveThreshold(blur, 255, 1, 1, 15, 2)

    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[1]

    digit_roi = None

    for cnt in contours:
        area = cv2.contourArea(cnt)
        if cv2.contourArea(cnt) > MIN_AREA:
            [x,y,w,h] = cv2.boundingRect(cnt)

            if w > h*0.3 and w < h:

                eps_h = int(EPSILON * h)
                eps_w = int(EPSILON * w)
                roi = gray[y: y + h, x: x + w]

                digit, prob, digit_roi = reader(roi)

                if prob > 0.1:
                    cv2.putText(img_copy, str(digit), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0))
                    cv2.rectangle(img_copy, (x,y), (x+w,y+h), (0, 0, 255), 2)


    return img_copy, digit_roi

def softmax(_in):
    return np.exp(_in) / np.sum(np.exp(_in))

def reader(img, sess, x, out):
    img = img.copy()
    img = 255 - img

    img_h, img_w = img.shape

    ret, _ = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    img[img <= ret] = 0

    img = (img - np.min(img)) / (np.max(img) - np.min(img))

    if img_h > img_w:
        pad = img_h - img_w
        img = np.pad(img, ((0,0), (pad//2, pad//2)), 'constant', constant_values=0)
    else:
        pad = img_w - img_h
        img = np.pad(img, ((pad//2 , pad//2), (0,0)), 'constant', constant_values=0)

    img_h, img_w = img.shape

    pad_w = int(EPSILON * img_w)
    pad_h = int(EPSILON * img_h)

    img = np.pad(img, ((pad_h , pad_h), (pad_w,pad_w)), 'constant', constant_values=0)

    img = cv2.resize(img, (H, W), cv2.INTER_NEAREST)

    img_input = np.reshape(img, (1, H, W, 1))

    graph_out, = sess.run([out], feed_dict={x: img_input})

    graph_out = np.squeeze(graph_out)

    char = np.argmax(graph_out)
    prob = max(softmax(graph_out))

    return char, prob, img


def run(i, sess, x, out):
    cap = cv2.VideoCapture(i)

    cv2.namedWindow("Frame", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty("Frame",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)

    while(True):
        ret, frame = cap.read()

        h, w, c = frame.shape

        notated_img, _ = find_digits(frame, lambda img: reader(img, sess, x, out))

        cv2.imshow("Frame", notated_img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def test_camera():
    cameras = []
    for i in range(3):
        cap = cv2.VideoCapture(i)
        ret, frame = cap.read()
        try:
            h, w, c = frame.shape
        except AttributeError:
            continue
        cap.release()
        cameras.append(i)

    return cameras

if __name__ == "__main__":
    cameras = test_camera()
    if len(cameras) == 0:
        print("Webcam not detected.")
        sys.exit(1)

    if len(cameras) == 1:
        print("Using webcam # {}".format(cameras[0]))
        i = cameras[0]
    else:
        i = int(input("{} cameras detected. Provide an index of the one to use ({} to {}): ".format(len(cameras), 0, len(cameras)-1)))

    sess, x, out = load_trained_model()
    run(i, sess, x, out)


INFO:tensorflow:Restoring parameters from ./trained_models/mnist/model.ckpt


In [3]:
import numpy as np
import cv2
import tensorflow as tf
import os
import sys

import matplotlib.pyplot as plt
import time

import pickle

MNIST_MODEL = "./trained_models/simpsons/"
CHARACTER_MAP = "./dumps/character_map.dump"

H, W = 128, 128

def load_characters():
    f = open(CHARACTER_MAP, "rb", pickle.HIGHEST_PROTOCOL)
    return pickle.load(f)


def load_trained_model():

    sess=tf.Session()    
    #First let's load meta graph and restore weights
    saver = tf.train.import_meta_graph(os.path.join(MNIST_MODEL, "model.ckpt.meta"))
    saver.restore(sess, os.path.join(MNIST_MODEL, "model.ckpt"))

    graph = tf.get_default_graph()

    x = graph.get_tensor_by_name("x:0")
    out = graph.get_tensor_by_name("out/BiasAdd:0")

    return sess, x, out


def softmax(_in):
    return np.exp(_in) / np.sum(np.exp(_in))


def classify_character(img, character_map, sess, x, out):
    img_copy = img.copy()
    img_copy = cv2.cvtColor(img_copy, cv2.COLOR_BGR2RGB)
    img_h, img_w, _ = img_copy.shape

    img_copy = cv2.resize(img_copy, (H, W))

    graph_out = sess.run(out, feed_dict={x: np.reshape(img_copy, (1, H, W, 3))})

    graph_out = softmax(np.squeeze(graph_out))

    character = character_map.get(np.argmax(graph_out))
    p = max(graph_out)

    if p > 0.8:
        cv2.putText(img, character.replace("_", " "), (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

    return img


def run(vid, character_map, sess, x, out):

    FPS = 24

    cap = cv2.VideoCapture(vid)

    cv2.namedWindow("Frame", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty("Frame",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)

    now = time.time()
    while(True):
        ret, frame = cap.read()

        h, w, c = frame.shape

        img = classify_character(frame, character_map, sess, x, out)

        cv2.imshow("Frame", img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        interval = time.time() - now
        if interval < 1/FPS:
            time.sleep(1/FPS - interval)
        now = time.time()

    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    vid = sys.argv[1]
    sess, x, out = load_trained_model()
    character_map = load_characters()
    run(vid, character_map, sess, x, out)


INFO:tensorflow:Restoring parameters from ./trained_models/simpsons/model.ckpt


InvalidArgumentError: Restoring from checkpoint failed. This is most likely due to a mismatch between the current graph and the graph from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Assign requires shapes of both tensors to match. lhs shape= [128] rhs shape= [512]
	 [[node save/Assign_20 (defined at Users\Ionatan\AppData\Local\Temp/ipykernel_12792/1980171179.py:22) ]]

Original stack trace for 'save/Assign_20':
  File "Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "Anaconda3\lib\site-packages\traitlets\config\application.py", line 845, in launch_instance
    app.start()
  File "Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 667, in start
    self.io_loop.start()
  File "Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "Anaconda3\lib\asyncio\base_events.py", line 539, in run_forever
    self._run_once()
  File "Anaconda3\lib\asyncio\base_events.py", line 1775, in _run_once
    handle._run()
  File "Anaconda3\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
    await self.process_one()
  File "Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
    await dispatch(*args)
  File "Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
    await result
  File "Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
    reply_content = await reply_content
  File "Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 345, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2899, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2944, in _run_cell
    return runner(coro)
  File "Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3170, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3361, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "Users\Ionatan\AppData\Local\Temp/ipykernel_12792/1980171179.py", line 155, in <module>
    sess, x, out = load_trained_model()
  File "Users\Ionatan\AppData\Local\Temp/ipykernel_12792/1980171179.py", line 22, in load_trained_model
    saver = tf.train.import_meta_graph(os.path.join(MNIST_MODEL, "model.ckpt.meta"))
  File "Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1449, in import_meta_graph
    **kwargs)[0]
  File "Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1473, in _import_meta_graph_with_return_elements
    **kwargs))
  File "Anaconda3\lib\site-packages\tensorflow\python\framework\meta_graph.py", line 857, in import_scoped_meta_graph_with_return_elements
    return_elements=return_elements)
  File "Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "Anaconda3\lib\site-packages\tensorflow\python\framework\importer.py", line 443, in import_graph_def
    _ProcessNewOps(graph)
  File "Anaconda3\lib\site-packages\tensorflow\python\framework\importer.py", line 236, in _ProcessNewOps
    for new_op in graph._add_new_tf_operations(compute_devices=False):  # pylint: disable=protected-access
  File "Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3751, in _add_new_tf_operations
    for c_op in c_api_util.new_tf_operations(self)
  File "Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3751, in <listcomp>
    for c_op in c_api_util.new_tf_operations(self)
  File "Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3641, in _create_op_from_tf_operation
    ret = Operation(c_op, self)
  File "Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()
