# Attempting to recognize Simpsons characters w/ a CNN and the below Kaggle dataset

https://www.kaggle.com/alexattia/the-simpsons-characters-dataset

In [1]:
import cv2
import numpy as np
import os
# import PIL
import tensorflow as tf

import matplotlib.image as mpimg
from matplotlib import pyplot as plt
from sklearn.utils import shuffle
from tensorflow.contrib.layers import fully_connected

  from ._conv import register_converters as _register_converters


In [2]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [3]:
def flatten(dimData, images):
    images = np.array(images)
    print(images.shape)
    images = images.reshape(len(images), dimData)
    images = images.astype('float32')
    images /=255
    return images

# Get Data

In [4]:
def get_img_n_lbl(path, folders_arr):
    labels, images = [], []
    key = {}
    for folder in folders_arr:
#         print(folder)
        label = folders_arr.index(folder)
        key[folder] = label
        
        for img in os.listdir(path+'/'+folder):
            try:
                image = cv2.imread(path+'/'+folder+'/'+img, 0)
                img = cv2.resize(image, (200,200))
                images.append(img)
                labels.append(label)
            except Exception as e:
                print(e)
    print(key)
    return images, labels, key
    

In [5]:
def get_sub_dirs(path):
    return [dI for dI in os.listdir(path) if os.path.isdir(os.path.join(path,dI))]

In [6]:
path = os.getcwd()+"/../temp_data_storage/simpsons_dataset"

In [7]:
sub_dirs = get_sub_dirs(path)

In [8]:
images, labels, key = get_img_n_lbl(path, sub_dirs)

OpenCV(3.4.1) /feedstock_root/build_artefacts/opencv_1520722599420/work/opencv-3.4.1/modules/imgproc/src/resize.cpp:4044: error: (-215) ssize.width > 0 && ssize.height > 0 in function resize

{'hans_moleman': 0, 'marge_simpson': 1, 'comic_book_guy': 2, 'lisa_simpson': 3, 'sideshow_mel': 4, 'waylon_smithers': 5, 'jimbo_jones': 6, 'lenny_leonard': 7, 'maggie_simpson': 8, 'rainier_wolfcastle': 9, 'principal_skinner': 10, 'bumblebee_man': 11, 'homer_simpson': 12, 'chief_wiggum': 13, 'cletus_spuckler': 14, 'sideshow_bob': 15, 'professor_john_frink': 16, 'gil': 17, 'abraham_grampa_simpson': 18, 'moe_szyslak': 19, 'groundskeeper_willie': 20, 'ralph_wiggum': 21, 'agnes_skinner': 22, 'carl_carlson': 23, 'barney_gumble': 24, 'bart_simpson': 25, 'lionel_hutz': 26, 'edna_krabappel': 27, 'patty_bouvier': 28, 'nelson_muntz': 29, 'ned_flanders': 30, 'otto_mann': 31, 'charles_montgomery_burns': 32, 'snake_jailbird': 33, 'mayor_quimby': 34, 'fat_tony': 35, 'jasper_beardly': 36, 'miss_hoover': 37, 'selm

# Split into training and test sets

In [9]:
def split_test_train(images, labels):
    #shuffle data
    labels, images = shuffle(labels, images, random_state=0)    
    
    to_train = 0
    train_images, train_labels, test_images, test_labels = [],[],[],[]
    for img, lbl in zip(images, labels):
#         print(to_train)
        try:
            if to_train < 10:
                train_images.append(img)
                train_labels.append(lbl)
                to_train += 1
            else:
                test_images.append(img)
                test_labels.append(lbl)
                to_train = 0
        except Exception as e:
            print(e)
    
            
    return np.asarray(train_images), np.asarray(train_labels), np.asarray(test_images), np.asarray(test_labels)

In [10]:
train_images, train_labels, test_images, test_labels = split_test_train(images, labels)

In [11]:
train_images[0].shape

(200, 200)

In [12]:
np.prod(train_images[0].shape)

40000

In [13]:
dataDim = np.prod(images[0].shape)
train_data  = flatten(dataDim, train_images)
test_data = flatten(dataDim, test_images)

(19030, 200, 200)
(1903, 200, 200)


In [29]:
#batch_size, height, width, channels = dataset.shape
training_img_cnt, height, width = train_images.shape
channels = 1
n_inputs = height * width

conv1_fmaps = 32
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 1
conv2_pad = "SAME"
conv2_dropout_rate = 0.25

conv3_fmaps = 16

pool3_fmaps = conv2_fmaps

n_fc1 = 128
fc1_dropout_rate = 0.5

n_fc2 = 64
fc2_dropout_rate = 0.5

n_outputs = len(key)


In [55]:
with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
    y = tf.placeholder(tf.int32, shape=[None], name="y")

conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                         strides=conv1_stride, padding=conv1_pad,
                         activation=tf.nn.relu, name="conv1")
conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv2")
print(conv1.shape)
print(conv2.shape)

with tf.name_scope("pool3"):
    pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    print(pool3.shape)
#     pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 50 * 50 * 4])
    print(pool3_flat.shape)
    pool3_flat_drop = tf.layers.dropout(pool3, conv2_dropout_rate, training=True)

conv3 = tf.layers.conv2d(pool3_flat_drop, filters=conv3_fmaps, kernel_size=conv1_ksize,
                         strides=conv1_stride, padding=conv1_pad,
                         activation=tf.nn.relu, name="conv3")
conv4 = tf.layers.conv2d(conv3, filters=conv3_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv4")

conv5 = tf.layers.conv2d(conv4, filters=conv3_fmaps, kernel_size=conv1_ksize,
                         strides=conv1_stride, padding=conv1_pad,
                         activation=tf.nn.relu, name="conv5")
conv6 = tf.layers.conv2d(conv5, filters=conv3_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv6")

with tf.name_scope("pool7"):
    print("pool 7")
    pool7 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    print(pool7.shape)
    pool7_flat = tf.reshape(pool7, shape=[-1, pool3_fmaps * 50 * 50 * 4])
    print(pool7_flat.shape)
    pool7_flat_drop = tf.layers.dropout(pool7_flat, conv2_dropout_rate, training=True)

with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool7_flat_drop, n_fc1, activation=tf.nn.relu, name="fc1")
    print(fc1.shape)
    fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=True)

with tf.name_scope("fc2"):
    fc2 = tf.layers.dense(fc1_drop, n_fc2, activation=tf.nn.relu, name="fc2")
    print(fc2.shape)
    fc2_drop = tf.layers.dropout(fc2, fc2_dropout_rate, training=True)
    print(fc2_drop.shape)
    
with tf.name_scope("output"):
    logits = tf.layers.dense(fc2_drop, n_outputs, name="output")
    print(logits.shape)
    
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

(?, 200, 200, 32)
(?, 200, 200, 64)
(?, 100, 100, 64)
(?, 640000)
pool 7
(?, 100, 100, 64)
(?, 640000)
(?, 128)
(?, 64)
(?, 64)
(?, 47)


In [56]:
n_epochs = 500
batch_size = 20
train_acc = []
test_acc = []


with tf.Session() as sess:
    saver.restore(sess, "./models/simpsons.ckpt")
    init.run()
    for epoch in range(n_epochs):
        if epoch % 10 == 0:
            print(epoch)
        batch_old = 0

        for iteration in range(training_img_cnt // batch_size):
            X_batch = train_data[batch_old: batch_old + batch_size]
            y_batch = train_labels[batch_old: batch_old + batch_size]
#             print(type(y_batch))
#             print(iteration)s
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            batch_old += batch_size
        train_acc.append(accuracy.eval(feed_dict={X: X_batch, y: y_batch}))
#     acc_test = accuracy.eval(feed_dict={X: test_data, y: test_labels})
#     print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
        test_acc.append(accuracy.eval(feed_dict={X: test_data[:100], y: test_labels[:100]}))
#         print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
        if epoch % 10 == 0:
            print(epoch)
            save_path = saver.save(sess, "./models/simpsons.ckpt")
    

INFO:tensorflow:Restoring parameters from ./models/simpsons.ckpt
0
0
10
10
20
20
30
30
40
40
50
50
60
60
70
70
80
80
90
90
100
100
110
110
120
120
130
130
140
140
150
150
160
160
170
170
180
180
190
190
200
200
210
210
220
220
230
230
240
240
250
250
260
260
270
270
280
280
290
290
300
300
310
310
320
320
330
330
340
340
350
350
360
360
370
370
380
380
390
390
400
400
410
410
420
420
430
430
440
440
450
450
460
460
470
470
480
480
490
490


In [54]:
reset_graph()

In [44]:
with tf.Session() as sess:
    saver.restore(sess, "./models/simpsons.ckpt")
    init.run()
    acc_test = accuracy.eval(feed_dict={X: test_data[:100], y: test_labels[:100]})
    print("Test accuracy:", acc_test)

INFO:tensorflow:Restoring parameters from ./models/simpsons.ckpt


NotFoundError: Key conv1/bias/Adam_1 not found in checkpoint
	 [[Node: init_and_save/save/RestoreV2_2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_init_and_save/save/Const_0_0, init_and_save/save/RestoreV2_2/tensor_names, init_and_save/save/RestoreV2_2/shape_and_slices)]]
	 [[Node: init_and_save/save/RestoreV2_16/_61 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_144_init_and_save/save/RestoreV2_16", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

Caused by op 'init_and_save/save/RestoreV2_2', defined at:
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/asyncio/base_events.py", line 421, in run_forever
    self._run_once()
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/asyncio/base_events.py", line 1431, in _run_once
    handle._run()
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-43-081315d67bb5>", line 73, in <module>
    saver = tf.train.Saver()
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1239, in __init__
    self.build()
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1248, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1284, in _build
    build_save=build_save, build_restore=build_restore)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 765, in _build_internal
    restore_sequentially, reshape)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 428, in _AddRestoreOps
    tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 268, in restore_op
    [spec.tensor.dtype])[0])
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1031, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
    op_def=op_def)
  File "/home/cecil/apps/anaconda3/envs/ds/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

NotFoundError (see above for traceback): Key conv1/bias/Adam_1 not found in checkpoint
	 [[Node: init_and_save/save/RestoreV2_2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_init_and_save/save/Const_0_0, init_and_save/save/RestoreV2_2/tensor_names, init_and_save/save/RestoreV2_2/shape_and_slices)]]
	 [[Node: init_and_save/save/RestoreV2_16/_61 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_144_init_and_save/save/RestoreV2_16", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]


In [18]:
train_labels[0:200].shape

(200,)

In [58]:
train_acc

[0.15,
 0.35,
 0.4,
 0.35,
 0.55,
 0.55,
 0.65,
 0.55,
 0.45,
 0.4,
 0.55,
 0.65,
 0.75,
 0.6,
 0.65,
 0.55,
 0.6,
 0.65,
 0.65,
 0.75,
 0.75,
 0.6,
 0.7,
 0.75,
 0.7,
 0.45,
 0.7,
 0.7,
 0.8,
 0.85,
 0.85,
 0.8,
 0.7,
 0.75,
 0.7,
 0.75,
 0.75,
 0.8,
 0.85,
 0.75,
 0.65,
 0.75,
 0.75,
 0.6,
 0.8,
 0.75,
 0.8,
 0.8,
 0.85,
 0.7,
 0.75,
 0.85,
 0.65,
 0.8,
 0.75,
 0.7,
 0.9,
 0.75,
 0.55,
 0.75,
 0.8,
 0.65,
 0.9,
 0.75,
 0.85,
 0.7,
 0.75,
 0.85,
 0.65,
 0.85,
 0.7,
 0.75,
 0.7,
 0.75,
 0.9,
 0.7,
 0.8,
 0.75,
 0.75,
 0.85,
 0.7,
 0.8,
 0.8,
 0.9,
 0.65,
 0.65,
 0.75,
 0.9,
 0.8,
 0.7,
 0.85,
 0.8,
 0.45,
 0.7,
 0.75,
 0.85,
 0.7,
 0.85,
 0.75,
 0.65,
 0.7,
 0.8,
 0.6,
 0.9,
 0.8,
 0.75,
 0.65,
 0.75,
 0.8,
 0.8,
 0.7,
 0.7,
 0.85,
 0.45,
 0.8,
 0.9,
 0.65,
 0.6,
 0.75,
 0.6,
 0.6,
 0.85,
 0.8,
 0.9,
 0.75,
 0.8,
 0.8,
 0.8,
 0.85,
 0.75,
 0.75,
 0.75,
 0.65,
 0.9,
 0.75,
 0.75,
 0.75,
 0.95,
 0.65,
 0.9,
 0.85,
 0.7,
 0.9,
 0.8,
 0.9,
 0.8,
 0.75,
 0.8,
 0.8,
 0.9,
 0.85,
 0.75,
 0.9,

In [57]:
test_acc

[0.15,
 0.2,
 0.27,
 0.28,
 0.25,
 0.22,
 0.2,
 0.29,
 0.26,
 0.31,
 0.33,
 0.33,
 0.38,
 0.37,
 0.37,
 0.32,
 0.37,
 0.35,
 0.38,
 0.41,
 0.38,
 0.39,
 0.33,
 0.38,
 0.37,
 0.33,
 0.38,
 0.36,
 0.37,
 0.46,
 0.33,
 0.43,
 0.32,
 0.35,
 0.38,
 0.4,
 0.37,
 0.38,
 0.37,
 0.39,
 0.42,
 0.45,
 0.39,
 0.37,
 0.4,
 0.49,
 0.41,
 0.4,
 0.44,
 0.37,
 0.32,
 0.38,
 0.41,
 0.48,
 0.44,
 0.37,
 0.39,
 0.43,
 0.39,
 0.39,
 0.34,
 0.39,
 0.41,
 0.35,
 0.39,
 0.41,
 0.44,
 0.39,
 0.44,
 0.37,
 0.37,
 0.38,
 0.41,
 0.36,
 0.38,
 0.43,
 0.44,
 0.42,
 0.39,
 0.39,
 0.36,
 0.44,
 0.38,
 0.4,
 0.42,
 0.34,
 0.36,
 0.4,
 0.35,
 0.39,
 0.42,
 0.33,
 0.42,
 0.31,
 0.33,
 0.41,
 0.34,
 0.39,
 0.39,
 0.39,
 0.35,
 0.37,
 0.37,
 0.35,
 0.33,
 0.41,
 0.35,
 0.37,
 0.39,
 0.35,
 0.32,
 0.38,
 0.35,
 0.26,
 0.34,
 0.38,
 0.28,
 0.36,
 0.35,
 0.42,
 0.42,
 0.42,
 0.4,
 0.36,
 0.39,
 0.37,
 0.34,
 0.36,
 0.35,
 0.33,
 0.38,
 0.32,
 0.39,
 0.34,
 0.37,
 0.44,
 0.42,
 0.36,
 0.41,
 0.43,
 0.42,
 0.38,
 0.34,
 0.37,
