### In this notebook, we will use simple comparison of the image and Waldo to find the Waldo in the image. This method is a benchmark for later, machine learning methods.

In [52]:
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image

In [53]:
# Get the data - every image from the 64 directory

train_ds= keras.utils.image_dataset_from_directory(
    '../data/src1/64',
    validation_split=0.2,
    subset="training",
    seed=1337,
    image_size=(64, 64),
    batch_size=32)

print(train_ds.class_names)

# print both class names and the number of images in each class

for i in range(len(train_ds.class_names)):
    print(train_ds.class_names[i], len(train_ds.file_paths[i]))


Found 5376 files belonging to 2 classes.
Using 4301 files for training.
['notwaldo', 'waldo']


In [54]:
# Get the Waldo image as an rgb array
Waldos = []

Waldos.append(keras.utils.img_to_array(Image.open('../data/src2/OnlyWaldoHeads/3.png').convert('RGB')))
Waldos.append(keras.utils.img_to_array(Image.open('../data/src2/OnlyWaldoHeads/5.png').convert('RGB')))
Waldos.append(keras.utils.img_to_array(Image.open('../data/src2/OnlyWaldoHeads/6.png').convert('RGB')))
Waldos.append(keras.utils.img_to_array(Image.open('../data/src2/OnlyWaldoHeads/9.png').convert('RGB')))
Waldos.append(keras.utils.img_to_array(Image.open('../data/src2/OnlyWaldoHeads/14.png').convert('RGB')))
Waldos.append(keras.utils.img_to_array(Image.open('../data/src2/OnlyWaldoHeads/16.png').convert('RGB')))

Waldos = np.array(Waldos)



print(Waldos.shape)

(6,)


  Waldos = np.array(Waldos)


In [63]:
# Slide the Waldo image over the data

def find_waldo_in_img(waldos, img):
    min_res = None
    min_val = None
    min_waldo = None
    min_idx = None
    for waldo in waldos:
        waldo = waldo / 255.0
        img = img / 255.0
        waldo_h, waldo_w, _ = waldo.shape
        img_h, img_w, _ = img.shape
        # for each possible position of the waldo image calculate the difference between the waldo image and the image
        # and sum the differences
        # return the position with the smallest difference and matrix of differences
        result = np.zeros((img_h - waldo_h, img_w - waldo_w))
        for i in range(img_h - waldo_h):
            for j in range(img_w - waldo_w):
                result[i, j] = np.sum(np.abs(img[i:i + waldo_h, j:j + waldo_w] - waldo))
        min_local = np.unravel_index(np.argmin(result), result.shape)
        if min_res is None or np.min(result) < min_val:
            min_res = result    
            min_val = np.min(result)
            min_waldo = waldo
            min_idx = min_local
    return min_res, min_val, min_waldo, min_idx

In [65]:
# Calculate the average minimal difference in images with Waldo and without Waldo

# split the data into images with Waldo and without Waldo

sum_waldo = 0
sum_no_waldo = 0
count_waldo = 0
count_no_waldo = 0

batches_tested = 0

for image_batch, labels_batch in train_ds:
    # if batch has no images with Waldo, skip it
    if np.sum(labels_batch) == 0:
        continue
    batches_tested += 1
    for i in range(len(image_batch)):
        if labels_batch[i] == 1:
            count_waldo += 1
            res, min_val, min_waldo, min_idx = find_waldo_in_img(Waldos, image_batch[i])
            sum_waldo += min_val
        else:
            count_no_waldo += 1
            res, min_val, min_waldo, min_idx = find_waldo_in_img(Waldos, image_batch[i])
            sum_no_waldo += min_val
    # print(count_waldo, count_no_waldo, batches_tested)
    if batches_tested >= 10:
        break

if count_waldo != 0:
    print(sum_waldo/count_waldo)
if count_no_waldo != 0:
    print(sum_no_waldo/count_no_waldo)
    
print

1 31 1
2 62 2
3 93 3
4 124 4
5 155 5
6 186 6
7 217 7
8 248 8
9 279 9
10 310 10
597.7921142578125
597.6803372290826


<function print>