# Workpackage: Data engineering

Research Question:
**"How far can we simplify the input data and still be able to distinguish between Rock, Paper, and Scissors?"**

Research Answer:
We can simplify the input pictures by converting them to greyscale and reducing the resolution. Both methods can be used without losing many of the needed features. Additionally, we can blur the images to remove details and keep only rough shapes, and then use a segmentation method such as Otsu's to obtain a binary image containing the shape of the hand. On simple and clear input images this can work so well that, by computing histograms, one could distinguish the gestures without machine learning at all. The drawback is that the result relies heavily on the segmentation method, which therefore needs to be chosen carefully. Another problem is the segmentation of more complex data: there, segmentation with basic methods has been shown to be very inaccurate and in part not useful at all. However, this could also be because the implemented Otsu method is a global-threshold segmentation method, which is not suited to this use case. Given a good segmentation method for this use case, one could also cut out the background (as implemented here), so that the ML algorithm only has to distinguish between face and hand if the segmentation method lacks that capability.
**All in all, the simplest robust image we were able to generate that could robustly optimize the training is the blurred greyscale image with reduced resolution. Further evaluation is needed to determine whether the blurring really benefits the training.**

In [None]:
from scipy.ndimage.filters import gaussian_filter
import os, random
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np


def segment_otsu(img):
    """Binarize a greyscale image with Otsu's global threshold.

    A 256-bin histogram of the pixel values is built and the bin centre that
    maximizes the between-class variance of the two resulting pixel groups is
    used as threshold.  The chosen threshold is printed.

    Args:
        img: Greyscale image as an array, or anything ``np.asarray`` accepts.

    Returns:
        An array with the shape and dtype of ``img`` holding 255 where the
        pixel is strictly above the threshold and 0 elsewhere.  A constant
        image has no second class to separate; it comes back entirely 255.
    """
    img = np.asarray(img)
    bins_num = 256

    # Histogram over the value range that actually occurs in the image.
    hist, bin_edges = np.histogram(img, bins=bins_num)

    # Scale so that the fullest bin is 1.0.  An empty image yields an all-zero
    # histogram, which is left untouched to avoid a 0/0 division.
    hist = hist.astype(float)
    peak = hist.max()
    if peak > 0:
        hist = hist / peak

    # Centres of the histogram bins.
    bin_mids = (bin_edges[:-1] + bin_edges[1:]) / 2.

    # Class weights w1(t) (bins up to t) and w2(t) (bins from t upwards).
    weight1 = np.cumsum(hist)
    weight2 = np.cumsum(hist[::-1])[::-1]

    # Class means mu1(t) and mu2(t).  A class without any pixels has weight 0;
    # its mean is defined as 0 here instead of becoming NaN, so the argmax
    # below is never hijacked by NaN entries.
    weighted_sum1 = np.cumsum(hist * bin_mids)
    weighted_sum2 = np.cumsum((hist * bin_mids)[::-1])[::-1]
    mean1 = np.divide(weighted_sum1, weight1,
                      out=np.zeros_like(weighted_sum1), where=weight1 > 0)
    mean2 = np.divide(weighted_sum2, weight2,
                      out=np.zeros_like(weighted_sum2), where=weight2 > 0)

    inter_class_variance = weight1[:-1] * weight2[1:] * (mean1[:-1] - mean2[1:]) ** 2

    # The best split is the one with the largest between-class variance.
    index_of_max_val = np.argmax(inter_class_variance)

    threshold = bin_mids[:-1][index_of_max_val]
    print("Otsu's algorithm implementation thresholding result: ", threshold)

    # Start from an all-background image and switch on the foreground pixels.
    result = np.zeros_like(img)
    result[img > threshold] = 255

    return result

def lower_resolution_image(image, size=(80, 60)):
    """Return ``image`` resized to ``size``.

    Args:
        image: Object exposing ``resize(size)``; the notebook passes a PIL
            image here.
        size: Target size handed to ``image.resize``.  For a PIL image this
            is ``(width, height)``.  Defaults to the 80x60 used so far.

    Returns:
        Whatever ``image.resize(size)`` returns.
    """
    return image.resize(size)

def blur_image(image, sigma=1.5):
    """Smooth ``image`` with a Gaussian filter.

    Args:
        image: Array-like image passed straight to ``gaussian_filter``.
        sigma: Standard deviation of the Gaussian kernel.  Defaults to the
            1.5 used so far; larger values blur more strongly.

    Returns:
        The filtered image as returned by ``gaussian_filter``.
    """
    return gaussian_filter(image, sigma=sigma)


def get_data(directory="Dataset/Rock_Paper_Sissors_Photos/train/scissors"):
    """Pick a random image file from ``directory`` and load it.

    The path of the chosen file is printed.

    Args:
        directory: Folder to draw the image from.  Defaults to the scissors
            training folder used so far.

    Returns:
        Tuple ``(original_image, greyscale_image)`` of PIL images, the second
        being the first converted to mode ``'L'``.

    Raises:
        IndexError: If ``directory`` contains no regular files.
    """
    # os.listdir also returns sub-directories; only regular files can be opened.
    candidates = [
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    ]
    path = os.path.join(directory, random.choice(candidates))
    print(path)

    # Open the file once and derive the greyscale version from it instead of
    # opening and decoding the same file a second time.
    original_image = Image.open(path)
    greyscale_image = original_image.convert('L')  # convert to grayscale

    return original_image, greyscale_image

def _content_bounds(sums):
    """Return ``(start, stop)`` spanning the first to the last non-zero entry of ``sums``."""
    content = np.flatnonzero(sums != 0)
    if content.size == 0:
        # Nothing but background: an empty span removes everything.
        return 0, 0
    return content[0], content[-1] + 1


def crop_image(original, image):
    """Trim the empty border of ``image`` and cut ``original`` the same way.

    Leading and trailing columns and rows of ``image`` whose pixel sum is 0
    are removed.  Zero rows/columns that lie between non-zero ones are kept.
    The identical rows and columns are removed from ``original``.

    Args:
        original: Array-like image with the same height and width as
            ``image``.
        image: 2-D array-like segmentation (0 = background).

    Returns:
        Tuple ``(cropped_original, cropped_image)`` of new arrays; the inputs
        are not modified.  If ``image`` is entirely zero, both results have
        height and width 0.
    """
    original = np.asarray(original)
    image = np.asarray(image)

    # ndarray.sum accumulates narrow integer types in a platform-sized
    # integer, so e.g. a tall column of 255s in a uint8 mask cannot wrap to 0.
    col_start, col_stop = _content_bounds(image.sum(axis=0))
    row_start, row_stop = _content_bounds(image.sum(axis=1))

    # Copy so that callers get independent, writable arrays rather than views.
    cropped_original = original[row_start:row_stop, col_start:col_stop].copy()
    cropped_image = image[row_start:row_stop, col_start:col_stop].copy()

    return cropped_original, cropped_image

def delete_background(original, segmentation):
    """Paint every background pixel of ``original`` white (255).

    A pixel counts as background where ``segmentation`` equals 0.
    ``original`` is modified in place and returned.
    """
    is_background = segmentation == 0
    original[is_background] = 255
    return original

def visualize_greyscale(image, name):
    """Show ``image`` with the grey colormap in a new 4x3 figure, axes hidden.

    ``name`` is accepted but currently unused: no title is drawn.
    """
    _, axes = plt.subplots(figsize=(4, 3))
    axes.imshow(image, cmap="gray")
    axes.set_axis_off()

def visualize_image(image, name):
    """Show ``image`` in a new 4x3 figure titled ``name``, axes hidden."""
    _, axes = plt.subplots(figsize=(4, 3))
    axes.imshow(image)
    axes.set_title(name)
    axes.set_axis_off()

def create_histogram(image):
    """Plot the summed intensity of every row and every column of ``image``.

    Two 4x3 figures with hidden axes are created: 'y plot' draws the row
    sums along the vertical axis (both axes inverted), 'x plot' draws the
    column sums along the horizontal axis.

    Args:
        image: 2-D NumPy array.
    """
    # One vectorized reduction per axis instead of a Python loop that
    # re-summed the whole image on every iteration.  Values are cast to float
    # to match the float buffers used previously.
    row_sums = image.sum(axis=1).astype(float)
    column_sums = image.sum(axis=0).astype(float)

    fig, ax = plt.subplots(figsize=(4, 3))
    ax.plot(row_sums, range(len(row_sums)))
    ax.invert_yaxis()
    ax.invert_xaxis()
    ax.set_title('y plot')
    ax.axis('off')

    fig, ax = plt.subplots(figsize=(4, 3))
    ax.plot(range(len(column_sums)), column_sums)
    ax.set_title('x plot')
    ax.axis('off')


In [None]:
def segment_simple_use_case():
    """Run the full simplification pipeline on one random image and plot each stage.

    Stages: load, greyscale, downscale, blur, Otsu segmentation, crop to the
    segmented shape, and background removal.  The row/column histograms of
    the cropped segmentation are plotted first, then one figure per stage.
    """
    colour, grey = get_data()
    small = lower_resolution_image(grey)
    smooth = blur_image(small)
    mask = segment_otsu(smooth)
    small_cropped, mask_cropped = crop_image(small, mask)
    without_background = delete_background(small_cropped, mask_cropped)
    create_histogram(mask_cropped)

    visualize_image(colour, "Original image")

    greyscale_stages = (
        (grey, "Grayscaling"),
        (small, "Resolution reduction"),
        (smooth, "Blurring"),
        (mask, "Segmentation"),
        (mask_cropped, "Centring and zooming in"),
        (without_background, "original without background"),
    )
    for stage_image, stage_name in greyscale_stages:
        visualize_greyscale(stage_image, stage_name)



In [None]:
# Run the whole simplification pipeline once on a randomly chosen image and
# show the figure for every stage.
segment_simple_use_case()

In [None]:
import glob
import cv2

def readin_data(path_array, label, size=(80, 60)):
    """Load all images matching the given glob patterns as small greyscale arrays.

    Args:
        path_array: Iterable of glob patterns.
        label: Value repeated once per loaded image in the returned label
            array.
        size: Target size handed to ``cv2.resize``, which expects
            ``(width, height)``.  Defaults to the 80x60 used so far.

    Returns:
        Tuple ``(images, label_array)``: a list of resized greyscale images
        and a NumPy array of ``len(images)`` copies of ``label``.  Files that
        OpenCV cannot read are skipped with a warning.
    """
    import warnings

    images = []
    for pattern in path_array:
        for filename in glob.glob(pattern):
            image = cv2.imread(filename)
            if image is None:
                # cv2.imread reports an unreadable or unsupported file by
                # returning None rather than raising; cvtColor would then
                # fail with an unrelated-looking error.
                warnings.warn(f"Skipping unreadable image file: {filename}")
                continue
            grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            images.append(cv2.resize(grey, size, interpolation=cv2.INTER_AREA))
    label_array = np.full(len(images), label)
    return images, label_array

In [None]:
from tensorflow.keras import utils as us

# One-hot encoded label vectors for the three class indices 0, 1 and 2.
# NOTE(review): the name is spelled "lable_tensor"; presumably "label_tensor"
# was meant — check for other uses before renaming.
lable_tensor = us.to_categorical([0, 1, 2], num_classes=3)

def lookup_nominal(nominal):
    """Translate a class index into its name: 0 'Rock', 1 'Paper', 2 'Scissors'."""
    return ('Rock', 'Paper', 'Scissors')[nominal]

In [None]:
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential



In [None]:
# Root folders of the training and the validation images.
data_dir = pathlib.Path('Dataset/structured_data')
validation_dir = pathlib.Path('Dataset/validation_set')

# Every entry one level below the class folders counts as an image.
image_count = sum(1 for _ in data_dir.glob('*/*'))
print(f"Total images: {image_count}")

# File lists per gesture folder.
rock_images = [*data_dir.glob('rock/*')]
paper_images = [*data_dir.glob('paper/*')]
scissors_images = [*data_dir.glob('scissors/*')]

# Last expression of the cell: open one scissors sample for inspection.
PIL.Image.open(str(scissors_images[1]))

In [None]:
batch_size = 32
img_height = 60
img_width = 60

# Training data: the "training" part of an 80/20 split of data_dir, loaded as
# greyscale images resized to img_height x img_width.
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    color_mode='grayscale',
    image_size=(img_height, img_width),
    batch_size=batch_size)

# Validation data lives in its own directory, so the whole directory is
# loaded.  The former validation_split=0.9999 / subset="validation" workaround
# silently dropped part of the images (at least one for typical set sizes).
val_ds = tf.keras.utils.image_dataset_from_directory(
    validation_dir,
    seed=123,
    color_mode='grayscale',
    image_size=(img_height, img_width),
    batch_size=batch_size)

# Class names as reported by the training dataset.
class_names = train_ds.class_names
print(class_names)

In [None]:
import matplotlib.pyplot as plt

# Show up to nine images of the first training batch in a 3x3 grid, each
# titled with its class name.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    # A first batch with fewer than nine images must not raise an index error.
    for i in range(min(9, len(images))):
        plt.subplot(3, 3, i + 1)
        # The dataset is loaded with color_mode='grayscale'; without an
        # explicit colormap matplotlib would render the single channel with
        # its default (non-grey) colormap.
        plt.imshow(images[i].numpy().astype("uint8"), cmap="gray")
        plt.title(class_names[labels[i]])
        plt.axis("off")

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Input pipeline for training: cache, shuffle with a buffer of 1000, prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Input pipeline for validation: cache and prefetch only.
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

# Demonstrate the effect of rescaling by 1/255 on one batch.
normalization_layer = layers.Rescaling(1./255)
normalized_ds = train_ds.map(
    lambda batch_images, batch_labels: (normalization_layer(batch_images), batch_labels)
)
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Smallest and largest pixel value of the first image after rescaling.
print(np.min(first_image), np.max(first_image))

In [None]:
# Augmentation pipeline placed in front of the model: random horizontal
# flips, followed by random rotations and random zooms (factor 0.1 each).
data_augmentation = keras.Sequential(
    [
        # The first layer also declares the expected input shape:
        # img_height x img_width with a last dimension of 1.
        layers.RandomFlip("horizontal",
                          input_shape=(img_height,
                                       img_width,
                                       1)),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
    ]
)

In [1]:
num_classes = len(class_names)

# Augmentation and rescaling, then three Conv2D + MaxPooling2D stages with
# 16, 32 and 64 filters, dropout, and a dense head that outputs one logit per
# class.  The generator is unpacked in place, so the layers are constructed in
# the same order in which they appear in the model.
model = Sequential([
    data_augmentation,
    layers.Rescaling(1./255),
    *(
        stage_layer
        for n_filters in (16, 32, 64)
        for stage_layer in (
            layers.Conv2D(n_filters, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),
        )
    ),
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, name="outputs"),
])

# The output layer has no activation, hence from_logits=True.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

NameError: name 'class_names' is not defined

In [None]:
# Print Keras' layer-by-layer summary of the model.
model.summary()

In [None]:
# Train for a fixed number of epochs, evaluating on val_ds after each one.
# The returned History object is used for the plots in the next cell.
epochs = 15
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

In [None]:
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history rather than from the global
# `epochs`: if training stopped early or `epochs` was changed after fitting,
# the lengths would differ and plt.plot would fail.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()