# Gesture recognition
# Kaszti Dávid
# Kézi gesztusfelismerés PowerPoint vezérlésére
# Hand gesture recognition for controlling PowerPoint
# Pix2Pix training Python Jupyter notebook

# GAN hálózat alapja:
https://machinelearningmastery.com/how-to-implement-pix2pix-gan-models-from-scratch-with-keras/

In [None]:
from tensorflow import keras
from keras.models import Sequential, Model
from keras.layers import Activation, Dropout, Concatenate, BatchNormalization
from keras.layers import Conv2D, Conv2DTranspose
from keras.optimizers.optimizer_v2.adam import Adam
from keras import metrics
import numpy as np
from numpy.random import randint
from numpy import zeros
from numpy import ones
import os
import cv2
from tensorflow.keras.layers import Input, LeakyReLU
from keras.initializers import RandomNormal
import random
import skimage.exposure

In [None]:
def mask_randomly_rectangle(img, rate, shape):
    """Darken a randomly placed rectangle of ``img`` by the factor ``rate``.

    Args:
        img: Image array indexed as ``[row, column, channel]``. It is not
            modified; a copy is returned.
        rate: Multiplier applied to every pixel inside the rectangle.
        shape: Pair ``(extent_along_axis_1, extent_along_axis_0)`` giving the
            rectangle size. Each entry may be a plain integer or a
            one-element array (as produced by ``np.random.randint(..., 1)``).

    Returns:
        A copy of ``img`` in which the rectangle has been multiplied by
        ``rate``; the product is cast back to the dtype of ``img`` when it is
        assigned into the copy.

    Raises:
        ValueError: Raised by ``np.random.randint`` when the rectangle extent
            is not strictly smaller than the image along either axis.
    """
    # Accept both plain ints and one-element arrays without relying on the
    # implicit array-to-scalar conversion.
    cols_len = int(np.asarray(shape[0]).item())
    rows_len = int(np.asarray(shape[1]).item())
    # Use the real image size instead of a hard-coded 64 so images of any
    # resolution can be masked without indexing out of bounds.
    n_rows, n_cols = img.shape[:2]
    # Same draw order as before: axis-1 offset first, then axis-0 offset.
    col_start = int(np.random.randint(0, n_cols - cols_len))
    row_start = int(np.random.randint(0, n_rows - rows_len))
    row_end = row_start + rows_len
    col_end = col_start + cols_len
    masked_img = img.copy()
    # One sliced assignment replaces the former pixel-by-pixel loop.
    masked_img[row_start:row_end, col_start:col_end, :] = (
        masked_img[row_start:row_end, col_start:col_end, :] * rate
    )
    return masked_img


def mask_randomly_lines_pattern(img, rate, shape):
    """Blend a pattern of one to four black lines into a copy of ``img``.

    The end points of the first line are drawn at random; every further line
    reuses them shifted by 14 along the second point coordinate. Each line
    gets its own random thickness between 3 and 5.

    Args:
        img: Image to mask; it is left unmodified.
        rate: Weight given to ``img`` in the blend (the line overlay gets
            ``1 - rate``).
        shape: Unused; kept so all mask functions share one signature.

    Returns:
        The weighted sum of the line overlay and ``img`` as produced by
        ``cv2.addWeighted``.
    """
    black = (0, 0, 0)
    line_spacing = 14
    overlay = img.copy()
    # The order of the random draws is kept: start point, end point, count,
    # then one thickness per line.
    start_x = np.random.randint(1, 14)
    start_y = np.random.randint(1, 14)
    end_x = np.random.randint(50, 63)
    end_y = np.random.randint(1, 14)
    line_count = np.random.randint(1, 5)
    for line_no in range(line_count):
        width = np.random.randint(3, 6)
        offset = line_no * line_spacing
        cv2.line(overlay, (start_x, start_y + offset), (end_x, end_y + offset), black, width)
    overlay_weight = 1 - rate
    return cv2.addWeighted(overlay, overlay_weight, img, 1 - overlay_weight, 0)


def mask_randomly_irregular(img, rate, shape, rng):
    """Scale an irregular, randomly shaped region of ``img`` by ``rate``.

    A binary mask is derived from blurred random noise (blur, contrast
    stretch, threshold, morphological open and close). Where the mask is
    255 the output is ``img * rate``; elsewhere it is ``cv2.add(img, mask)``.

    Args:
        img: Three-channel image (the mask is merged into three planes).
        rate: Multiplier applied to the pixels selected by the mask.
        shape: Unused; kept so all mask functions share one signature.
        rng: Generator providing ``integers`` (e.g. ``np.random.default_rng``).

    Returns:
        The array produced by ``np.where``; it is not cast back to the dtype
        of ``img``.
    """
    height, width = img.shape[0], img.shape[1]
    speckle = rng.integers(230, 255, (height, width), np.uint8, True)
    smooth = cv2.GaussianBlur(speckle, (0, 0), sigmaX=13, sigmaY=13, borderType=cv2.BORDER_DEFAULT)
    # Stretch the narrow blurred range to 0..255 so a fixed threshold works.
    stretched = skimage.exposure.rescale_intensity(smooth, in_range='image', out_range=(0, 255))
    stretched = stretched.astype(np.uint8)
    _, blob = cv2.threshold(stretched, 170, 255, cv2.THRESH_BINARY)
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (12, 12))
    # Open first, then close, using the same elliptical kernel.
    for morph_op in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        blob = cv2.morphologyEx(blob, morph_op, ellipse)
    blob_3ch = cv2.merge([blob, blob, blob])
    summed = cv2.add(img, blob_3ch)
    return np.where(blob_3ch == (255, 255, 255), img * rate, summed)

In [None]:
def create_paired_dataset(source, destination):
    """Write a randomly masked copy of every image in ``source``.

    For each file a darkening rate in [0.1, 0.3] and a rectangle size are
    drawn, one of the three mask functions is chosen at random, and the
    result is written to ``destination`` under the same file name.

    Args:
        source: Directory containing the original images.
        destination: Directory that receives the masked images.
    """
    seedval = 917
    rng = np.random.default_rng(seed=seedval)

    for filename in os.listdir(source):
        img = cv2.imread(os.path.join(source, filename))
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of crashing inside a mask function.
            continue
        rate = round(random.uniform(0.1, 0.3), 2)
        shape_one = np.random.randint(20, 45, 1)
        shape_two = np.random.randint(20, 45, 1)
        shape = (shape_one, shape_two)
        # The upper bound of randint is exclusive: (1, 4) yields 1, 2 or 3.
        # The former (1, 3) never produced 3, so the lines-pattern branch
        # below was unreachable.
        selector = np.random.randint(1, 4)
        if selector == 1:
            img = mask_randomly_rectangle(img, rate, shape)
        elif selector == 2:
            img = mask_randomly_irregular(img, rate, shape, rng)
        else:
            img = mask_randomly_lines_pattern(img, rate, shape)
        # Join paths the same way as for reading, so a destination without a
        # trailing separator still writes inside the directory.
        cv2.imwrite(os.path.join(destination, filename), img)

In [None]:
def encoder_block(input_layer, n_filters, batchnorm=True, trainable=True, stride_shape=(2, 2)):
    """Apply a 4x4 convolution, optional batch normalization and LeakyReLU.

    Args:
        input_layer: Tensor fed into the convolution.
        n_filters: Number of convolution filters.
        batchnorm: Whether a BatchNormalization layer follows the convolution.
        trainable: When batch normalization is used, whether it is called
            with ``training=True`` or with the default call arguments.
        stride_shape: Strides of the convolution.

    Returns:
        The output tensor of the LeakyReLU activation (slope 0.2).
    """
    conv = Conv2D(
        n_filters,
        (4, 4),
        strides=stride_shape,
        padding='same',
        kernel_initializer=RandomNormal(stddev=0.02),
    )
    features = conv(input_layer)
    if batchnorm:
        norm = BatchNormalization()
        features = norm(features, training=True) if trainable else norm(features)
    return LeakyReLU(alpha=0.2)(features)


def decoder_block(input_layer, skip_in, n_filters, dropout=False):
    """Upsample with a 4x4 transposed convolution and merge a skip connection.

    Args:
        input_layer: Tensor to upsample (stride 2).
        skip_in: Tensor concatenated with the upsampled output.
        n_filters: Number of filters of the transposed convolution.
        dropout: Whether a Dropout(0.5) layer, called with ``training=True``,
            is applied before the concatenation.

    Returns:
        The ReLU-activated concatenation of the upsampled tensor and
        ``skip_in``.
    """
    upsample = Conv2DTranspose(
        n_filters,
        (4, 4),
        strides=(2, 2),
        padding='same',
        kernel_initializer=RandomNormal(stddev=0.02),
    )
    upsampled = BatchNormalization()(upsample(input_layer), training=True)
    if dropout:
        upsampled = Dropout(0.5)(upsampled, training=True)
    merged = Concatenate()([upsampled, skip_in])
    return Activation('relu')(merged)

In [None]:
def define_discriminator(image_shape):
    """Build and compile the discriminator that scores (source, target) pairs.

    The two input images are concatenated and passed through five encoder
    blocks and a final one-filter convolution with a sigmoid activation.
    The model is compiled with binary cross-entropy weighted by 0.5, its
    summary is printed and its graph is written to ``d_model.png``.

    Args:
        image_shape: Shape of one input image, used for both inputs.

    Returns:
        The compiled Keras ``Model`` taking ``[source, target]`` as input.
    """
    initializer = RandomNormal(stddev=0.02)
    input_source = Input(shape=image_shape)
    input_target = Input(shape=image_shape)
    # Both images are judged together, so they are joined into one tensor.
    merged = Concatenate()([input_source, input_target])
    layer = encoder_block(merged, 64, False)
    layer = encoder_block(layer, 128, True)
    layer = encoder_block(layer, 256, True)
    layer = encoder_block(layer, 512, True)
    # The last encoder block uses stride 1, so it does not downsample.
    one_shape = (1, 1)
    layer = encoder_block(layer, 512, True, True, one_shape)
    layer = Conv2D(1, (4, 4), padding='same', kernel_initializer=initializer)(layer)
    patch = Activation('sigmoid')(layer)
    model = Model([input_source, input_target], patch)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, loss_weights=[0.5])
    model.summary()
    keras.utils.plot_model(model, "d_model.png", show_layer_activations=False, expand_nested=False, show_shapes=True, dpi=128)
    return model

In [None]:
def define_generator(image_shape=(256, 256, 3)):
    """Build the encoder-decoder generator with skip connections.

    Five encoder blocks and a bottleneck convolution downsample the input;
    five decoder blocks, each fed the matching encoder output as its skip
    connection, and a final transposed convolution upsample it again. The
    output has 3 channels and a tanh activation. The model summary is
    printed and its graph is written to ``g_model.png``.

    Args:
        image_shape: Shape of the input image.

    Returns:
        The (uncompiled) Keras ``Model`` mapping an image to an image.
    """
    initializer = RandomNormal(stddev=0.02)
    in_image = Input(shape=image_shape)

    # Encoder: keep every stage so the decoder can reuse it as a skip input.
    skips = [encoder_block(in_image, 64, batchnorm=False)]
    for n_filters in (128, 256, 512, 512):
        skips.append(encoder_block(skips[-1], n_filters))

    bottleneck = Conv2D(512, (4, 4), strides=(2, 2), padding='same', kernel_initializer=initializer)(skips[-1])
    bottleneck = Activation('relu')(bottleneck)

    # Decoder: only the first stage uses dropout; skips are consumed from the
    # deepest encoder stage back to the first.
    decoded = decoder_block(bottleneck, skips[-1], 512, dropout=True)
    for skip, n_filters in zip(reversed(skips[:-1]), (512, 256, 128, 64)):
        decoded = decoder_block(decoded, skip, n_filters)

    final_layer = Conv2DTranspose(3, (4, 4), strides=(2, 2), padding='same', kernel_initializer=initializer)(decoded)
    model = Model(in_image, Activation('tanh')(final_layer))
    model.summary()
    keras.utils.plot_model(model, "g_model.png", show_shapes=True, dpi=128)
    return model


In [None]:
def define_gan(g_model, d_model, image_shape):
    """Combine generator and discriminator into the composite training model.

    Every discriminator layer except BatchNormalization layers is marked as
    not trainable. The composite model maps a source image to the
    discriminator output and the generated image, and is compiled with
    binary cross-entropy and mean absolute error weighted 1 : 100.

    Args:
        g_model: Generator model.
        d_model: Discriminator model taking ``[source, target]``.
        image_shape: Shape of the source image input.

    Returns:
        The compiled composite Keras ``Model``.
    """
    for layer in d_model.layers:
        if not isinstance(layer, BatchNormalization):
            layer.trainable = False
    in_src = Input(shape=image_shape)
    gen_out = g_model(in_src)
    dis_out = d_model([in_src, gen_out])
    model = Model(in_src, [dis_out, gen_out])
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss=['binary_crossentropy', 'mae'], optimizer=opt, loss_weights=[1, 100])
    return model

In [None]:
def generate_real_samples(dataset, n_samples, patch_shape):
    """Draw a random batch of image pairs labelled as real.

    Args:
        dataset: Pair ``(trainA, trainB)`` of arrays indexed by sample.
        n_samples: Number of pairs to draw (indices may repeat).
        patch_shape: Side length of the square label patch.

    Returns:
        ``([X1, X2], y)`` where ``X1``/``X2`` are the rows of
        ``trainA``/``trainB`` at the same random indices and ``y`` is an array
        of ones with shape ``(n_samples, patch_shape, patch_shape, 1)``.
    """
    sources, targets = dataset
    picks = randint(0, sources.shape[0], n_samples)
    real_labels = ones((n_samples, patch_shape, patch_shape, 1))
    return [sources[picks], targets[picks]], real_labels


def generate_fake_samples(g_model, samples, patch_shape):
    """Run the generator on ``samples`` and label the outputs as fake.

    Args:
        g_model: Object whose ``predict`` method maps ``samples`` to images.
        samples: Input passed to ``g_model.predict``.
        patch_shape: Side length of the square label patch.

    Returns:
        ``(X, y)`` where ``X`` is the prediction and ``y`` is an array of
        zeros with shape ``(len(X), patch_shape, patch_shape, 1)``.
    """
    generated = g_model.predict(samples)
    fake_labels = zeros((len(generated), patch_shape, patch_shape, 1))
    return generated, fake_labels

In [None]:
def load_paired_dataset(basepath, maskedpath):
    """Load matching base/masked images and scale them to [-1, 1].

    Images are paired by file name: every name found in ``basepath`` is read
    from both directories, so element ``i`` of the two returned arrays always
    refers to the same picture. Files that cannot be read from either
    directory are skipped.

    Args:
        basepath: Directory containing the unmasked images.
        maskedpath: Directory containing the masked images with the same
            file names.

    Returns:
        ``[X1, X2]`` where ``X1`` holds the masked and ``X2`` the base images,
        both as float32 RGB arrays computed as ``(pixel - 127.5) / 127.5``.
    """
    X1 = []
    X2 = []

    # Listing each directory on its own relied on both listings coming back
    # in the same order with the same files. Iterating one sorted listing and
    # looking the name up in both places keeps the pairs aligned.
    for file_name in sorted(os.listdir(basepath)):
        base_image = cv2.imread(os.path.join(basepath, file_name))
        masked_image = cv2.imread(os.path.join(maskedpath, file_name))
        if base_image is None or masked_image is None:
            # cv2.imread returns None for missing or undecodable files.
            continue
        X2.append(cv2.cvtColor(base_image, cv2.COLOR_BGR2RGB))
        X1.append(cv2.cvtColor(masked_image, cv2.COLOR_BGR2RGB))

    X1 = np.array(X1)
    X2 = np.array(X2)
    X1 = (X1.astype(np.float32) - 127.5) / 127.5
    X2 = (X2.astype(np.float32) - 127.5) / 127.5
    return [X1, X2]


In [None]:
def train(d_model, g_model, gan_model, dataset, n_epochs=100, n_batch=1):
    """Alternate discriminator and generator updates for ``n_epochs`` epochs.

    Each step draws a random real batch, generates the matching fake batch,
    updates the discriminator on both, then updates the generator through the
    composite model and prints the three losses. The generator is saved as
    ``model_<step>.h5`` once after step 21 and again after every tenth epoch.

    Args:
        d_model: Compiled discriminator.
        g_model: Generator.
        gan_model: Compiled composite model.
        dataset: Pair ``(trainA, trainB)`` of image arrays.
        n_epochs: Number of passes used to compute the step count.
        n_batch: Number of samples per step.
    """
    n_patch = d_model.output_shape[1]
    sources, _ = dataset
    bat_per_epo = int(len(sources) / n_batch)
    n_steps = bat_per_epo * n_epochs

    for i in range(n_steps):
        step = i + 1
        real_pair, y_real = generate_real_samples(dataset, n_batch, n_patch)
        X_realA, X_realB = real_pair
        X_fakeB, y_fake = generate_fake_samples(g_model, X_realA, n_patch)

        d_loss1 = d_model.train_on_batch([X_realA, X_realB], y_real)
        d_loss2 = d_model.train_on_batch([X_realA, X_fakeB], y_fake)
        g_loss, _, _ = gan_model.train_on_batch(X_realA, [y_real, X_realB])
        print("Steps: %d/%d || real d loss: %.3f || fake d loss: %.3f || g loss: %.3f" % (step, n_steps, d_loss1, d_loss2, g_loss))

        # Two independent checkpoint triggers: an early one at loop index 20
        # and a periodic one every ten epochs.
        for checkpoint_due in (i == 20, step % (bat_per_epo * 10) == 0):
            if checkpoint_due:
                filename = 'model_%06d.h5' % step
                g_model.save(filename)
                print('Saved: ' + filename)

In [None]:
def run_train(base_data, masked_data):
    """Load the paired dataset, build the three models and start training.

    Args:
        base_data: Directory with the unmasked images.
        masked_data: Directory with the masked images.
    """
    dataset = load_paired_dataset(base_data, masked_data)
    masked_images, base_images = dataset[0], dataset[1]
    print('Loaded', masked_images.shape, base_images.shape)
    # Drop the sample axis: the models are built for a single image shape.
    image_shape = masked_images.shape[1:]
    d_model = define_discriminator(image_shape)
    g_model = define_generator(image_shape)
    gan_model = define_gan(g_model, d_model, image_shape)
    train(d_model, g_model, gan_model, dataset)

In [None]:
# Directory with the original images; passed to run_train as base_data.
source = "C:/Users/admin/Desktop/unified_rgb_extended/"
# Directory with the masked images; passed to run_train as masked_data.
destination = "C:/Users/admin/Desktop/arnyekolasok/mask_mixed_extended_pattern_lines_v2/"
# One-off preprocessing step: uncomment to (re)generate the masked images in
# `destination` from `source` before training.
#create_paired_dataset(source, destination)
run_train(source, destination)