In [None]:
## Importing all packages needed to run the notebook


import tensorflow

import keras 
from keras import applications
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, Dropout, Flatten, Dense
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import optimizers

from tqdm import tqdm

import numpy as np

import cv2
import os

import sklearn
from sklearn.datasets import load_files       
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from glob import glob

import matplotlib.pyplot as plt
%matplotlib inline

from scipy import ndimage 

from PIL import Image
from PIL import ImageEnhance

import random

In [None]:
## PART I: Building the tools the applications will rely on

In [None]:
def identify_hand(bg, roi):
    """Isolate the hand by comparing the current region of interest with the background.

    Both images are binarised with a fixed threshold of 125 and subtracted
    pixel-wise. The outline of every outer contour found in the difference is
    then drawn on it in grey (125) with a 5-pixel stroke.

    Parameters
    ----------
    bg : image of the region of interest taken as the background reference.
    roi : image of the same region for the current frame.

    Returns
    -------
    The difference image with the outer contours drawn on top of it.
    """
    _, bg_binary = cv2.threshold(bg, 125, 255, cv2.THRESH_BINARY)
    _, hand_binary = cv2.threshold(roi, 125, 255, cv2.THRESH_BINARY)

    diff = cv2.absdiff(bg_binary, hand_binary)

    # OpenCV 3.x returns (image, contours, hierarchy) whereas 2.x/4.x return
    # (contours, hierarchy): the last two items are the ones needed in every version.
    contours, hierarchy = cv2.findContours(diff.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    # hierarchy is None when no contour was found
    if hierarchy is not None:
        for i in range(len(contours)):
            # Field 3 of a hierarchy entry is the parent index; -1 marks an outer contour
            if hierarchy[0][i][3] == -1:
                cv2.drawContours(diff, contours, i, (125,125,125), 5)

    return diff

In [None]:
def save_image(frame, diff, num_frames, output_dir='C:\\Users\\rock_paper_scissors'):
    """Prompt the player for a gesture and store the hand image for that gesture.

    The capture session is driven by the frame counter:

    * frames 50-199: prompt for paper, images saved from frame 100
    * frames 200-349: prompt for rock, images saved from frame 250
    * frames 350-499: prompt for scissors, images saved from frame 400

    The gap between the start of a prompt and the first saved frame gives the
    player time to form the gesture.

    Parameters
    ----------
    frame : image the prompt is written on.
    diff : hand image written to disk as ``<output_dir>/<gesture>/<num_frames>.jpg``.
    num_frames : index of the current frame.
    output_dir : root folder of the dataset; one sub-folder per gesture is
        created on demand (``cv2.imwrite`` does not create missing folders).

    Returns
    -------
    The frame that was passed in.
    """
    # (gesture folder, prompt, first prompted frame, first saved frame, end frame - exclusive)
    schedule = (
        ('paper', "Make a paper", 50, 100, 200),
        ('rock', "Make a rock", 200, 250, 350),
        ('scissors', "Make a scissors", 350, 400, 500),
    )

    for gesture, prompt, prompt_start, save_start, end in schedule:
        if prompt_start <= num_frames < end:
            cv2.putText(frame, prompt, (0, 400), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,255), 2)

            if num_frames >= save_start:
                gesture_dir = os.path.join(output_dir, gesture)
                os.makedirs(gesture_dir, exist_ok=True)
                file_name_path = os.path.join(gesture_dir, str(num_frames) + '.jpg')
                cv2.imwrite(file_name_path, diff)

            # The windows do not overlap, so no other gesture can match
            break

    return frame
    

In [None]:
## Create the training images for the classifier (paper, rock, scissors)


# Specify the variables to identify the ROI
# (x1:x2 is used as the row range and y1:y2 as the column range of the frame)
x1, x2 = 75, 460
y1, y2 = 400, 640

# Will hold the ROI of the first frame, used as the background (for background subtraction)
bg = None

# Open the stream video
cam = cv2.VideoCapture(0)
num_frames = 0

try:
    # Keep looping, until interrupted
    while True:

        ret, frame = cam.read()

        # Stop cleanly when the camera is unavailable or stops delivering frames
        if not ret:
            print('Unable to read a frame from the camera')
            break

        # Flip the frame so that it is not the mirror view
        frame = cv2.flip(frame, 1)

        # Draw a rectangle to indicate the ROI
        frame = cv2.rectangle(frame, (y1,x1), (y2,x2), (255,255,255), 3)

        # Extract the ROI as a blurred grayscale image
        roi = frame[x1:x2,y1:y2]
        roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        roi = cv2.GaussianBlur(roi, (7,7), 0)

        # Get first image as background
        if bg is None:
            bg = roi

        # Use the BG and ROI to identify the hand gesture
        diff = identify_hand(bg, roi)

        # Save the image to train the classifier later on
        frame = save_image(frame, diff, num_frames)

        # Count the frames to know when to change gesture
        num_frames += 1

        # Show the output
        cv2.imshow('Game', frame)
        cv2.imshow('Hand',diff)

        # Close windows with Enter
        k = cv2.waitKey(1) & 0xFF

        if k == 13:
            break

finally:
    # Release the camera and destroy all the windows, even if an error occurred
    cam.release()
    cv2.destroyAllWindows()

In [None]:
## Preparing the augmentation process (of the training data)


# Defining a set of functions to load the data
 
# Number of gesture classes, i.e. the width of the one-hot label vectors
nb_category = 3

# Import data and output image file and target file
def load_dataset(path):
    """List the images under ``path`` together with their one-hot labels.

    ``path`` is scanned with ``sklearn.datasets.load_files``, which takes each
    sub-folder as one category.

    Parameters
    ----------
    path : root folder of the dataset.

    Returns
    -------
    img_files : array of image file paths.
    img_targets : array of one-hot labels with ``nb_category`` columns.
    """
    # Only the file names and the labels are used, so do not read every file into memory
    data = load_files(path, load_content=False)
    img_files = np.array(data['filenames'])
    img_targets = np_utils.to_categorical(np.array(data['target']), nb_category)
    return img_files, img_targets

# Convert the image file into a tensor format
def path_to_tensor(img_path):
    """Read one image from disk and return it as a batch holding that single image.

    The image is loaded at a target size of 240x240, converted to an array and
    given a new leading axis.
    """
    pil_img = image.load_img(img_path, target_size=(240, 240))
    pixels = image.img_to_array(pil_img)
    # Prepend the batch axis
    return pixels[np.newaxis, ...]

# Loop over a series of images
def paths_to_tensor(img_paths):
    """Convert every path in ``img_paths`` with ``path_to_tensor`` and stack the results.

    A progress bar is shown while the images are converted.
    """
    batches = []
    for single_path in tqdm(img_paths):
        batches.append(path_to_tensor(single_path))
    return np.vstack(batches)


# Creating separate folders to host the created images
# (this step can be skipped if the augmented images are written directly into the original folders)

# Go through the original folders and keep the last path component as the category name.
# Taking the basename instead of slicing at a fixed offset keeps this independent of the
# length of the dataset path.
category_names = [os.path.basename(os.path.normpath(item))
                  for item in sorted(glob('C:/Users/rock_paper_scissors/*/'))]

# Create a folder for each category name (to host augmented images of that category);
# exist_ok lets the cell be re-run without failing on folders created earlier
for cat in category_names:
    os.makedirs("C:/Users/img_created_{}".format(cat), exist_ok=True)

print(category_names)

In [None]:
## Creating new images from original data


# Load the original dataset
img, targets = load_dataset('C:\\Users\\rock_paper_scissors')

# Class index of every image, computed once for all the categories
target_indexes = np.argmax(targets, axis=1)

# Perform the transformation (the settings are the same for every category,
# so the generator is built once)
# NOTE(review): `zca_whitening` is requested but `datagen.fit(...)` is never called;
# the Keras documentation states that `fit` is required for it - confirm whether
# whitening is actually wanted here.
datagen = ImageDataGenerator(zca_whitening=True,
                             zca_epsilon=1e-3,
                             rotation_range=20,
                             width_shift_range=0.15,
                             height_shift_range=0.15,
                             horizontal_flip=True,
                             fill_mode='nearest')

# Number of batches generated (and saved) per category
nb_batches = 11

for cat in category_names:
    # Group images by target
    indexes = np.array([category_names[k] == cat for k in target_indexes], dtype=bool)

    # Nothing to augment for a category without images
    if not indexes.any():
        continue

    img_to_create = img[indexes]
    img_to_tensor = paths_to_tensor(img_to_create)/255

    # Every batch drawn from the flow is written to the folder of the category
    flow = datagen.flow(img_to_tensor,
                        batch_size=50,
                        shuffle=True,
                        save_to_dir= "C:/Users/img_created_{}".format(cat),
                        save_format='jpeg')
    for _ in range(nb_batches):
        next(flow)


In [None]:
## Creating and training the deep learning model for sign recognition

# Load the file names and labels, then split off the test set and the validation set
# (20% each time, stratified on the labels)
img, img_targets = load_dataset('C:\\Users\\rock_paper_scissors')
X, X_test, y, y_test = train_test_split(
    img, img_targets,
    test_size=0.2, random_state=42, shuffle=True, stratify=img_targets)
X_train, X_validation, y_train, y_validation = train_test_split(
    X, y,
    test_size=0.2, random_state=42, shuffle=True, stratify=y)

# Print the sizes to make sure there are no mistakes
print('Total number of images: {}, Total number of labels: {}'.format(len(img), len(img_targets)))
print('Total number of train images: {}, Total number of train labels: {}'.format(len(X_train), len(y_train)))
print('Total number of validation images: {}, Total number of validation labels: {}'.format(len(X_validation), len(y_validation)))
print('Total number of test images: {}, Total number of test labels: {}'.format(len(X_test), len(y_test)))

# Turn the train, validation and test file names into image tensors
img_train, img_valid, img_test = (
    paths_to_tensor(subset) for subset in (X_train, X_validation, X_test))


# Training parameters
top_model_weights_path = 'C:\\Users\\rock_paper_scissors.h5'
epochs = 30
batch_size = 25
RMS = optimizers.RMSprop(lr=0.0001, rho=0.9)


# Model: three convolutions, global average pooling, then a dense classifier
new_model = Sequential([
    Conv2D(filters=32, kernel_size=(3,3), input_shape=img_train.shape[1:], activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(filters=128, kernel_size=(3,3), activation='relu'),
    GlobalAveragePooling2D(),
    Dense(250, activation='relu'),
    Dense(300, activation='relu'),
    Dropout(0.3),
    Dense(500, activation='relu'),
    Dropout(0.3),
    Dense(750, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax'),
])

new_model.compile(optimizer=RMS,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])


# Save the model whenever it improves, and stop after 15 epochs without improvement
checkpointer = ModelCheckpoint(filepath=top_model_weights_path,
                               verbose=1,
                               save_best_only=True)
early_stopping_monitor = EarlyStopping(patience=15)


# Train the model
new_model.fit(img_train, y_train,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(img_valid, y_validation),
              callbacks=[checkpointer, early_stopping_monitor])


In [None]:
##  Testing the model

# Load the weights written to top_model_weights_path during training
new_model.load_weights(top_model_weights_path)

# Predict the whole test set in a single call (instead of one call per image)
# and compare the predicted classes with the true ones
pred = np.argmax(new_model.predict(img_test), axis=1)
test_accuracy = 100*np.sum(pred==np.argmax(y_test, axis=1))/len(pred)
print('Test accuracy: %.4f%%' % test_accuracy)

In [None]:
## PART II: Assemble the game

In [None]:
# Build functions to create the model and process the results

def dl_recognition(top_model_weights_path = 'C:\\Users\\rock_paper_scissors.h5'):
    """Build the gesture classifier and load its weights.

    The network takes 240x240x3 inputs and ends in a 3-way softmax.

    Parameters
    ----------
    top_model_weights_path : file the weights are loaded from.

    Returns
    -------
    The model with the weights loaded.
    """
    # Convolutional part
    feature_layers = [
        Conv2D(filters=32, kernel_size=(3,3), input_shape=(240, 240, 3), activation='relu'),
        MaxPooling2D(pool_size=(2,2)),
        Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
        MaxPooling2D(pool_size=(2,2)),
        Conv2D(filters=128, kernel_size=(3,3), activation='relu'),
        GlobalAveragePooling2D(),
    ]

    # Dense classifier
    classifier_layers = [
        Dense(250, activation='relu'),
        Dense(300, activation='relu'),
        Dropout(0.3),
        Dense(500, activation='relu'),
        Dropout(0.3),
        Dense(750, activation='relu'),
        Dropout(0.3),
        Dense(3, activation='softmax'),
    ]

    classifier = Sequential(feature_layers + classifier_layers)

    # Load the trained weights
    classifier.load_weights(top_model_weights_path)

    return classifier




def results(prediction, opponent):
    """Score one round from the player's point of view.

    Parameters
    ----------
    prediction : class index played by the player.
    opponent : class index played by the opponent.

    Returns
    -------
    An array ``[win, draw, loss]`` holding a single 1 for the outcome of the round.
    """
    # (player, opponent) pairs in which the player wins
    winning_pairs = ((0, 1), (1, 2), (2, 0))

    if any(prediction == mine and opponent == theirs for mine, theirs in winning_pairs):
        outcome = 0
    elif prediction == opponent:
        outcome = 1
    else:
        outcome = 2

    score = np.array([0,0,0])
    score[outcome] = 1
    return score

def preprocess_img(frame):
    """Shape a single-channel image into a one-image batch for the model.

    The image is resized to 240x240, converted from grayscale to RGB and given
    a new leading axis.
    """
    resized = cv2.resize(frame, (240,240))
    as_rgb = cv2.cvtColor(resized, cv2.COLOR_GRAY2RGB)
    # Prepend the batch axis
    return as_rgb[np.newaxis, ...]


In [None]:
# Combining the pieces to create the game

# This cell relies on x1, x2, y1, y2 and category_names defined in earlier cells.

bg = None
opponent = random.randint(0,2)

num_frames = 0
count_result = np.array([0,0,0])

# Build the model before opening the camera, so a failure here does not leave the camera open
model = dl_recognition()

cam = cv2.VideoCapture(0)

try:
    # keep looping, until interrupted
    while True:

        ret, frame = cam.read()

        # Stop cleanly when the camera is unavailable or stops delivering frames
        if not ret:
            print('Unable to read a frame from the camera')
            break

        # Flip the frame so that it is not the mirror view
        frame = cv2.flip(frame, 1)

        # Draw a rectangle to indicate the ROI
        frame = cv2.rectangle(frame, (y1,x1), (y2,x2), (255,255,255), 3)

        # Extract the ROI as a blurred grayscale image
        roi = frame[x1:x2,y1:y2]
        roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        roi = cv2.GaussianBlur(roi, (7,7), 0)

        # Get first image as background
        if bg is None:
            bg = roi

        # Use the BG and ROI to identify the hand gesture
        diff = identify_hand(bg, roi)

        # Preprocess the image so it is model compliant
        img_model = preprocess_img(diff)

        # Run the frame through the model so the player knows what (s)he is playing
        visual = np.argmax(model.predict(img_model))

        # Run the game every 100 frames so player has time to strategize.
        # Frame 0 is skipped: it is the background frame itself, so it shows no gesture.
        if num_frames > 0 and num_frames % 100 == 0:
            # The gesture played is the one just recognised; no second model call is needed
            prediction = visual
            opponent = random.randint(0,2)
            result = results(prediction, opponent)
            count_result += result

        num_frames += 1

        # Print the information to player
        frame = cv2.putText(frame, str('Opponent: ' + category_names[opponent]), (50,50), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0,0,255), 2)
        frame = cv2.putText(frame, str('You: ' + category_names[visual]), (400,50), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0,0,255), 2)
        frame = cv2.putText(frame, str('win: ' + str(count_result[0])), (200,400), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0,0,255), 2)
        frame = cv2.putText(frame, str('draw: ' + str(count_result[1])), (200,425), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0,0,255), 2)
        frame = cv2.putText(frame, str('loss: ' + str(count_result[2])), (200,450), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0,0,255), 2)

        # Show game screen
        cv2.imshow('Game', frame)

        # Close windows with Enter
        k = cv2.waitKey(1) & 0xFF

        if k == 13:
            break

finally:
    # Release the camera and destroy all the windows, even if an error occurred
    cam.release()
    cv2.destroyAllWindows()