In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
import cv2
from imutils.video import VideoStream
%matplotlib inline


In [3]:
# Locally read in data and make our training and testing set
DATA_DIR = "rockpaperscissors"
CATEGORIES = ["paper", "rock", "scissors"]
IMG_WIDTH = 150
IMG_HEIGHT = 100
BATCH_SIZE = 100
    
def load_data(data_dir, categories, img_width, img_height):
    X = []
    y = []
    index = -1
    for category in categories:
        index += 1
        one_hot = np.zeros(len(categories))  # to encode the class as a one hot vector
        one_hot[index] = 1
        path = os.path.join(data_dir, category)
        
        for img in os.listdir(path):  # get all images in the path
            img_arr = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
            img_arr = cv2.resize(img_arr, (img_width, img_height))
            img_arr = np.asarray(img_arr)
            X.append(img_arr)
            y.append(one_hot)
            
    return X, y

X, y = load_data(DATA_DIR, CATEGORIES, IMG_WIDTH, IMG_HEIGHT)


In [4]:
# Divide our data into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)

# Normalize our data
X_train = tf.keras.utils.normalize(X_train, axis=1)
X_test = tf.keras.utils.normalize(X_test, axis=1)

# reshape the data into a 4D tensor - (sample_number, x_img_size, y_img_size, num_channels)
# because we are using greyscale, we only have a single channel - RGB colour images would have 3
X_train = X_train.reshape(X_train.shape[0], IMG_WIDTH, IMG_HEIGHT, 1)
X_test = X_test.reshape(X_test.shape[0], IMG_WIDTH, IMG_HEIGHT, 1)
input_shape = (IMG_WIDTH, IMG_HEIGHT, 1)

# convert the data to the right type
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


x_train shape: (1750, 150, 100, 1)
1750 train samples
438 test samples


In [7]:
# Configure the CNN
classifier = tf.keras.models.Sequential()

# Create model

# 3x 2d convolution layers
# Non-linearity (RELU) - replace all negative pixel values in feature map with zero
classifier.add(tf.keras.layers.Conv2D(32, (3,3), input_shape=(IMG_WIDTH, IMG_HEIGHT, 1), activation='relu')) 
classifier.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

classifier.add(tf.keras.layers.Conv2D(32, (3,3), activation='relu'))
classifier.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

classifier.add(tf.keras.layers.Conv2D(32, (3,3), activation='relu'))
classifier.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

# Flatten 3d model into 1d
classifier.add(tf.keras.layers.Flatten())

# feature vectors
classifier.add(tf.keras.layers.Dense(64, activation='relu'))
classifier.add(tf.keras.layers.Dropout(0.5))
classifier.add(tf.keras.layers.Dense(3, activation='softmax'))

# compile model
classifier.compile(loss='categorical_crossentropy',
                   optimizer="rmsprop",
                   metrics=['accuracy'])


In [8]:
# run and train model
classifier.fit(np.array(X_train), np.array(y_train),
               batch_size=BATCH_SIZE,
               epochs=13, 
               verbose=1,
               validation_data=(np.array(X_test), np.array(y_test))
               )

Train on 1750 samples, validate on 438 samples
Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13


<tensorflow.python.keras.callbacks.History at 0x155cc30d0>

In [9]:
# test model
score = classifier.evaluate(np.array(X_test), np.array(y_test), verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])




Test loss: 1.0997339721139707
Test accuracy: 0.3196347


In [9]:
def detect_and_display(model, video_source):
    # Get frame from video soruce
    frame = video_source.read()
    
    # Convert image for model
    small = cv2.resize(frame, (IMG_WIDTH, IMG_HEIGHT))
    gray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY)
    # Reshape for input to NN
    img_arr = gray.reshape(1, IMG_WIDTH, IMG_HEIGHT, 1)
    # Cast to float to handle error
    img_arr = tf.cast(img_arr, tf.float32)
    # Save colour image to show to user
    colour = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    prediction = classifier.predict(img_arr)
    # Convert prediction from one hot to category
    index = tf.argmax(prediction[0], axis=0)
    prediction = CATEGORIES[index]
    
    cv2.putText(frame, prediction, (20, 50), cv2.FONT_HERSHEY_SIMPLEX,3, (0, 255, 0), 2)
    return frame
    
    
def live_detection(model):
    video_source = VideoStream(src=0).start()
    
    while True:
        frame = detect_and_display(model, video_source)
        cv2.imshow("Face Liveness Detector", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
    video_source.stop()
    
live_detection(1)
