In [None]:
import os
import warnings
import cv2
import keras
import matplotlib.pyplot as plt
import matplotlib.style as style
import numpy as np
import pandas as pd
from PIL import Image
from keras import models, layers, optimizers
from keras.applications import VGG16
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense, Dropout, Flatten
from keras.models import Model
from keras.preprocessing import image as image_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): newer matplotlib releases renamed the bundled seaborn styles
# (e.g. 'seaborn-v0_8-whitegrid') — confirm this style name exists in the installed version.
style.use('seaborn-whitegrid')
# Show each matching warning only the first time it occurs.
warnings.filterwarnings(action='once')

In [None]:
# Two-character filename prefix -> human-readable gesture name.
gestures = dict([
    ('L_', 'L'),
    ('fi', 'Fist'),
    ('C_', 'C'),
    ('ok', 'Okay'),
    ('pe', 'Peace'),
    ('pa', 'Palm'),
])

# Gesture name -> integer class label. 'C' deliberately has no label here.
gestures_map = {
    name: label
    for label, name in enumerate(('Fist', 'L', 'Okay', 'Palm', 'Peace'))
}

In [None]:
def process_image(path):
    """Load the image at ``path`` and return it as a 224x224 NumPy array.

    Args:
        path: Filesystem path of the image to open.

    Returns:
        ``np.ndarray`` holding the pixel data of the image resized to 224x224.
    """
    # Image.open is lazy and keeps the file open; the context manager guarantees the
    # handle is released even when many files are processed in a loop.
    with Image.open(path) as img:
        return np.array(img.resize((224, 224)))

def process_data(X_data, y_data, is_rgb=None, num_classes=None):
    """Convert raw image/label lists into model-ready arrays.

    Args:
        X_data: Sequence of equally-shaped image arrays.
        y_data: Sequence of integer class labels, one per image.
        is_rgb: If truthy, the images are used as given. If falsy, each image is
            replicated three times along a new trailing axis. When ``None`` (default)
            the notebook-level ``rgb`` flag is used, as before.
        num_classes: Optional width of the one-hot encoding, forwarded to
            ``to_categorical``. ``None`` (default) lets it infer the width from the
            largest label.

    Returns:
        Tuple ``(X, y)``: ``X`` is a float32 array scaled by 1/255, ``y`` is the
        one-hot encoded label array.
    """
    if is_rgb is None:
        # Backward compatible: fall back to the global flag set by the calling cell.
        is_rgb = rgb
    X_data = np.array(X_data, dtype='float32')
    if not is_rgb:
        # Stack the same image three times on a new last axis.
        X_data = np.stack((X_data,) * 3, axis=-1)
    X_data /= 255
    y_data = to_categorical(np.array(y_data), num_classes=num_classes)
    return X_data, y_data

def walk_file_tree(relative_path):
    """Collect and process every labelled image found under ``relative_path``.

    The label of each file comes from the first two characters of its name, looked up
    through ``gestures`` and then ``gestures_map``. Hidden files and files whose name
    starts with 'C_' are ignored.

    Returns:
        The ``(X_data, y_data)`` pair produced by ``process_data``.
    """
    images = []
    labels = []
    for directory, _subdirectories, filenames in os.walk(relative_path):
        for filename in filenames:
            # Skip dotfiles and 'C' gesture samples.
            if filename.startswith('.') or filename.startswith('C_'):
                continue
            image_path = os.path.join(directory, filename)
            labels.append(gestures_map[gestures[filename[:2]]])
            images.append(process_image(image_path))

    return process_data(images, labels)

class Data(object):
    """Minimal container that keeps an image set together with its labels."""

    def __init__(self):
        # Both start as separate empty lists; callers assign the real arrays later.
        self.X_data, self.y_data = [], []

    def get_data(self):
        """Return the stored ``(X_data, y_data)`` pair."""
        return (self.X_data, self.y_data)

#### Get 'silhouette' data (this is the data we used for the final model)

In [None]:
relative_path = './frames/silhouettes/'
# rgb=False makes process_data replicate each image onto three channels.
rgb = False

# Walk the directory tree once and process every image found.
X_data, y_data = walk_file_tree(relative_path)

# Can also optionally use a class to get this data, in order to keep it separate from Drawing data.
# Reuse the arrays loaded above instead of walking and decoding the whole tree a second time.
silhouette = Data()
silhouette.X_data, silhouette.y_data = X_data, y_data

In [None]:
# Report the dimensions of the image and label arrays, one per line.
print(f'X_data shape: {X_data.shape}', f'y_data shape: {y_data.shape}', sep='\n')

In [None]:
# Preview the first processed image as a quick visual sanity check.
plt.imshow(X_data[0])

#### Get 'drawing' data (this was not used for final model, but I built and tested several models with it)

In [None]:
relative_path = './frames/drawings/'
# rgb=True makes process_data use the images as loaded, without stacking to three channels.
rgb = True

# This method processes the data
# NOTE: this rebinds X_data / y_data to the drawing set.
X_data, y_data = walk_file_tree(relative_path)

In [None]:
# Report the dimensions of the image and label arrays, one per line.
print(f'X_data shape: {X_data.shape}', f'y_data shape: {y_data.shape}', sep='\n')

In [None]:
# Preview the first processed image as a quick visual sanity check.
plt.imshow(X_data[0])

In [None]:
# Send the dictionaries to a dataframe to be saved for future use
# d = {'image_path':image_path, 'gesture':gesture, 'image_rgb': image_rgb, 'image_bw_x': X_data, 'image_bw_y': y_data}
# NOTE(review): `image_path` and `gesture` are not assigned in any cell visible in this
# notebook; on a fresh kernel this cell raises NameError — confirm where they should come from.
d = {'image_path':image_path, 'gesture':gesture}
df = pd.DataFrame(d)
# df['gesture_num'] = df['gesture'].apply(lambda x: x[1:2])
# df['gesture_name'] = df['gesture'].apply(lambda x: x[3:])

# df.to_csv('silhouette_df.csv')
# df = pd.read_csv('silhouette_df.csv')

#### Bring in Kaggle data

In [None]:
# Kaggle gesture id (parsed from the filename) -> class label used by the model.
# The 'Peace' entry is never looked up below; it only records that label 4 is reserved.
gestures_map = {3: 0,
                2: 1,
                7: 2,
                1: 3,
                'Peace': 4
                }

# The frames are thresholded to single-channel masks below, so process_data must stack
# them to three channels. Set the flag here instead of inheriting it from an earlier cell.
rgb = False

X_data = []
y_data = []

root_dir = os.fsencode('./data/gestures_data/')

for directory, subdirectories, files in os.walk(root_dir):
    for file in files:
        if not file.startswith(b'.'):
            # The gesture id is the single character at index 10 of the filename.
            gesture_name = int(file.decode('utf8')[10:11])
            if gesture_name in [1, 2, 3, 7]:
                path = os.path.join(directory, file).decode('utf8')
                y_data.append(gestures_map[gesture_name])

                img = cv2.imread(path, cv2.IMREAD_COLOR)
                img = cv2.flip(img, 1)  # mirror horizontally
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                blur = cv2.GaussianBlur(gray, (41, 41), 0)  #tuple indicates blur value
                ret, thresh = cv2.threshold(blur, 150, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                thresh = cv2.resize(thresh, (224, 224))
                thresh = np.array(thresh)
                X_data.append(thresh)

# Keep the processed arrays: the return value must be bound, otherwise X_data / y_data
# stay as the raw Python lists built above.
X_data, y_data = process_data(X_data, y_data)


In [None]:
# Have to add a column of zeroes for the 'Peace' sign, since the Kaggle data does not have photos of
# 'Peace' signs. The model expects NUM_CLASSES one-hot columns.
NUM_CLASSES = 5
y_data = np.asarray(y_data)
if y_data.ndim == 1:
    # Labels are still integer class ids: one-hot encode straight to the full width.
    y_data = to_categorical(y_data, num_classes=NUM_CLASSES)
elif y_data.shape[1] < NUM_CLASSES:
    # Already one-hot but too narrow: append zero columns along the class axis.
    # (np.append(..., axis=0) with a 1-D vector cannot add a column to a 2-D array.)
    padding = np.zeros((y_data.shape[0], NUM_CLASSES - y_data.shape[1]), dtype=y_data.dtype)
    y_data = np.hstack([y_data, padding])
# Re-running this cell is safe: nothing is added once y_data is NUM_CLASSES wide.

In [None]:
# Inspect the dimensions of the label array.
y_data.shape

#### Notice that the Kaggle images are now thresholded and binarized

In [None]:
# Show sample 250 with its values subtracted from 0.5
# (presumably to invert the mask for viewing — confirm).
plt.imshow(.5 - X_data[250])

In [None]:
# Show sample 100 with its values subtracted from 0.5
# (presumably to invert the mask for viewing — confirm).
plt.imshow(.5 - X_data[100])

In [None]:
# Show sample 1200 with its values subtracted from 0.5
# (presumably to invert the mask for viewing — confirm).
plt.imshow(.5 - X_data[1200])

In [None]:
#X_train_rgb, X_test_rgb, y_train_rgb, y_test_rgb = train_test_split(image_rgb, y_data, test_size = 0.2, random_state=12, stratify=y_data)
# Hold out 20% of the samples for testing. The seed is fixed so the split is reproducible,
# and the split is stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size = 0.2, random_state=12, stratify=y_data)

#### Train-test split

#### Build the VGG Model

In [None]:
# Stop once validation accuracy has gone 10 epochs without improving, then restore the
# best weights seen during training.
early_stopping = EarlyStopping(
    monitor='val_accuracy', min_delta=0, patience=10,
    verbose=1, mode='auto', restore_best_weights=True,
)

# Write the model to disk only when it is the best one seen so far.
file_path = './models/saved_model.hdf5'
model_checkpoint = ModelCheckpoint(filepath=file_path, save_best_only=True)


In [None]:
imageSize = 224
# Convolutional part of VGG16 with ImageNet weights, without its classifier head.
vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=(imageSize, imageSize, 3))
#vgg_base = VGG16(weights='imagenet', include_top=True)
optimizer1 = optimizers.Adam()

base_model = vgg_base  # Topless
# Add top layer: three 128-unit dense layers, dropout, a 64-unit layer, then a 5-way softmax.
x = base_model.output
x = Flatten()(x)
x = Dense(128, activation='relu', name='fc1')(x)
x = Dense(128, activation='relu', name='fc2')(x)
x = Dense(128, activation='relu', name='fc3')(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu', name='fc4')(x)
predictions = Dense(5, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Train top layers only: freeze every layer of the pretrained base.
for layer in base_model.layers:
    layer.trainable = False

# Use the optimizer instance created above.
model.compile(optimizer=optimizer1, loss='categorical_crossentropy', metrics=['accuracy'])

# Validate on the held-out split. Validating on the training data would make val_accuracy
# (which early stopping monitors) meaningless and hide overfitting.
model.fit(X_train, y_train, epochs=50, batch_size=64, validation_data=(X_test, y_test), verbose=1,
          callbacks=[early_stopping, model_checkpoint])


In [None]:
# load VGG16
# Get back the convolutional part of a VGG network trained on ImageNet

imageSize = 224
model1 = VGG16(weights='imagenet', include_top=False, input_shape=(imageSize, imageSize, 3))
optimizer1 = optimizers.Adam(learning_rate=0.001)

base_model = model1  # Topless
# Add top layer: three 128-unit dense layers, dropout, a 64-unit layer, then a 5-way softmax.
x = base_model.output
x = Flatten()(x)
x = Dense(128, activation='relu', name='fc1')(x)
x = Dense(128, activation='relu', name='fc2')(x)
x = Dense(128, activation='relu', name='fc3')(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu', name='fc4')(x)

predictions = Dense(5, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Train top layer: freeze every layer of the pretrained base.
for layer in base_model.layers:
    layer.trainable = False

# Compile a single time, with the optimizer configured above.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer1,
              metrics=['accuracy'])

# Validate on the held-out split. Validating on the training data would make val_accuracy
# (which early stopping monitors) meaningless and hide overfitting.
model.fit(X_train, y_train, epochs=200, batch_size=64, validation_data=(X_test, y_test), verbose=1,
          callbacks=[early_stopping, model_checkpoint])

# Uncomment the section below and use in lieu of model.fit() above
# if using image augmentation. Our final model did not.
#
# datagen = ImageDataGenerator(
#     featurewise_center=True,
#     featurewise_std_normalization=True,
#     rotation_range=45.,
#     width_shift_range=0.3,
#     height_shift_range=0.3,
#     horizontal_flip=True)
#
# datagen.fit(X_train)
#
# # fits the model on batches with real-time data augmentation:
# model.fit_generator(datagen.flow(X_train, y_train, batch_size=32),
#                     steps_per_epoch=len(X_train)/32, epochs=150, validation_data=(X_test, y_test))


#### Save and import model

In [None]:
# model.save('models/VGG_reversed.h5')

from keras.models import load_model
# Load from the same relative location the save line above writes to, instead of a
# machine-specific absolute path. NOTE: this rebinds `model` to the loaded network.
model = load_model('models/VGG_reversed.h5')

#### Get classification metrics

In [None]:
def get_classification_metrics(X_test, y_test):
    """Print a confusion matrix and a classification report for the global ``model``.

    Args:
        X_test: Inputs passed to ``model.predict``.
        y_test: One-hot encoded true labels, one row per sample.
    """
    predicted_labels = np.argmax(model.predict(X_test), axis=1)
    true_labels = np.argmax(y_test, axis=1)  # one-hot rows -> class indices
    print(confusion_matrix(true_labels, predicted_labels))
    print('\n')
    print(classification_report(true_labels, predicted_labels))

**VGG_cross_validated model**

Note that there are no predictions for class \#5 because the 'Peace' sign did not appear in the cross-validation images.

In [None]:
# get_classification_metrics(X_data, y_data)

#### Predict gesture on a single image

In [None]:
# Class index -> gesture name. This must mirror the label encoding used for training
# ('Fist': 0, 'L': 1, 'Okay': 2, 'Palm': 3, 'Peace': 4) and the 5-unit softmax output.
# A table that starts with 'C' at index 0 reports every prediction as the wrong gesture.
gesture_names = {0: 'Fist',
                 1: 'L',
                 2: 'Okay',
                 3: 'Palm',
                 4: 'Peace'}

def predict_rgb_image(path):
    """Predict the gesture shown in the image file at ``path``.

    The image is loaded at 224x224, scaled the same way as the training data, and passed
    to the global ``model`` as a batch of one.

    Args:
        path: Filesystem path of the image to classify.

    Returns:
        The ``gesture_names`` entry for the highest-scoring class.
    """
    img2rgb = image_utils.load_img(path=path, target_size=(224, 224))
    img2rgb = image_utils.img_to_array(img2rgb)
    # Training inputs are divided by 255 in process_data; feeding raw 0-255 pixel values
    # at prediction time would not match what the network was trained on.
    img2rgb = img2rgb / 255
    img2rgb = img2rgb.reshape(1, 224, 224, 3)
    return gesture_names[np.argmax(model.predict(img2rgb))]


In [None]:
# Classify a sample image from disk with the currently loaded model.
predict_rgb_image('images_to_predict/test - palm.jpg')

#### Image Augmentation

In [None]:
# Augmentation pipeline: feature-wise centering and std normalization, rotations up to
# 45 degrees, shifts up to 20% of width/height, and random horizontal flips.
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=45.,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
# NOTE(review): X_train_rgb / y_train_rgb / X_test_rgb / y_test_rgb are only assigned in a
# commented-out line of this notebook; on a fresh kernel this cell raises NameError.
datagen.fit(X_train_rgb)

# fits the model on batches with real-time data augmentation:
# NOTE(review): steps_per_epoch divides by 128 while batch_size is 32, so one epoch covers
# roughly a quarter of the training set — confirm this is intended.
model.fit_generator(datagen.flow(X_train_rgb, y_train_rgb, batch_size=32),
                    steps_per_epoch=len(X_train_rgb) / 128, epochs=10, validation_data=(X_test_rgb, y_test_rgb))

In [None]:
# Predicted class index for the first sample of X_test_rgb, fed as a batch of one.
# NOTE(review): X_test_rgb is only assigned in a commented-out line of this notebook.
np.argmax(model.predict(X_test_rgb[0].reshape(1, 224, 224, 3)))


#### Build own model
Note that we ended up using the model on top of VGG, not this one.

In [None]:
# Small from-scratch CNN: three conv/max-pool stages, three 128-unit dense layers,
# dropout, and a 5-way softmax output. NOTE: this rebinds the global `model`.
model = models.Sequential([
    layers.Conv2D(32, (5, 5), strides=(2, 2), activation='relu', input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    Dropout(0.25, seed=21),
    layers.Dense(5, activation='softmax'),
])


In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy as the metric.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Short run: 4 epochs on the first 500 training samples, validated on the first 500 test samples.
# NOTE(review): MetricsCheckpoint is not defined or imported in any visible cell of this
# notebook — confirm where it comes from; on a fresh kernel this raises NameError.
model.fit(X_train[:500], y_train[:500], epochs=4, batch_size=64, validation_data=(X_test[:500], y_test[:500]), verbose=1, callbacks = [MetricsCheckpoint('logs')])

In [None]:
# Training run on the RGB split with early stopping and best-model checkpointing.
# NOTE(review): X_train_rgb / y_train_rgb / X_test_rgb / y_test_rgb are only assigned in a
# commented-out line of this notebook; on a fresh kernel this cell raises NameError.
model.fit(X_train_rgb, y_train_rgb, epochs=200, batch_size=16, validation_data=(X_test_rgb, y_test_rgb), verbose=1, callbacks =[early_stopping, model_checkpoint])

In [None]:
def get_classification_reports(y_pred, y_true):
    """Print a confusion matrix and a classification report for one-hot encoded data.

    Args:
        y_pred: 2-D array of per-class scores, one row per sample.
        y_true: 2-D one-hot array of true labels, one row per sample.
    """
    # argmax must run per row (axis=1). Without an axis it flattens the whole array and
    # returns a single scalar, which collapses the batch to one meaningless index.
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_true, axis=1)
    print(confusion_matrix(y_true, y_pred_classes))
    print(classification_report(y_true, y_pred_classes))

#### Classification metrics for an earlier version of the model

In [None]:
# scikit-learn expects (y_true, y_pred); with the arguments swapped, precision and recall
# are reported under each other's names.
# NOTE(review): y_true / y_pred_classes are not assigned at notebook level in any visible cell.
print(classification_report(y_true, y_pred_classes))

In [None]:
# scikit-learn expects (y_true, y_pred); with the arguments swapped, the matrix is transposed
# (rows become predictions instead of true classes).
# NOTE(review): y_true / y_pred_classes are not assigned at notebook level in any visible cell.
print(confusion_matrix(y_true, y_pred_classes))