In [7]:
# make sure matplotlib shows images inline
%matplotlib inline

# import packages
import numpy as np
import time
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import os
import cv2
from PIL import Image

# Keras packages
from keras import layers
from keras.layers import Input,Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.models import Model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from keras.initializers import glorot_uniform

from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model

import keras.backend as K

In [3]:
# location of the images
imgloc = 'D:/Documents/GitHub/HandSign_Recognition/00 Data/Own/'

# print the current datetime
print(time.ctime())

# read all images from file into a numpy array
# cv2 assumes colors are BGR, so we also convert this to RGB
train_img = np.array([cv2.cvtColor(cv2.imread(imgloc + name), cv2.COLOR_BGR2RGB) for name in os.listdir(imgloc)], dtype = np.object)

# use the image names to create a numpy array with the label of each image
train_label  = np.array([int(name.rpartition(' ')[0].rpartition('_')[2]) for name in os.listdir(imgloc)])

# print the current datetime
print(time.ctime())

Tue Jan 30 20:13:03 2018
Tue Jan 30 20:13:36 2018


In [4]:
print(train_img.shape)
print(train_label.shape)
print(train_label)

(10, 3968, 2976, 3)
(10,)
[1 1 1 1 1 2 2 2 2 2]


In [31]:
# resize the images
#np.reshape(train_img, (10,64,64,3))
#print(train_img.shape)


img = train_img[1].astype('uint8')
Image.fromarray(img)
print(img.shape)
img = img.reshape(64,64,3)
#plt.imshow(img)
print(img.shape)

(3968, 2976, 3)


ValueError: cannot reshape array of size 35426304 into shape (64,64,3)

In [4]:
# determine the number of unique labels
nr_possible_values = np.unique(train_label).size

# create a matrice with only zeros
# the number of rows = the number of images
# the number of columns = the number of possible values we want to recognize
label_matrix_train = np.zeros([train_label.shape[0], nr_possible_values])

# create a dictionary for the labels
# we're going to use this dictionary to determine which column in the matrix corresponds to which value
label_dict = {1: 0, 2: 1}

# set the value of 1 for each record in the column with the corresponding value
count = 0
for i in train_label:
    label_matrix_train[count, label_dict[i]] = 1
    count = count + 1
    
#print(label_matrix_train)

[[ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]]


In [None]:
# normalize the data (set all values between [0,1])
train_img_norm = train_img / 255
#print(train_img_norm)

In [None]:
# make sure to tell keras the channels are the last dimension in the shape of the dataset
# in this case the channel = 3, since we have full color images with 3 RGB channels
K.set_image_data_format('channels_last')



def plain_layer(X,n_c):
    X_in = X
    X = Conv2D(n_c,kernel_size = (3,3), padding = 'same')(X_in)
    X = BatchNormalization(axis = 3)(X)
    X = Activation('relu')(X)
    X = MaxPooling2D(pool_size = (2,2))(X)
    return X



def identity_block(X,F):
    X_in = X
    
    F1,F2,F3 = F
    
    X = Conv2D(F1,kernel_size=(3,3),padding='same')(X_in)
    X = BatchNormalization(axis=3)(X)
    X = Activation('relu')(X)
    
    X = Conv2D(F2,kernel_size=(3,3),padding='same')(X)
    X = BatchNormalization(axis=3)(X)
    X = Activation('relu')(X)
    
    X = Conv2D(F3,kernel_size=(3,3),padding='same')(X)
    X = BatchNormalization(axis=3)(X)
    
    X_in = Conv2D(F3,kernel_size=(3,3),padding='same')(X_in)
    X_in = BatchNormalization(axis=3)(X_in)
    
    X = Add()([X,X_in])
    X = Activation('relu')(X)
    
    return X



def Resnet(input_shape=(64,64,3),classes=6):
    X_in = Input(input_shape)
    
    X = plain_layer(X_in,32)
    
    F1 = [16,16,32]
    X = identity_block(X,F1)
    #X = MaxPooling2D(pool_size=(2,2))(X)
    
    F2 = [16,16,32]
    X = identity_block(X,F2)
    #X = MaxPooling2D(pool_size=(2,2))(X)
    
    F3 = [16,16,32]
    X = identity_block(X,F3)
    #X = MaxPooling2D(pool_size=(2,2))(X)
    
    #X = plain_layer(X,32)
    X = AveragePooling2D((2,2))(X)
    
    X = Flatten()(X)
    X = Dense(512,activation='relu')(X)
    X = Dense(128,activation='relu')(X)
    X = Dense(classes,activation='softmax')(X)
    
    model = Model(inputs=X_in,outputs=X,name='Resnet')
    return model



# declare a resnet model
my_model = Resnet()

# print the current date and time
print(time.ctime())

my_model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
my_model.fit(x = X_train, y = Y_train, epochs = 20, batch_size = 32)

# print the current date and time
print(time.ctime())

time.sleep(5)