# Import all required libraries

In [15]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils.np_utils import to_categorical
from keras.utils.vis_utils import plot_model
from keras.preprocessing import image
import numpy as np
import imutils
import matplotlib.pyplot as plt
import os
import cv2
%matplotlib inline

# Set all required variables

In [16]:
TRAIN_DATA_PATH = "datasets/train_data"
# train_data folder has subfolders "A" to "Z", "nothing" and "space".
# Each subfolder has training images in jpg format.
# Only the subfolders named in LABELS below are read by create_data.

TEST_DATA_PATH = "datasets/test_data"
# test_data folder has subfolders "A" to "Z", "nothing" and "space".
# Each subfolder has test images in jpg format.

#to_remove = ["V", "S", "J", "Z", "N"] # nothing, space
# Classes the model is trained on; a label's position in this list is its class index.
LABELS = ['A', 'C', 'E', 'H', 'I', 'L', 'O', 'U', 'V', 'W']

# Derived from LABELS so the class count cannot drift from the label list.
NUM_OF_LETTERS = len(LABELS)
IMAGE_SIZE = 50 # We'll be working with 50 * 50 pixel images
NUM_OF_CHANNELS = 1 # Grayscale
NUM_OF_TRAIN_IMAGES = 2000 * NUM_OF_LETTERS # assumes 2000 training images per label
NUM_OF_TEST_IMAGES = NUM_OF_LETTERS # assumes one test image per label
NUM_OF_DENSE_LAYER_NODES = (IMAGE_SIZE * IMAGE_SIZE) // 2

print(len(LABELS))

10


# Creating training and test data from images 

In [17]:

def create_data(DATA_PATH):
    """Load the images under DATA_PATH as a list of [image_array, label_index].

    For every label in LABELS, each file in the subfolder DATA_PATH/<label>
    is read with OpenCV, converted to grayscale and resized to
    IMAGE_SIZE x IMAGE_SIZE.

    Keyword arguments:
    DATA_PATH -- Path of train/ test data

    Returns:
    list of [image_array, label_index] pairs, where label_index is the
    position of the label in LABELS. Files that OpenCV cannot read or
    process are skipped and reported in a printed summary.

    Raises:
    OSError -- if a label subfolder cannot be listed.
    """
    data = []
    skipped = []
    for label_index, label in enumerate(LABELS):
        path = os.path.join(DATA_PATH, label)
        # Sorted so the sample order does not depend on the directory listing order.
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            # cv2.imread signals an unreadable file by returning None, not by raising.
            img_array = cv2.imread(img_path)
            if img_array is None:
                skipped.append(img_path)
                continue
            try:
                img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
                img_array = cv2.resize(img_array, (IMAGE_SIZE, IMAGE_SIZE))
            except cv2.error:
                # Stay best-effort, but remember the file so the skip is visible.
                skipped.append(img_path)
                continue
            data.append([img_array, label_index]) # image array -> index of its label in LABELS

    if skipped:
        print("create_data: skipped %d unreadable file(s) under %s, e.g. %s"
              % (len(skipped), DATA_PATH, skipped[0]))
    return data

# Load both image sets from disk; the two calls are independent of each other.
training_data = create_data(TRAIN_DATA_PATH)
test_data = create_data(TEST_DATA_PATH)

# Show how many training samples were loaded.
print(len(training_data))

20000


# Making data sets

In [18]:
def make_datasets(data):
    """Split [img_array, label_name] pairs into two parallel lists.

    Keyword arguments:
    data -- list of [img_array, label_name]

    Returns:
    (x, y) -- x holds the first item of every pair and y the second,
    both in the order the pairs appear in data.
    """
    pairs = list(data)
    x = [features for features, _ in pairs]
    y = [label for _, label in pairs]
    return x, y

# Separate images (X) from labels (y) for each set; the two calls are independent.
X_test, y_test = make_datasets(test_data)
X_train, y_train = make_datasets(training_data)

# Show how many training images there are.
print(len(X_train))

20000


# Convert datasets to numpy arrays

In [19]:
# Turn the Python lists into NumPy arrays, one assignment per dataset.
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)


# Reshaping X datasets to (samples, height, width, channels)

In [20]:
# Add the explicit channel axis expected by the first Conv2D layer.
# -1 lets NumPy infer the number of samples from the array itself, so the
# reshape still works when a folder holds more or fewer readable images
# than a hard-coded count would expect.
X_train = X_train.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS)
X_test = X_test.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS)
print(X_train.shape)

(20000, 50, 50, 1)


# Normalizing X arrays to the range [0, 1]

In [21]:
# Convert to float32 and divide by 255.0 in a single expression per dataset.
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# Converting y arrays to one-hot encoding

In [22]:
# One-hot encode the labels with NUM_OF_LETTERS classes.
# Note: this rebinds y_train / y_test, so re-running the cell would feed the
# already-encoded arrays back into to_categorical.
y_test = to_categorical(y_test, num_classes = NUM_OF_LETTERS)
y_train = to_categorical(y_train, num_classes = NUM_OF_LETTERS)
print(X_train.shape)

(20000, 50, 50, 1)


# Creating CNN Model

In [23]:
# The whole network is declared as one ordered list of layers.
model = Sequential([
    # Block 1: two 3x3 convolutions with IMAGE_SIZE filters, then pooling and dropout.
    Conv2D(
        IMAGE_SIZE,
        (3, 3),
        padding = "same",
        input_shape = (IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS),
        activation = "relu",
    ),
    Conv2D(IMAGE_SIZE, (3, 3), activation = "relu"),
    MaxPooling2D(pool_size = (2, 2)),
    Dropout(0.25),

    # Block 2: same layout with twice as many filters.
    Conv2D(2 * IMAGE_SIZE, (3, 3), padding = "same", activation = "relu"),
    Conv2D(2 * IMAGE_SIZE, (3, 3), activation = "relu"),
    MaxPooling2D(pool_size = (2, 2)),
    Dropout(0.25),

    # Classifier head: flatten, one hidden dense layer, softmax over the classes.
    Flatten(),
    Dense(NUM_OF_DENSE_LAYER_NODES, activation = "relu"),
    Dropout(0.5),
    Dense(NUM_OF_LETTERS, activation = "softmax"),
])

# Compiling CNN Model

In [24]:
# Categorical cross-entropy loss, Adam optimizer, accuracy reported as the metric.
model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])

In [11]:
# Print the layer-by-layer summary of the network.
model.summary()

# Write the architecture diagram to model_plot.png, with shapes and layer names.
plot_model(
    model,
    to_file='model_plot.png',
    show_layer_names = True,
    show_shapes = True,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 50, 50, 50)        500       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 48, 48, 50)        22550     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 24, 50)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 24, 24, 50)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 100)       45100     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 22, 22, 100)       90100     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 11, 11, 100)       0         
__________

# Training Model

In [25]:
# Train for 5 epochs in batches of 32, shuffling the training data.
# The test arrays are passed as validation data.
history = model.fit(
    X_train, y_train,
    validation_data = (X_test, y_test),
    epochs = 5,
    batch_size = 32,
    shuffle = True,
)

Train on 20000 samples, validate on 10 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# Saving Model and datasets for future use

In [26]:
# Persist the trained model to an HDF5 file.
from keras.models import load_model
model.save('withbgmodelv1.h5')

# Persist the prepared arrays; each one goes to its own .npy file.
for npy_name, npy_array in (
    ("X_train.npy", X_train),
    ("y_train.npy", y_train),
    ("X_test.npy", X_test),
    ("y_test.npy", y_test),
):
    np.save(npy_name, npy_array)

# Calculating Test Score

In [None]:
# Evaluate on the test arrays and print the first two returned values on
# separate lines (presumably loss then accuracy, given the compile metrics).
score = model.evaluate(X_test, y_test)
print(score[0], score[1], sep = "\n")