Image classification of an American Sign Language dataset using data augmentation and a convolutional neural network (CNN)

In [None]:
# Import the required libraries
import tensorflow.keras as keras
import pandas as pd

# Read the training and validation splits from their CSV files
train_df = pd.read_csv("sign_mnist_train.csv")
valid_df = pd.read_csv("sign_mnist_valid.csv")

# Pull the 'label' column out of each frame. pop() returns the column and
# removes it from the frame, so only the image columns remain afterwards.
y_train = train_df.pop('label')
y_valid = valid_df.pop('label')

# One-hot encode the integer targets into num_classes binary columns
num_classes = 24
y_train = keras.utils.to_categorical(y_train, num_classes)
y_valid = keras.utils.to_categorical(y_valid, num_classes)

# Image vectors: divide by 255 to normalize, then reshape each flat row into
# a 28x28 single-channel image for the convolutional network. The leading -1
# lets reshape infer the number of images from the data.
x_train = (train_df.values / 255).reshape(-1, 28, 28, 1)
x_valid = (valid_df.values / 255).reshape(-1, 28, 28, 1)

In [None]:
# CNN model architecture
# Creating a convolutional model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense,
    Conv2D,
    MaxPool2D,
    Flatten,
    Dropout,
    BatchNormalization,
)

# Assemble the convolutional classifier from a single list of layers:
# three Conv2D -> BatchNormalization -> MaxPool2D stages (the middle stage
# also applies Dropout before normalizing), followed by a dense head.
model = Sequential([
    # Stage 1: 75 filters; also declares the 28x28x1 input shape
    Conv2D(75, (3, 3), strides=1, padding="same", activation="relu",
           input_shape=(28, 28, 1)),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding="same"),

    # Stage 2: 50 filters, with 20% dropout
    Conv2D(50, (3, 3), strides=1, padding="same", activation="relu"),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding="same"),

    # Stage 3: 25 filters
    Conv2D(25, (3, 3), strides=1, padding="same", activation="relu"),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding="same"),

    # Classifier head: flatten, one hidden dense layer with 30% dropout,
    # then a softmax over num_classes outputs
    Flatten(),
    Dense(units=512, activation="relu"),
    Dropout(0.3),
    Dense(units=num_classes, activation="softmax"),
])

In [None]:
# Data Augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random transformations applied to training images as batches are drawn
datagen = ImageDataGenerator(
    # Translation, expressed as a fraction of the image size
    width_shift_range=0.1,    # horizontal shift (fraction of total width)
    height_shift_range=0.1,   # vertical shift (fraction of total height)
    # Rotation and zoom
    rotation_range=10,        # degree range for random rotations
    zoom_range=0.1,           # range for random zoom
    # Mirroring: left-right flips only, never top-bottom
    horizontal_flip=True,
    vertical_flip=False,
)

In [None]:
# Batch the training data (batch size set below) so that the model trains on random samples, then preview one augmented batch.
import matplotlib.pyplot as plt
import numpy as np
# Number of samples the generator yields per batch
batch_size = 32
# Iterator over augmented (images, labels) batches drawn from the training set
img_iter = datagen.flow(x_train, y_train, batch_size=batch_size)

# Draw one batch for a visual check. The built-in next() works on any Python
# iterator, whereas the iterator's own .next() method is not available in
# every Keras version.
x, y = next(img_iter)
fig, ax = plt.subplots(nrows=4, ncols=8)
# Pair each axes with one image so the loop can never index past either the
# 4x8 grid or the batch, whatever batch_size is set to.
for axis, image in zip(ax.flatten(), x):
    # squeeze drops the trailing channel dimension so imshow gets a 2-D array
    axis.imshow(np.squeeze(image))
plt.show()

In [None]:
# Compile the model: categorical cross-entropy loss, reporting accuracy.
# The optimizer is written out explicitly as Keras' default, 'rmsprop'.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit the generator on the training dataset so it can compute any
# data-dependent statistics its transformations need.
datagen.fit(x_train)

In [None]:
# Train on augmented batches for 20 epochs, evaluating on the unaugmented
# validation arrays.
model.fit(img_iter,
          epochs=20,
          # steps_per_epoch must be an integer. len(img_iter) is the number of
          # batches in one pass over x_train, i.e. the same number of steps
          # we would run if we were not using a generator.
          steps_per_epoch=len(img_iter),
          validation_data=(x_valid, y_valid))

In [None]:
# Save the trained model to the path 'asl_model'.
# NOTE(review): the path has no file extension; newer Keras releases expect a
# '.keras' or '.h5' suffix for model.save — confirm against the installed version.
model.save('asl_model')

In [None]:
# Load the saved model
# keras is imported again here, so this cell does not rely on the earlier
# `import tensorflow.keras as keras` having run.
from tensorflow import keras

# Rebind `model` to the network restored from the 'asl_model' path used by model.save
model = keras.models.load_model('asl_model')

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def show_image(image_path):
    """Read the image file at `image_path` and draw it with a gray colormap.

    Args:
        image_path: Path of the image file to display.
    """
    pixels = mpimg.imread(image_path)
    plt.imshow(pixels, cmap='gray')


# Display the sample image
show_image('b.png')

In [None]:
# Scaling the images
# Pass the same size and grayscale images into our method for prediction

from tensorflow.keras.preprocessing import image as image_utils

def load_and_scale_image(image_path):
    """Load an image file as a 28x28 grayscale image.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The image object produced by `image_utils.load_img`.
    """
    return image_utils.load_img(
        image_path,
        color_mode="grayscale",
        target_size=(28, 28),
    )

In [None]:
# Load the sample image as 28x28 grayscale
image = load_and_scale_image('b.png')
# Preview the loaded image with a gray colormap
plt.imshow(image, cmap='gray') 

In [None]:
# Prepare the test image in one expression: convert it to an array, reshape
# it to a single 28x28x1 sample, and normalize by dividing by 255
image = image_utils.img_to_array(image).reshape(1, 28, 28, 1) / 255

# Make predictions and print the model's raw output
prediction = model.predict(image)
print(prediction)

In [None]:
# Get the index of the highest value in the prediction
# (np.argmax returns the position of the maximum, not the maximum itself)
import numpy as np
np.argmax(prediction)

In [None]:
# Letters the classifier can output, in class-index order. The alphabet does
# not contain j or z because they require movement.
alphabet = "abcdefghiklmnopqrstuvwxy"
# Map each class index to its letter. enumerate() derives the indices from
# the alphabet itself, so no separately hard-coded count can drift out of sync.
dictionary = dict(enumerate(alphabet))
dictionary

In [None]:
# Look up the letter for the index of the highest value in the prediction
dictionary[np.argmax(prediction)]

In [None]:
# Put it all together
def predict_letter(file_path):
    """Display an image file and return the letter the model predicts for it.

    The image is shown, reloaded as 28x28 grayscale, converted to an array,
    reshaped to a single-sample batch, divided by 255, and passed to the
    model.

    Args:
        file_path: Path of the image file to classify.

    Returns:
        The entry of `dictionary` at the index of the highest prediction value.
    """
    show_image(file_path)

    scaled_image = load_and_scale_image(file_path)
    batch = image_utils.img_to_array(scaled_image).reshape(1, 28, 28, 1) / 255
    scores = model.predict(batch)

    # convert the prediction to a letter
    return dictionary[np.argmax(scores)]

In [None]:
# Run the full pipeline on the sample image; the returned letter is the cell's output
predict_letter("b.png")