In [0]:
#*********************************
# Classifier: Convolutional Neural Network using Keras
# Author: Manuel Serna-Aguilera
# This program was run using Google Colaboratory
#*********************************

In [0]:
# Import libraries
import cv2
import json
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.utils.np_utils import to_categorical
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers import Dropout
from keras.models import Model
import random
import sys

In [0]:
np.random.seed(0)

In [116]:
# Load the thesis project repo
! git clone https://github.com/ManuelSerna/uark-honors-thesis.git

fatal: destination path 'uark-honors-thesis' already exists and is not an empty directory.


In [0]:
# Setup
sys.path.append('uark-honors-thesis/project/')
import file_io as f
english = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
spanish = ['A', 'AA', 'B', 'C', 'D', 'E', 'EE', 'F', 'G', 'H', 'I', 'II', 'J', 'K', 'L', 'M', 'N', 'NN', 'O', 'OO', 'P', 'Q', 'R', 'S', 'T', 'U', 'UU', 'UUU', 'V', 'W', 'X', 'Y', 'Z']
all_letters = spanish
n_train = 80
n_test = 30

In [0]:
#=================================
# Transform Image
'''
Input:
  - letter: letter to query
  - num: number identifier
  - train: flag to tell function to query training or test images
Return:
  - img: m*m (grayscaled, downsized) image
'''
#=================================
def transform(letter='', num=0, train=True):
  #print('{} {}'.format(letter, num))
  
  # Query image if IDing info was given
  img = f.get_img(name=letter, num=num, training=train)

  # Grayscale image
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  
  # Downscale image to be m*m
  m = 28
  img = cv2.resize(img , (m, m), interpolation = cv2.INTER_CUBIC)
  
  # Get normalized list
  img = img/255.0 # normalize every element
  
  return img

In [0]:
#=================================
# Return Data Set
'''
Input:
  - n: number of samples (either for training and test sets)
  - train: flag to tell function whether to fetch training or test data
Return
  - tuple:
    - data: image data
    - labels: labels to corresponding index in data
'''
#=================================
def get_data(n = -1, train=True):
  num_samples = n * len(all_letters)
  data = np.zeros(shape=(num_samples, 28, 28)) # shape = (n*|all_letters|, 28, 28)
  #labels =  np.chararray(shape=(num_samples,))# (n*|all_letters|,)
  labels = np.zeros(shape=(num_samples,))

  counter = 0
  for l in range(len(all_letters)):
    #print('{}: {}'.format(l, all_letters[l]))
    for num in range(1, n+1):
      data[counter] = transform(all_letters[l], num, train)
      #labels.append(letter)
      labels[counter] = l
      counter += 1
    
  return (data, labels)

In [120]:
# Get training and test data
(X_train, y_train) = get_data(n=n_train, train=True)
(X_test, y_test) = get_data(n=n_test, train=False)

# Check that number of data equals number of labels
assert(X_train.shape[0] == y_train.shape[0]), "The number of images is not equal to the number of labels."
assert(X_train.shape[1:] == (28,28)), "The dimensions of the images are not 28 x 28."
assert(X_test.shape[0] == y_test.shape[0]), "The number of images is not equal to the number of labels."
assert(X_test.shape[1:] == (28,28)), "The dimensions of the images are not 28 x 28."

# Add fourth dim--the number of channels (just 1 for gray)
X_train = X_train.reshape(n_train*len(all_letters), 28, 28, 1)
X_test = X_test.reshape(n_test*len(all_letters), 28, 28, 1)

y_train = to_categorical(y_train, len(all_letters))
y_test = to_categorical(y_test, len(all_letters))

A 1
A 2
A 3
A 4
A 5
A 6
A 7
A 8
A 9
A 10
A 11
A 12
A 13
A 14
A 15
A 16
A 17
A 18
A 19
A 20
A 21
A 22
A 23
A 24
A 25
A 26
A 27
A 28
A 29
A 30
A 31
A 32
A 33
A 34
A 35
A 36
A 37
A 38
A 39
A 40
A 41
A 42
A 43
A 44
A 45
A 46
A 47
A 48
A 49
A 50
A 51
A 52
A 53
A 54
A 55
A 56
A 57
A 58
A 59
A 60
A 61
A 62
A 63
A 64
A 65
A 66
A 67
A 68
A 69
A 70
A 71
A 72
A 73
A 74
A 75
A 76
A 77
A 78
A 79
A 80
B 1
B 2
B 3
B 4
B 5
B 6
B 7
B 8
B 9
B 10
B 11
B 12
B 13
B 14
B 15
B 16
B 17
B 18
B 19
B 20
B 21
B 22
B 23
B 24
B 25
B 26
B 27
B 28
B 29
B 30
B 31
B 32
B 33
B 34
B 35
B 36
B 37
B 38
B 39
B 40
B 41
B 42
B 43
B 44
B 45
B 46
B 47
B 48
B 49
B 50
B 51
B 52
B 53
B 54
B 55
B 56
B 57
B 58
B 59
B 60
B 61
B 62
B 63
B 64
B 65
B 66
B 67
B 68
B 69
B 70
B 71
B 72
B 73
B 74
B 75
B 76
B 77
B 78
B 79
B 80
C 1
C 2
C 3
C 4
C 5
C 6
C 7
C 8
C 9
C 10
C 11
C 12
C 13
C 14
C 15
C 16
C 17
C 18
C 19
C 20
C 21
C 22
C 23
C 24
C 25
C 26
C 27
C 28
C 29
C 30
C 31
C 32
C 33
C 34
C 35
C 36
C 37
C 38
C 39
C 40
C 41
C 42
C 43
C 44
C 45
C 

In [0]:
#=================================
# LeNet CNN Model
# TODO: correct param names in the function
#=================================
def leNet():
  model = Sequential() # define model using sequential class
  # Start by adding a layer to the model, in this step, insert a convolutional layer.
  '''
  Arg 1: num filters, the more you have, the more computing power needed 
  Arg 2: filter size (of 5x5 for the 28x28 images)
  Arg 3: input will be fed an image that is 28x28 with one channel (greyscale, hence a depth of 1)
  Arg 4: activation function, use the ReLU function
  
  The image will be reduced to 30 feature maps, each 24x24.
  
  Padding works to preserve the spatial dimensionality of the image.
      * Same padding: making the output matrix size the same as the input.
          - Allows to extract low-level features.
  '''
  model.add(Conv2D(30, (5,5), input_shape=(28,28,1), activation='relu')) # use 30 filters of size 5x5
  
  # Add pooling layer
  model.add(MaxPooling2D(pool_size=(2, 2))) # only 1 arg: size of pooling element

  # Add another convolutional layer. Use smaller filter to extract features.
  model.add(Conv2D(15, (3,3), activation='relu')) # have 4,065 parameters, use 15 filters of size 3x3
  
  # Add another pooling layer
  model.add(MaxPooling2D(pool_size=(2, 2))) # produce a 5x5 image with a depth of 50
  
  # Take convoluted data and feed into the fully connected layer
  model.add(Flatten())
  model.add(Dense(500, activation='relu'))
  
  '''
  Use a single dropout layer.
  Although more can be used, and in different places, they are used in between layers that have a high number of parameters, these are more likely to overfit.
  
  Arg 1: fraction rate, the amount of input nodes that the dropout layer drops during each update.
  0 = no nodes dropped.
  1 = all nodes dropped.
  RECOMMENDED = 0.5
  '''
  model.add(Dropout(0.5))
  
  # Define output layer
  model.add(Dense(len(all_letters), activation='softmax'))
  model.compile(Adam(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])
  
  return model

In [122]:
# Initialize CNN model
model = leNet()
print(model.summary())

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 24, 24, 30)        780       
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 12, 12, 30)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 10, 10, 15)        4065      
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 5, 5, 15)          0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 375)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 500)               188000    
_________________________________________________________________
dropout_7 (Dropout)          (None, 500)              

In [123]:
# Train network
history = model.fit(X_train, y_train, epochs=10, validation_split=0.1, batch_size=400, verbose=1, shuffle=1)

Train on 1872 samples, validate on 208 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [124]:
# Get accuracy of model for current letter set
score = model.evaluate(X_test, y_test, verbose=0)
print(type(score))
print('Test accuracy:', score[1])

<class 'list'>
Test accuracy: 0.9179487228393555


In [0]:
# Calculate accuracies for each letter
accuracies = {} # dict to contain accuracy on each letter
confusion = {} # dict to contain all individual classifications

for letter in all_letters:
  matches = []

  # i. Get 30 predictions from CNN for each letter
  for letter_id in range(1, n_test+1):
    # Query image test sample
    img = f.get_img(name=letter, num=letter_id, training=False)

    # Transform image
    img = np.asarray(img)
    img = cv2.resize(img, (28, 28))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = img/255
    img = img.reshape(1, 28, 28, 1)

    # Get best match from model
    prediction = model.predict_classes(img)
    prediction = all_letters[int(prediction)]
    #print("predicted digit: {}".format(prediction))
    matches.append(prediction)
  
  # ii. Add all predictions for a single letter to the dict
  confusion[letter] = matches

  # iii. Compute accuracy of CNN on current letter
  accuracy = 0

  for match in matches:
    if match == letter:
      accuracy += 1
  
  accuracies[letter] = (accuracy/n_test)

In [0]:
# TODO: calculate overall accuracy(?) and write individual predictions onto JSON
# Write dictionary to JSON file
result_file = "results_cnn.json"
confusion_file = "all_best_matches_cnn.json"

with open(result_file, 'w') as file:
    file.write(json.dumps(accuracies, indent=4))

with open(confusion_file, 'w') as file:
    file.write(json.dumps(confusion, indent=4))
  
# In Colaboratory, specify that we want to download our files
from google.colab import files
files.download(result_file)
files.download(confusion_file)