# EECS 4404 Group Project
## Authors: Oyinkansola Ajibola, Jianhui Qi, Muhammad Fahad Sohail, Kwonmin Bok

**Original Dataset Source: Shanks0465. (2019). <i>Braille Character Dataset</i> [Data set]. Kaggle. https://www.kaggle.com/datasets/shanks0465/braille-character-dataset**

# Braille Character Dataset Description  
This dataset was created for the purpose of training a CNN for Braille Character Recognition.

**Image Description:**  
Each image is a 28x28 black-and-white image.
Each file name consists of the character (letter), the image number, and the type of data augmentation that was applied
(i.e., whs - width/height shift, rot - rotation, dim - brightness).

**Dataset composition:**  
26 characters × 3 augmentations × 20 images per augmentation, each with different augmentation values
(i.e., different shift, rotation, and brightness values).

# 1 - Look at the big picture and frame the problem.

## Frame the problem
1. Supervised learning
2. A multi-class classification task
3. Batch learning

## Look at the big picture
Given an image of a Braille character, predict the English letter it represents.  
This will help individuals with visual impairments read Braille without having to touch it.

# 2 - Load and Preprocess the dataset

### 2.0 - Import libraries

In [1]:
# Import libraries
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import cv2





### 2.1 - Load and preprocess the dataset

In [2]:
# Set a dataset path and get all file paths
# dataset_dir = Path("./dataset")
# file_list = list(dataset_dir.glob("*.jpg"))

from tensorflow.keras.preprocessing import image_dataset_from_directory


def _load_split(directory, shuffle):
    """Load one image folder with the settings shared by every split.

    Labels are inferred by ``image_dataset_from_directory`` and returned as
    integers; images are resized to 28x28 with nearest-neighbour interpolation.

    Args:
        directory: Path of the folder to read.
        shuffle: Whether the loader should shuffle the samples.

    Returns:
        The dataset object produced by ``image_dataset_from_directory``.
    """
    return image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='int',
        image_size=[28, 28],
        interpolation='nearest',
        shuffle=shuffle,
    )


# Only the training split is shuffled; both validation splits are read unshuffled.
ds_train_ = _load_split('./dataset/train_bw_imgs/', shuffle=True)
ds_valid_bw_ = _load_split('./dataset/val_bw_imgs/', shuffle=False)
ds_valid_col_ = _load_split('./dataset/val_col_imgs/', shuffle=False)

# Data pipeline
def convert_to_float(image, label):
    """Return ``image`` converted to float32 together with its unchanged ``label``.

    The conversion is delegated to ``tf.image.convert_image_dtype``.
    NOTE(review): presumably the incoming images are integer-typed so that the
    conversion also rescales them -- confirm against the loader's output dtype.
    """
    as_float = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return as_float, label

AUTOTUNE = tf.data.experimental.AUTOTUNE


def _prepare(dataset):
    """Convert a raw split to float images, then cache and prefetch it."""
    converted = dataset.map(convert_to_float)
    return converted.cache().prefetch(buffer_size=AUTOTUNE)


# Every split goes through exactly the same preprocessing steps.
ds_train = _prepare(ds_train_)
ds_valid_bw = _prepare(ds_valid_bw_)
ds_valid_col = _prepare(ds_valid_col_)

Found 1326 files belonging to 26 classes.
Found 234 files belonging to 26 classes.
Found 234 files belonging to 26 classes.


NameError: name 'ds_train_bw_' is not defined

In [None]:
# # Create 2 arrays to store image inputs and labels
# labels = []
# images = []

# # Read image file and store RGB values into the array
# # Read the name of the image file and store its classifier
# for file in file_list:
#     image = cv2.imread(str(file))
#     images.append(image)

#     # Extract a file name of the image and transform the class to numeric value
#     # Each file name of the image starts with its class
#     # e.g., 'a1.JPG0dim.jpg' indicates that the class of the image is 'a' and its numeric class is 0.
#     #       Numeric classes are a = 0, b = 1, ... , and z = 25
#     label = ord(os.path.basename(file)[0]) - ord('a')
#     labels.append(label)

# # Convert the lists to numpy arrays and normalize RGB values to between 0 and 1
# images = np.array(images) / 255.0
# labels = np.array(labels)

### 2.2 - Take a quick look at the dataset

In [None]:
# To verify that the dataset is correctly transformed,
# plot 48 images from the image datasets with the class name below each image
# plt.figure(figsize = (8, 12))
# for i in range(48):
#     plt.subplot(8, 6, i + 1)
#     plt.xticks([])
#     plt.yticks([])
#     plt.grid(False)
#     plt.imshow(images[i * 20])
#     plt.xlabel(chr(labels[i * 20] + ord('a')))
# plt.show()

### 2.3 - Split dataset into training and test set

In [None]:
# Split the dataset into 80% training set and 20% test set
# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# 3 - Create and test a CNN model

### 3.1 - Create a Convolutional Neural Network model

In [None]:
from tensorflow.keras import layers

# CNN classifier: three convolution + max-pooling stages with a growing number
# of filters (32 -> 64 -> 128), followed by a fully connected head that ends in
# a 26-way softmax (one output per class).
model = keras.Sequential([
    # Stage 1. The Conv2D keyword is `filters` (plural); `filter=` is rejected.
    layers.Conv2D(filters=32, kernel_size=5, activation='relu', padding='same',
                  input_shape=[28, 28, 3]),
    layers.MaxPool2D(pool_size=2),

    # Stage 2.
    layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(pool_size=2),

    # Stage 3.
    layers.Conv2D(filters=128, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(pool_size=2),

    # Classifier head. Hidden layers use ReLU; softmax is reserved for the
    # output layer so that only the final 26 activations are normalised into
    # class probabilities.
    layers.Flatten(),
    layers.Dense(units=520, activation='relu'),
    layers.Dense(units=260, activation='relu'),
    layers.Dense(units=26, activation='softmax'),
])

In [None]:
# Adam optimizer configured with epsilon=0.01; loss and metric are given by name.
adam = tf.keras.optimizers.Adam(epsilon=0.01)
model.compile(optimizer=adam,
              loss='SparseCategoricalCrossentropy',
              metrics=['sparse_categorical_accuracy'])

In [None]:
# Train for 50 epochs on the training pipeline and validate after each epoch.
# The only validation pipelines defined in this notebook are `ds_valid_bw` and
# `ds_valid_col` (there is no `ds_valid`); the black-and-white split matches
# the black-and-white training images.
history = model.fit(
    ds_train,
    validation_data=ds_valid_bw,
    epochs=50,
)

In [None]:
# Print the layer-by-layer summary of `model`.
model.summary()

In [None]:
# Collect the per-epoch values recorded by `model.fit` into a DataFrame.
history_frame = pd.DataFrame(history.history)

# The history keys follow the compiled metric name, with a `val_` prefix for
# the validation data: 'sparse_categorical_accuracy' and
# 'val_sparse_categorical_accuracy'.
history_frame.loc[:, ['loss', 'val_loss']].plot(title='Loss')
history_frame.loc[:, ['sparse_categorical_accuracy',
                      'val_sparse_categorical_accuracy']].plot(title='Accuracy');

Old model - based on a Kaggle reference

In [None]:
# Create Convolutional Neural Network model
def _conv_stage(kernel_size, dropout_rate=None):
    """Return the layers of one conv stage: Conv2D -> MaxPooling2D -> [Dropout] -> BatchNormalization.

    The Dropout layer is included only when ``dropout_rate`` is given.
    """
    stage = [
        keras.layers.Conv2D(filters=64, kernel_size=kernel_size, padding='same', activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
    ]
    if dropout_rate is not None:
        stage.append(keras.layers.Dropout(dropout_rate))
    stage.append(keras.layers.BatchNormalization())
    return stage


model = keras.Sequential([
    # Feature extractor: one 5x5 stage without dropout, then two 3x3 stages with dropout.
    *_conv_stage(kernel_size=(5, 5)),
    *_conv_stage(kernel_size=(3, 3), dropout_rate=0.25),
    *_conv_stage(kernel_size=(3, 3), dropout_rate=0.25),

    # Classifier head: flatten the feature maps, then narrow through fully
    # connected layers down to 26 outputs (one per class).
    keras.layers.Flatten(),
    keras.layers.Dense(units=578, activation="relu"),
    keras.layers.Dropout(0.25),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(units=288, activation="relu"),
    keras.layers.Dense(units=26, activation="softmax"),  # output layer
])

### 3.2 - Compile the model

In [None]:
# Optimizer, loss and metric are all selected by name.
model.compile(
    optimizer="Adam",
    loss="SparseCategoricalCrossentropy",
    metrics=["sparse_categorical_accuracy"],
)

### 3.3 - Set early stopping to prevent overfitting

In [None]:
# Use the Keras bundled with TensorFlow, like the rest of this notebook,
# rather than the standalone `keras` package.
from tensorflow.keras.callbacks import EarlyStopping

# Each callback stops training after 20 epochs without improvement in its
# monitored validation metric.
es1 = EarlyStopping(patience=20, monitor="val_sparse_categorical_accuracy", mode="auto")
es2 = EarlyStopping(patience=20, monitor="val_loss", mode="auto")

# `X_train` / `y_train` only appear in the commented-out split of section 2.3,
# so train on the dataset pipeline built in section 2.1 instead. Keras does
# not support `validation_split` for dataset inputs, so the validation data is
# passed explicitly.
history = model.fit(ds_train,
                    epochs=100,
                    validation_data=ds_valid_bw,
                    callbacks=[es1, es2])

### 3.4 - Display summary of the model

In [None]:
# Print the layer-by-layer summary of `model`.
model.summary()

### 3.5 - Plot the history of loss and accuracy

In [None]:
# Epoch numbers 1..N, one per recorded loss value.
n_epochs = len(history.history['loss'])
time = np.arange(1, n_epochs + 1)

In [None]:
# Label each curve where it is drawn instead of relying on the order of
# `plt.legend(labels=...)`; positional labels can end up attached to other
# artists seaborn adds to the axes (e.g. error bands).
sns.lineplot(data=history.history, x=time, y='loss', label='Training Loss')
sns.lineplot(data=history.history, x=time, y='val_loss', label='Validation loss')
plt.title('Loss fitting history')
plt.xlabel('Epoch')
plt.legend()
plt.show()

In [None]:
# Label each curve where it is drawn: the previous positional legend named the
# validation curve (drawn first) "Training Accuracy" and vice versa.
sns.lineplot(data=history.history, x=time, y='sparse_categorical_accuracy',
             label='Training Accuracy')
sns.lineplot(data=history.history, x=time, y='val_sparse_categorical_accuracy',
             label='Validation Accuracy')
plt.title('Accuracy fitting history')
plt.xlabel('Epoch')
plt.legend()
plt.show()

### 3.6 - Evaluate the model

In [None]:
print("Adam Optimizer")
# `X_test` / `y_test` only appear in the commented-out split of section 2.3,
# so report loss and accuracy on the held-out datasets built in section 2.1.
# NOTE(review): confirm which split is meant to be the final test set.
for split_name, split in (("val_bw", ds_valid_bw), ("val_col", ds_valid_col)):
    loss, accuracy = model.evaluate(split, verbose=0)
    print(f"{split_name}: loss={loss:.4f}, accuracy={accuracy:.4f}")

Other model - based on Kaggle reference code

In [None]:
# Kaggle-referenced architecture, assembled layer by layer.
model_kag = keras.Sequential()

# Conv stage 1: 5x5 kernels, no dropout.
model_kag.add(keras.layers.Conv2D(filters=64, kernel_size=(5, 5), padding='same', activation='relu'))
model_kag.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model_kag.add(keras.layers.BatchNormalization())

# Conv stages 2 and 3: identical 3x3 stages with dropout before normalisation.
for _ in range(2):
    model_kag.add(keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))
    model_kag.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model_kag.add(keras.layers.Dropout(0.25))
    model_kag.add(keras.layers.BatchNormalization())

# Classifier head: flatten, two hidden dense layers, 26-way softmax output.
model_kag.add(keras.layers.Flatten())

model_kag.add(keras.layers.Dense(units=576, activation="relu"))
model_kag.add(keras.layers.Dropout(0.25))
model_kag.add(keras.layers.BatchNormalization())

model_kag.add(keras.layers.Dense(units=288, activation="relu"))

model_kag.add(keras.layers.Dense(units=26, activation="softmax"))  # output layer