# SignABC - BISINDO Sign Language Learning App for Children

<div class="alert alert-block alert-success"> 
📌 This notebook was created for a capstone project in which we are building a learning app that helps children learn BISINDO sign language.
</div>

# Import Dependencies

In [None]:
# !pip install tensorflowjs

In [None]:
import os
import random
import shutil
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import pathlib
import numpy as np
import PIL
import PIL.Image
import tensorflow_hub as hub
# import tensorflowjs as tfjs

from google.colab import drive, files
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import load_img, img_to_array

# Setup

In [None]:
# Mount google drive
# drive.mount('/content/drive/')

# Unzip dataset file
# !unzip "/content/drive/MyDrive/Capstone Project ML/Zipped dataset/Dataset_Experimen_Khresna/Dataset_KhresnaV4(NoSplit, NoPersonInBackground, NoAug).zip" -d "/content"
# !unzip "/content/drive/Shareddrives/Capstone ML/Dataset Shared Drive/Dataset_BintangV0.zip" -d "/content"

In [None]:
# Image file suffixes that are counted, compared case-insensitively.
# NOTE(review): chosen to mirror the formats Keras' image_dataset_from_directory
# loads, so this count should agree with its "Found N files" message — confirm
# against the installed TensorFlow version.
IMAGE_EXTENSIONS = frozenset({".bmp", ".gif", ".jpeg", ".jpg", ".png"})


def count_images(root, extensions=IMAGE_EXTENSIONS):
    """Count image files stored exactly one directory below `root`.

    Args:
        root: dataset directory laid out as `root/<class name>/<image file>`.
        extensions: lower-case file suffixes (with leading dot) to count.

    Returns:
        Number of regular files whose suffix, lower-cased, is in `extensions`.
        Each file is visited once, so it cannot be counted twice on platforms
        where glob matching is case-insensitive.
    """
    return sum(
        1
        for entry in pathlib.Path(root).glob("*/*")
        if entry.is_file() and entry.suffix.lower() in extensions
    )


# Dataset path
path = "/content/Dataset_BintangV0"
data_dir = pathlib.Path(path)

# Image count
image_count = count_images(data_dir)
print(image_count)

13000


# Dataset

In [None]:
# Dataset properties: number of images per batch and the (square) size every
# image is resized to when the datasets are loaded.
batch_size = 100
img_height, img_width = 224, 224

In [None]:
# Training dataset: the "training" side of a seeded 80/20 split of data_dir.
# The validation subset must be built with the same seed and validation_split
# so that the two subsets do not overlap.
train_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 13000 files belonging to 26 classes.
Using 10400 files for training.


In [None]:
# Validation dataset: the "validation" side (20%) of the same seeded split used
# for train_ds. It is passed as validation_data during training; it is not a
# separate held-out test set.
val_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 13000 files belonging to 26 classes.
Using 2600 files for validation.


In [None]:
# Labels list: class names exposed by the training dataset, stored as a NumPy
# array so that predicted class ids can be used to index it directly.
class_names = np.array(train_ds.class_names)
print(class_names)

['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R'
 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z']


In [None]:
# Dataset sample: draw the first 9 images of one training batch in a 3x3 grid,
# each titled with its class name.

plt.figure(figsize=(10, 10))
for batch_images, batch_labels in train_ds.take(1):
  for position in range(9):
    plt.subplot(3, 3, position + 1)
    # Cast to uint8 so imshow interprets the pixel values as 0-255 colours.
    plt.imshow(batch_images[position].numpy().astype("uint8"))
    plt.title(class_names[batch_labels[position]])
    plt.axis("off")

In [None]:
# # Normalize dataset
# normalization_layer = tf.keras.layers.Rescaling(1./255)
# train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y)) 
# val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))

In [None]:
# # Autotune cache and parallelization to improve performance
# AUTOTUNE = tf.data.AUTOTUNE

# train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
# val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
# Augmentation layer: random horizontal flips followed by random contrast
# changes (factor 0.8). It is only applied when preprocess() is called with
# augment=True.
data_augmentation = tf.keras.Sequential(
    layers=[
        tf.keras.layers.RandomFlip(mode="horizontal"),
        tf.keras.layers.RandomContrast(factor=0.8),
    ]
)

In [None]:
# Normalization layer: multiplies every pixel value by 1/255, so 0-255 inputs
# end up in the 0-1 range.
normalization_layer = tf.keras.layers.Rescaling(1./255)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

def preprocess(dataset, shuffle=False, augment=False):
  """Rescale a dataset and optionally shuffle and augment it.

  Pipeline order: rescale -> cache -> shuffle -> augment -> prefetch.
  Only the deterministic rescaling step is cached. Shuffling and augmentation
  are placed after the cache so that they are re-drawn on every epoch; caching
  after them would replay the first epoch's order and the first epoch's
  augmented images for the rest of training.

  Args:
    dataset: a `tf.data.Dataset` yielding `(images, labels)` pairs.
    shuffle: if True, shuffle the elements with a buffer of 1000. The buffer
      holds whatever `dataset` yields, so on an already-batched dataset this
      shuffles whole batches rather than individual images.
    augment: if True, apply `data_augmentation` in training mode. Intended for
      the training set only.

  Returns:
    The transformed dataset with buffered prefetching enabled.
  """
  # Rescale pixel values on all datasets.
  dataset = dataset.map(lambda x, y: (normalization_layer(x), y),
              num_parallel_calls=AUTOTUNE)

  # Cache here, before any random transformation, so every epoch reuses the
  # rescaled images but still gets fresh shuffling and augmentation.
  dataset = dataset.cache()

  if shuffle:
    dataset = dataset.shuffle(1000)

  # Use data augmentation only on the training set.
  if augment:
    dataset = dataset.map(lambda x, y: (data_augmentation(x, training=True), y),
                num_parallel_calls=AUTOTUNE)

  # Use buffered prefetching on all datasets.
  return dataset.prefetch(buffer_size=AUTOTUNE)

In [None]:
# Apply preprocessing to the dataset. Augmentation is switched off for both
# datasets; only the training set is shuffled.
# NOTE: this cell rebinds train_ds/val_ds to their preprocessed versions, so
# running it a second time without re-creating the datasets applies the 1/255
# rescaling twice.
train_ds = preprocess(train_ds, shuffle=True, augment=False)
val_ds = preprocess(val_ds)

# Without Transfer Learning



## Model Architecture

In [None]:
# Method to create model
def create_uncompiled_model():
    """Build the from-scratch CNN classifier without compiling it.

    Three Conv2D + MaxPooling2D stages are followed by a 128-unit dense layer
    and a softmax output with one unit per entry in `class_names`.

    Note: the transfer-learning section of this notebook defines a function
    with the same name; once that cell has run, it replaces this definition.

    Returns:
        An uncompiled `tf.keras.Sequential` model whose first layer expects
        `(img_height, img_width, 3)` inputs.
    """
    layers = tf.keras.layers

    feature_stack = [
        layers.Conv2D(32, 5, input_shape=(img_height, img_width, 3), activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(len(class_names), activation='softmax'),
    ]

    return tf.keras.Sequential(feature_stack + classifier_head)

## Tune Learning Rate

In [None]:
# Method to find the 'best' learning rate
def adjust_learning_rate(dataset):
    """Train a fresh model while sweeping the learning rate upwards.

    The learning rate starts at 1e-4 and is multiplied by 10 every 20 epochs
    over 100 epochs, so the recorded loss can be plotted against the learning
    rate to pick a stable value.

    Args:
        dataset: training dataset passed straight to `model.fit`.

    Returns:
        The Keras `History` object produced by the sweep.
    """
    def swept_rate(epoch):
        # Exponential sweep: one decade every 20 epochs.
        return 1e-4 * 10**(epoch / 20)

    sweep_model = create_uncompiled_model()
    sweep_model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )

    return sweep_model.fit(
        dataset,
        epochs=100,
        callbacks=[tf.keras.callbacks.LearningRateScheduler(swept_rate)],
    )

In [None]:
# Find the 'best' learning rate
lr_history = adjust_learning_rate(train_ds)

In [None]:
# Plot the result: loss against learning rate, with a logarithmic x axis.
plt.semilogx(lr_history.history["lr"], lr_history.history["loss"])
plt.axis([1e-4, 10, 0, 10])
plt.xlabel("Learning rate")
plt.ylabel("Loss")

> The 'best' learning rate is around 1e-3, because the loss is fairly stable in that region compared to the rest of the sweep.

## Train Model

In [None]:
# Build a fresh model and train it with the learning rate chosen from the
# sweep above (1e-3).
model = create_uncompiled_model()

model.compile(
  optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
  metrics=['accuracy'])

# `history` keeps the per-epoch metrics that the evaluation cell plots.
history = model.fit(train_ds, validation_data=val_ds, epochs=20)

## Model Evaluation

In [None]:
#-----------------------------------------------------------
# Retrieve the per-epoch results on the training and validation
# data from the most recent `history` object
#-----------------------------------------------------------
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc)) # Get number of epochs

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# The series names must be passed as `label=`; a bare positional string is
# interpreted by matplotlib as extra plot data, not as a legend entry.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
print("")

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

## Save Model

In [None]:
# export_dir = 'saved_model/without_transfer_no_nothing'
# tf.saved_model.save(model, export_dir)

In [None]:
# model.save('saved_models/model_noaug_v3_0') 

In [None]:
# !zip -r "/content/model_noaug_v3_0.zip" "/content/saved_models/model_noaug_v3_0"

In [None]:
# !tensorflowjs_converter --input_format=keras_saved_model /content/saved_models/model_noaug_v3_0 ./

# With Transfer Learning



## Load Pre-trained Model

In [None]:
# TF Hub handles of the feature-vector models that can serve as the
# pre-trained base of the transfer-learning model.
mobilenet_v2 = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"
inception_v3 = "https://tfhub.dev/google/tf2-preview/inception_v3/feature_vector/4"
efficientnet_v0 = "https://tfhub.dev/google/efficientnet/b0/feature-vector/1"
efficientnet_v0_lite = "https://tfhub.dev/tensorflow/efficientnet/lite0/feature-vector/2"

# Handle actually used below. The trailing #@param annotation exposes the
# choice as a Colab form field, so it must stay on this line.
feature_extractor_model = mobilenet_v2 #@param ["mobilenet_v2", "inception_v3", "efficientnet_v0", "efficientnet_v0_lite"] {type:"raw"}

In [None]:
# Create layer from pre-trained model. trainable=False keeps the pre-trained
# weights fixed, so only the layers stacked on top of it are trained.
# NOTE(review): input_shape is hard-coded to 224x224, which matches
# img_height/img_width; the other selectable models may expect a different
# input size — confirm before changing feature_extractor_model.
feature_extractor_layer = hub.KerasLayer(
    feature_extractor_model,
    input_shape=(224, 224, 3),
    trainable=False)

## Model Architecture

In [None]:
# Method to create model
def create_uncompiled_model():
    """Build the transfer-learning classifier without compiling it.

    The model is the shared `feature_extractor_layer` followed by a softmax
    layer with one unit per entry in `class_names`. Every call reuses the same
    `feature_extractor_layer` instance.

    Note: this redefines the function of the same name from the
    "Without Transfer Learning" section.

    Returns:
        An uncompiled `tf.keras.Sequential` model.
    """
    classifier_head = tf.keras.layers.Dense(len(class_names), activation='softmax')

    return tf.keras.Sequential([feature_extractor_layer, classifier_head])

## Tune Learning Rate

In [None]:
# Method to find the 'best' learning rate
def adjust_learning_rate(dataset):
    """Train a fresh model while sweeping the learning rate upwards.

    The learning rate starts at 1e-4 and grows tenfold every 20 epochs for
    100 epochs; plotting the recorded loss against it shows where training is
    stable. The model comes from whichever `create_uncompiled_model` is
    currently defined in the notebook.

    Args:
        dataset: training dataset passed straight to `model.fit`.

    Returns:
        The Keras `History` object produced by the sweep.
    """
    def swept_rate(epoch):
        # Exponential sweep: one decade every 20 epochs.
        return 1e-4 * 10**(epoch / 20)

    sweep_model = create_uncompiled_model()
    sweep_model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )

    return sweep_model.fit(
        dataset,
        epochs=100,
        callbacks=[tf.keras.callbacks.LearningRateScheduler(swept_rate)],
    )

In [None]:
# Find the 'best' learning rate
lr_history = adjust_learning_rate(train_ds)

In [None]:
# Plot the result: loss against learning rate, with a logarithmic x axis.
plt.semilogx(lr_history.history["lr"], lr_history.history["loss"])
plt.axis([1e-4, 10, 0, 10])
plt.xlabel("Learning rate")
plt.ylabel("Loss")

> The 'best' learning rate is around 0.005, because the loss is fairly stable in that region compared to the rest of the sweep.

## Train Model

In [None]:
# Build a fresh transfer-learning model and train it.
# NOTE(review): the learning rate used here is 0.004, while the markdown note
# above reports ~0.005 as the best value from the sweep — confirm which one is
# intended.
model_tl = create_uncompiled_model()

model_tl.compile(
  optimizer=tf.keras.optimizers.Adam(learning_rate=0.004),
  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
  metrics=['accuracy'])

# Rebinds `history`, so the evaluation cell below plots this run.
history = model_tl.fit(train_ds, validation_data=val_ds, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Model Evaluation

In [None]:
#-----------------------------------------------------------
# Retrieve the per-epoch results on the training and validation
# data from the most recent `history` object
#-----------------------------------------------------------
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc)) # Get number of epochs

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# The series names must be passed as `label=`; a bare positional string is
# interpreted by matplotlib as extra plot data, not as a legend entry.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
print("")

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

## Test with Uploaded Image

In [None]:
# # Upload image(s) and predict
# uploaded=files.upload()

# # Load images
# for fn in uploaded.keys():
 
#   path='/content/' + fn
#   img=load_img(path, target_size=(img_height, img_width))
  
#   x=img_to_array(img)
#   x /= 255
#   x=np.expand_dims(x, axis=0)
#   images = np.vstack([x])
  
#   # Predict 
#   predicted_batch = model.predict(images)
#   predicted_id = tf.math.argmax(predicted_batch, axis=-1)
#   predicted_label_batch = class_names[predicted_id]
#   print(predicted_label_batch)

In [None]:
# # Upload manually
# path = '/content/dump/Photo on 26-05-23 at 19.01 copy.jpg'

# image = PIL.Image.open(path)
# image = image.resize((img_height, img_width), 1)
# x=img_to_array(image)
# x /= 255
# x=np.expand_dims(x, axis=0)
# images = np.vstack([x])

# predicted_batch = model_tl.predict(images)
# predicted_id = tf.math.argmax(predicted_batch, axis=-1)
# predicted_label_batch = class_names[predicted_id]
# print(predicted_label_batch)

In [None]:
# Load test samples for specific class

def create_test_generator(chosen_class):
  """Create an unlabeled, unshuffled image generator for one class.

  Images are read from the Google Drive folder
  `.../Test samples/<chosen_class>-samples`, restricted to the class
  `chosen_class`, rescaled by 1/255 and resized to 224x224.

  Args:
    chosen_class: class name, e.g. 'A'; used both in the folder name and as
      the only entry of `classes`.

  Returns:
    The iterator returned by `ImageDataGenerator.flow_from_directory`;
    with `class_mode=None` it is set up to yield images without labels.
  """
  samples_dir = '/content/drive/MyDrive/Capstone Project ML/Test samples/{}-samples'.format(chosen_class)

  rescaling_datagen = ImageDataGenerator(rescale=1/255.)

  # shuffle=False keeps the predictions in the same order as the files.
  return rescaling_datagen.flow_from_directory(
      samples_dir,
      classes=[chosen_class],
      class_mode=None,
      shuffle=False,
      target_size=(224, 224))

In [None]:
# Classes for which test samples are available.
labels = ['A', 'B', 'C', 'D', 'E', 'F']

for label in labels:
  print("========================= Making predictions for class {} =========================".format(label))

  test_generator = create_test_generator(label)
  # Model.predict accepts generators directly; predict_generator is deprecated.
  predictions = model_tl.predict(test_generator)

  # One row of class probabilities per image: take the most likely class id
  # of every row and print the matching class name.
  for predicted_id in np.argmax(predictions, axis=-1):
    print(class_names[predicted_id])

Found 5 images belonging to 1 classes.


  predictions = model_tl.predict_generator(test_generator)


A
A
A
A
A
Found 10 images belonging to 1 classes.
B
B
M
B
K
M
B
B
B
B
Found 10 images belonging to 1 classes.
C
C
C
E
E
K
K
K
K
I
Found 10 images belonging to 1 classes.
K
K
K
K
K
T
D
T
T
D
Found 10 images belonging to 1 classes.
E
E
E
Z
Z
E
E
E
Z
E
Found 10 images belonging to 1 classes.
F
F
F
F
F
F
F
F
F
F


## Save Model

In [None]:
# model_tl.save('model_tl_noaug_bv0_0') 

# converter = tf.lite.TFLiteConverter.from_saved_model('model_tl_noaug_bv0_0')
# converter.optimizations = [tf.lite.Optimize.DEFAULT]

# tflite_model = converter.convert()
# tflite_model_file = 'model_tl_noaug_bv0_0.tflite'

# with open(tflite_model_file, "wb") as f:
#     f.write(tflite_model)

# !zip -r "/content/model_tl_noaug_bv0_0.zip" "/content/model_tl_noaug_bv0_0"

  adding: content/model_tl_noaug_bv0_0/ (stored 0%)
  adding: content/model_tl_noaug_bv0_0/keras_metadata.pb (deflated 80%)
  adding: content/model_tl_noaug_bv0_0/fingerprint.pb (stored 0%)
  adding: content/model_tl_noaug_bv0_0/variables/ (stored 0%)
  adding: content/model_tl_noaug_bv0_0/variables/variables.data-00000-of-00001 (deflated 8%)
  adding: content/model_tl_noaug_bv0_0/variables/variables.index (deflated 78%)
  adding: content/model_tl_noaug_bv0_0/assets/ (stored 0%)
  adding: content/model_tl_noaug_bv0_0/saved_model.pb (deflated 92%)


In [None]:
# !zip -r "/content/model_tl_noaug_v4_3.zip" "/content/model_tl_noaug_v4_3"

In [None]:
# import time
# saved_model_path = "./{}.h5".format(int(time.time()))

# model_tl.save(saved_model_path)

In [None]:
# !tensorflowjs_converter --input_format=keras_saved_model --output_format=tfjs_graph_model inception_no_nothing_class ./

In [None]:
# !pip install tensorrt

In [None]:
# !tensorflowjs_converter --input_format=keras_saved_model /content/saved_models/model_tl_noaug_v4_01 ./

In [None]:
# !pip install --upgrade --index-url https://pypi.ngc.nvidia.com nvidia-tensorrt

In [None]:
# !tensorflowjs_converter --input_format=keras --output_format=tfjs_graph_model model_tl_noaug_v4_1.h5 ./ 