In [None]:
# standard imports
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense, Conv2D, MaxPooling2D, Flatten, Dropout, Reshape
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import zipfile
import keras
import tensorflow as tf


In [None]:
# get data #complete
# Mounts Google Drive when running on Colab, then unpacks the first data set onto the
# Colab VM's local disk (the author's note: better than streaming from Drive).

# connect to environment (if mounting to Google Drive)
try:
    from google.colab import drive
except ImportError:
    # google.colab cannot be imported, so this is not a Colab runtime.
    print("ENVIRONMENT: Local")
else:
    # A failed mount now surfaces here instead of being reported as "Local" and
    # failing later with a confusing missing-file error.
    drive.mount('/content/drive', force_remount = True)
    print("ENVIRONMENT: Google Drive")

# first data set
# The archive lives on Drive; extract it into /content/ on the VM.
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile('/content/drive/My Drive/archive.zip', 'r') as zipped_file:
    zipped_file.extractall('/content/')


Mounted at /content/drive
ENVIRONMENT: Google Drive


In [None]:
#second data set.
# The archive lives on Drive; extract it into /content/ on the Colab VM
# (the author's note: better than streaming from Drive).
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile('/content/drive/MyDrive/asl_alphabet_train (1).zip', 'r') as zipped_file:
    zipped_file.extractall('/content/')

In [None]:
# instantiate generators  # in progress
# training generator/augmentation
# Settings are collected in one mapping and handed to ImageDataGenerator in a single call.
augmentation_options = dict(
    # per-image normalisation
    samplewise_center=True,
    samplewise_std_normalization=True,
    # random geometric perturbations
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    fill_mode='nearest',
    # fraction held back for the 'validation' subset requested in flow_from_directory
    validation_split=.1,
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# remove unwanted classes
!rmdir /content/asl_alphabet_train/.ipynb_checkpoints
!rm -r /content/asl_alphabet_train/del
!rm -r /content/asl_alphabet_train/nothing
!rmdir /content/Training_Set/.ipynb_checkpoints

rmdir: failed to remove '/content/asl_alphabet_train/.ipynb_checkpoints': No such file or directory


In [None]:
# combine data sets
%cd asl_alphabet_train
!for file in *; do mv -- "$file"/* /content/Training_Set/"$file"; done

/content/asl_alphabet_train


In [None]:
# connect generators to data
# Both generators read class folders under /content/Training_Set/, resize images to 64x64
# and yield batches of 64 with one-hot ('categorical') labels.

# training
train_gen = train_datagen.flow_from_directory(directory = '/content/Training_Set/',
                                              subset = 'training',
                                              target_size = (64, 64),
                                              batch_size = 64,
                                              class_mode = 'categorical')

# validation
# Validation images get the same per-image normalisation as training but none of the
# random rotation/shift/shear/zoom, so val_loss / val_acc are measured on unaltered images.
# validation_split must stay equal to the value given to train_datagen (.1) so that the
# 'training' and 'validation' subsets remain complementary.
val_datagen = ImageDataGenerator(samplewise_center = True,
                                 samplewise_std_normalization = True,
                                 validation_split = .1)
val_gen = val_datagen.flow_from_directory(directory = '/content/Training_Set/',
                                          batch_size = 64,
                                          subset = 'validation',
                                          target_size = (64, 64),
                                          class_mode = 'categorical')



Found 135147 images belonging to 27 classes.
Found 15004 images belonging to 27 classes.


In [None]:
# Show the class-name -> label-index mapping the training generator built.
label_map = train_gen.class_indices
print(label_map)


In [None]:
# Fetch the first batch and report the shape of its image array.
first_images, _ = train_gen[0]
print(first_images.shape)

(64, 64, 64, 3)


In [None]:
# callbacks

# Early stopping on validation accuracy: an epoch only counts as an improvement if
# val_acc rises by at least min_delta; after `patience` (20) epochs without one, training
# stops and the weights from the best epoch are restored.
es_callback = EarlyStopping(
    monitor = 'val_acc',
    min_delta = .001,
    patience = 20,
    verbose = 1,
    restore_best_weights = True,
)

# Learning-rate reduction when val_loss plateaus.
# Author's note: dont use this when using 'adam' optimizer.
# It is created here but not added to callback_list below.
loss_plat = keras.callbacks.ReduceLROnPlateau(
    monitor = 'val_loss',
    factor = 0.1,
    patience = 20,
)

# Model checkpoint: saves the full model (not just weights) whenever val_acc reaches a new
# maximum. https://keras.io/api/callbacks/model_checkpoint/ was used.
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath = '/content/drive/My Drive/checkpoint/model.h5',
    monitor = 'val_acc',
    mode = 'max',
    save_best_only = True,
    save_weights_only = False,
)

# Callbacks handed to fit().
callback_list = [es_callback, model_checkpoint_callback]

In [None]:
# define model
# Used https://www.kaggle.com/code/dansbecker/running-kaggle-kernels-with-a-gpu for how to use strides, and a kernel size of 4.
# Three convolutional layers followed by a small dense classifier; the layers are passed to
# Sequential as a list, in the order they are stacked.
model = Sequential([
    # input is a 64 x 64 image with 3 colour channels
    Conv2D(16, kernel_size = 4, strides = 1, activation = 'relu', input_shape = (64, 64, 3)),
    Conv2D(64, kernel_size = 4, strides = 2, activation = 'relu'),
    Dropout(0.5),
    Conv2D(128, kernel_size = 4, strides = 2, activation = 'relu'),
    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.5),
    # 27 output classes; softmax for multi-class classification
    Dense(units = 27, activation = 'softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 61, 61, 16)        784       
                                                                 
 conv2d_1 (Conv2D)           (None, 29, 29, 64)        16448     
                                                                 
 dropout (Dropout)           (None, 29, 29, 64)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 13, 13, 128)       131200    
                                                                 
 flatten (Flatten)           (None, 21632)             0         
                                                                 
 dense (Dense)               (None, 64)                1384512   
                                                                 
 dropout_1 (Dropout)         (None, 64)                0

In [None]:
# compile model
# Resume from the saved checkpoint when there is one; otherwise compile the freshly
# defined `model` so that a first run (no checkpoint yet) also works. Either way the
# network to train ends up in `reloaded`.

# Note: despite the name, MODEL_DIR is the path of the .h5 file itself.
MODEL_DIR = "/content/drive/My Drive/checkpoint/model_v8.h5"

if tf.io.gfile.exists(MODEL_DIR):
    # used to continue training after disconnections
    reloaded = tf.keras.models.load_model(MODEL_DIR)
else:
    # 'acc' is the metric name that the callbacks ('val_acc') and the plots rely on.
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc'])
    reloaded = model

In [None]:
# train model
# Fits the reloaded network on the training generator, validating on val_gen each epoch.
# epochs=1000 is an upper bound; callback_list is passed so its callbacks run during training.
history = reloaded.fit(x = train_gen,
                       validation_data = val_gen,
                       epochs = 1000,
                       callbacks = callback_list)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
 144/2112 [=>............................] - ETA: 5:37 - loss: 0.4654 - acc: 0.8474

In [None]:
# plot results
# Draws two figures from the metrics recorded by fit(): accuracy and loss, each comparing
# the training curve (red) with the validation curve (blue).

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# figure 1: accuracy
plt.plot(epochs, acc, 'r', label='Training Acc.')
plt.plot(epochs, val_acc, 'b', label='Validation Acc.')
plt.title('Training vs. Validation Acc.')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()  # without this call the labels given above are never drawn

# figure 2: loss
plt.figure()

plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training vs Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
#save model
# Save the network that fit() was called on (`reloaded`). The separate `model` object from
# the "define model" cell is not the one trained in the "train model" cell.
reloaded.save('/content/drive/My Drive/SignLanguage_v8.h5')

In [None]:
# test data
# The archive lives on Drive; extract it into /content/ on the Colab VM
# (the author's note: better than streaming from Drive).
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile('/content/drive/MyDrive/asl_alphabet_test.zip', 'r') as zipped_file:
    zipped_file.extractall('/content/')

In [None]:
# reformat test dirs, remove unwanted classes
%cd asl_alphabet_test
!for file in *; do mkdir "${file:0:1}"; mv -- "$file" "${file:0:1}"; done
!rmdir /content/asl_alphabet_test/.ipynb_checkpoints
!rm -r /content/asl_alphabet_test/n

In [None]:
# evaluate
# Loads a saved model from Drive and scores it on the test images.
model_path = '/content/drive/My Drive/checkpoint/model_v8.2.h5'
reconstructed_model = keras.models.load_model(model_path)

# Per-image centering and std-normalisation only; no random augmentation for test data.
normalisation_only = dict(samplewise_center=True, samplewise_std_normalization=True)
test_datagen = ImageDataGenerator(**normalisation_only)

# One sub-folder per class under /content/asl_alphabet_test, images resized to 64x64.
test_gen = test_datagen.flow_from_directory(
    '/content/asl_alphabet_test',
    class_mode = 'categorical',
    target_size = (64, 64),
)

reconstructed_model.evaluate(test_gen)