In [2]:
# Load imports
import tensorflow as tf
import numpy as np

print(tf.__version__)
print(tf.config.list_physical_devices())

from tensorflow.keras                       import regularizers
from tensorflow.keras.models                import Sequential, Model
from tensorflow.keras.layers                import *
from tensorflow.keras.callbacks             import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers            import SGD

import matplotlib.pyplot as plt

2.14.1
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# Project-wide locations and identifiers
DATASET_BASE_PATH = '../Datasets'                 # folder that holds every dataset
DATASET_PATH = f'{DATASET_BASE_PATH}/Letters'     # the letters dataset used by this notebook
TRAINED_MODELS_PATH = '../TrainedModels'          # destination folder for trained models
# Google Drive file id of the dataset archive; change it whenever the dataset has been updated
DATASET_DRIVE_ID = '1IgH1KCxtoPZNbzNQi4tcYjXY5W2wYAmW'

In [4]:
# Optional: enable experiment tracking with Neptune.
# ../hidden_neptune.py is executed to obtain the API key; it must define `neptune_api_token`.
# When that file is absent, Neptune logging is switched off instead of failing the cell.
import os

NEPTUNE_CREDENTIALS_FILE = "../hidden_neptune.py"

using_neptune = os.path.exists(NEPTUNE_CREDENTIALS_FILE)
if using_neptune:
    import neptune
    # NOTE(review): exec() runs whatever code the credentials file contains; only use a trusted file.
    # The context manager closes the file handle that the previous open(...).read() left open.
    with open(NEPTUNE_CREDENTIALS_FILE) as credentials_file:
        exec(credentials_file.read())
    os.environ["NEPTUNE_API_TOKEN"] = neptune_api_token
else:
    print("Neptune credentials file not found (" + NEPTUNE_CREDENTIALS_FILE + "); Neptune logging is disabled")

In [3]:
# Download dataset from drive

# if dataset folder don't exists download and unzip the dataset
import os.path
if not os.path.exists(DATASET_PATH):
    command = 'gdown https://drive.google.com/uc?id=' + DATASET_DRIVE_ID + ' -O dt.zip'
    !{command}
    command = 'unzip dt.zip -d ' + DATASET_BASE_PATH
    !{command}
    !rm dt.zip

In [4]:
#Install tensorrt
# tensorrt is a bit special to install and we didn't add it to pipenv

#check if tensorrt is installed and install it if not
try:
    import tensorrt
except ImportError:
    !pip install tensorrt==8.5.3.1

In [5]:
# Input pipeline configuration
# side length, in pixels, that every image is resized to (images are square)
image_side = 192
# batch size
bs = 64

In [6]:
# Load the dataset; DATASET_PATH must contain "Train" and "Test", each organised in one folder per category
# Options shared by both splits
_loader_options = dict(
    label_mode="categorical",
    shuffle=True,
    image_size=(image_side, image_side),
    batch_size=bs,
)

with tf.device('/cpu:0'):
  raw_train_ds = tf.keras.utils.image_dataset_from_directory(DATASET_PATH + "/Train", **_loader_options)
  raw_validation_ds = tf.keras.utils.image_dataset_from_directory(DATASET_PATH + "/Test", **_loader_options)

## Alternative: split Train into 80 percent for training and 20 percent for validation
## (disabled because the accuracy it reports is misleading)
# raw_train_ds = tf.keras.utils.image_dataset_from_directory(
#   DATASET_PATH+"/Train",
#   label_mode = "categorical",
#   seed=123,
#   image_size=(image_side, image_side),
#   batch_size=bs,
#   validation_split=0.2,
#   subset='training',
#   )

# raw_validation_ds = tf.keras.utils.image_dataset_from_directory(
#   DATASET_PATH+"/Train",
#   label_mode = "categorical",
#   seed=123,
#   image_size=(image_side, image_side),
#   batch_size=bs,
#   validation_split=0.2,
#   subset='validation',
#   )

# tar without compression to hash the file
#command = 'tar -cf ' + DATASET_PATH +'-current.tar ' + DATASET_PATH
#!{command}

2024-01-08 17:07:56.545757: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-08 17:07:56.546215: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-08 17:07:56.546575: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Found 88008 files belonging to 24 classes.
Found 720 files belonging to 24 classes.


In [7]:
# define class names
# Taken from the training dataset object; other cells use this list to title plots
# and to size the final Dense layer (len(class_names)).
class_names = raw_train_ds.class_names
print(class_names)

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y']


In [8]:
# Rescale pixel values from the 0-255 range to 0-1
normalization_layer = tf.keras.layers.Rescaling(1./255)
# Apply the same rescaling to the images of both splits; labels pass through unchanged
train_ds, validation_ds = (
    split.map(lambda x, y: (normalization_layer(x), y))
    for split in (raw_train_ds, raw_validation_ds)
)

In [None]:
# Preview the first 9 images of one training batch, before any augmentation
plt.figure(figsize=(7, 7))
for images, labels in train_ds.take(1):
  for i in range(9):
    # 3x3 grid, one tile per image
    ax = plt.subplot(3, 3, i + 1)
    # images are floats in 0-1 here; use .astype("uint8") instead if they range from 0 to 255
    ax.imshow(images[i].numpy())
    ax.set_title(class_names[np.argmax(labels[i])])
    ax.axis("off")

In [9]:
# Data augmentation pipeline; the layers are chained in the order listed
_augmentation_layers = [
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.01),
    tf.keras.layers.RandomZoom(0.02),
    # NOTE(review): per the Keras docs fill_value only applies to fill_mode='constant' - confirm it is intended here
    tf.keras.layers.RandomTranslation(0.08, 0.08, fill_mode='nearest', fill_value=0.5),
    tf.keras.layers.RandomBrightness([-0.15,0.1],value_range=(0, 1)),
    #tf.keras.layers.RandomCrop(25,25),
    tf.keras.layers.RandomContrast(0.02),
]
data_augmentation = tf.keras.Sequential(_augmentation_layers)

## Build with the batched image shape up front; the author notes this is important so that
## later uses of the pipeline do not change its shape
data_augmentation.build((None, image_side, image_side, 3))

In [None]:
# Preview the augmentation pipeline on the first 9 images of one training batch
plt.figure(figsize=(7, 7))
for images, labels in train_ds.take(1):
  # `images` holds a whole batch
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    # images are floats in 0-1 here; use .astype("uint8") instead if they range from 0 to 255
    augmented_image = data_augmentation(images[i].numpy())
    ax.imshow(augmented_image)
    ax.set_title(class_names[np.argmax(labels[i])])
    ax.axis("off")

In [10]:
# Hyper-parameters of the classification head
model_params = dict(
    dropout1=0.1,
    dropout2=0.25,
    dense=64,
    l2reg=0.015,
)

# When Neptune is enabled, record the parameters as a new model version and keep its URL
if using_neptune:
    log_model_version = neptune.init_model_version(
        project="SignLanguageRecognition/AlphabetRecognition",
        model="AL-ALPH1",
        api_token=neptune_api_token,
    )
    log_model_version["params/model"] = model_params
    log_model_version_url = log_model_version.get_url()
    log_model_version.stop()

https://app.neptune.ai/SignLanguageRecognition/AlphabetRecognition/m/AL-ALPH1/v/AL-ALPH1-59
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 4 operations to synchronize with Neptune. Do not kill this process.
All 4 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/SignLanguageRecognition/AlphabetRecognition/m/AL-ALPH1/v/AL-ALPH1-59/metadata


In [None]:
# (Re)create the model from model_params

# Alternative backbone kept for reference:
# VGG19
#VGG19_model = tf.keras.applications.VGG19
# Freeze the 6 first layers of the model to not train them
#for layer in VGG19_model.layers[:6]:
#  layer.trainable = False

# Backbone: InceptionV3 with ImageNet weights and without its classification top
base_model = tf.keras.applications.InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(image_side, image_side, 3),
)

# Assemble the model: augmentation -> backbone -> classification head
model = tf.keras.Sequential()
for stage in (
    data_augmentation,
    #VGG19_model,
    base_model,
    Flatten(),
    Dropout(model_params["dropout1"]),
    Dense(model_params["dense"], kernel_regularizer=regularizers.l2(model_params["l2reg"]), activation = 'relu'),
    Dropout(model_params["dropout2"]),
    Dense(len(class_names), activation = 'softmax'),
):
    model.add(stage)

model.build((None, image_side, image_side, 3))
model.summary()

In [None]:
## compile and train the model

learning_rate = 0.0004
epochs = 1

checkpointer = ModelCheckpoint(filepath='model', verbose=1, save_best_only=True, monitor = 'val_accuracy', mode = 'max') # this save the best model
#reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.000001) # this reduce learning rate when val_loss is not improving
#early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5) # this stop training when val_loss is not improving

def epochCallback(epoch, logs):
    """Forward the end-of-epoch training and validation metrics to the Neptune run, when enabled."""
    if using_neptune:
        run["train/loss"].log(logs["loss"])
        run["train/accuracy"].log(logs["accuracy"])
        run["eval/loss"].log(logs["val_loss"])
        run["eval/accuracy"].log(logs["val_accuracy"])

# Open the Neptune run (if enabled); everything after this point sits inside try/finally
# so that the run is stopped on success, on any error and on interruption alike.
run = None
if using_neptune:
    run = neptune.init_run(project="SignLanguageRecognition/AlphabetRecognition", api_token=neptune_api_token, capture_hardware_metrics=True, capture_stdout=True, capture_stderr=False)

history = None
try:
    if using_neptune:
        params = {
            "learning_rate": learning_rate,
            "optimizer": "SGD",
            "base_model": base_model.name,
            "image_side": image_side,
            "epochs": epochs,
            "batch_size": bs
            }

        run["parameters"] = params
        run["model_version_url"] = log_model_version_url

        ##too slow find a better way
        #run["dataset/train"].track_files(DATASET_PATH + "/Train")
        #run["dataset/test"].track_files(DATASET_PATH + "/Test")
        # NOTE(review): the tar command that would create this file is commented out in the dataset cell
        run["dataset/current"].track_files(DATASET_PATH + "-current.tar")

    model.compile(loss="categorical_crossentropy", optimizer= SGD(learning_rate=learning_rate), metrics=['accuracy'])

    history = model.fit(train_ds, validation_data = validation_ds, epochs=epochs,
                                callbacks = [
    #                                reduce_lr,
                                    checkpointer,
                                    tf.keras.callbacks.LambdaCallback(on_epoch_end=epochCallback)
    #                                early_stop
                                ])
    if using_neptune:
        # Upload the final model alongside the run
        model.save("current_run_model.h5")
        run["model_weights"].upload("current_run_model.h5")
finally:
    # Always stop the run; any exception still propagates after this block
    if run is not None:
        run.stop()

In [None]:
# inceptionV3 test
# this is adapted from the example at https://keras.io/api/applications/

# Instantiate the backbone (the previous code assigned the constructor itself, which has no .output/.input/.layers)
base_model = tf.keras.applications.InceptionV3(input_shape=(image_side,image_side,3), include_top=False, weights='imagenet')

# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a softmax layer with one output per class
predictions = Dense(len(class_names), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)


# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
# the accuracy metric is required here because the checkpoint below monitors 'val_accuracy'
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# train the model on the new data for a few epochs
checkpointer = ModelCheckpoint(filepath='model', verbose=1, save_best_only=True, monitor = 'val_accuracy', mode = 'max')

history = model.fit(train_ds, validation_data = validation_ds, epochs=1,
                            callbacks = [
                                checkpointer
                            ])

# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from inception V3. We will freeze the bottom N layers
# and train the remaining top layers.

# let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(base_model.layers):
   print(i, layer.name)

# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers[:249]:
   layer.trainable = False
for layer in model.layers[249:]:
   layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
# (`learning_rate` is the supported argument name; the deprecated `lr` keyword is not relied upon)
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers
history = model.fit(train_ds, validation_data = validation_ds, epochs=15,
                            callbacks = [
                                checkpointer
                            ])

In [None]:
#tensorflow hyperparameter tuning

##TODO batch size and image side tuning
##TODO freeze layers tuning

import keras_tuner as kt

checkpointer = ModelCheckpoint(filepath='model', verbose=1, save_best_only=True, monitor = 'val_accuracy', mode = 'max')

def model_builder(hp):
    """Build and compile one candidate model for a Keras Tuner trial.

    Args:
        hp: the hyperparameter object supplied by the tuner; it is used to sample
            both dropout rates, the dense layer width, the L2 factor and the learning rate.

    Returns:
        A compiled Sequential model: augmentation -> ResNet152 backbone -> classification head.
    """
    dropOut1 = hp.Float('dropout1', min_value=0.1, max_value=0.5, step=0.05)
    dropOut2 = hp.Float('dropout2', min_value=0.1, max_value=0.5, step=0.05)
    dense1 = hp.Int('dense1', min_value=32, max_value=256, step=32)
    l2reg = hp.Float('l2reg', min_value=0.005, max_value=0.05, step=0.005)
    lrate1 = hp.Float('learning_rate', min_value=0.0001, max_value=0.01, step=0.0001)

    # Create a fresh backbone for every trial. Sharing one instance across trials would
    # carry the weights trained by earlier trials into later ones and bias the comparison.
    backbone = tf.keras.applications.ResNet152(input_shape=(image_side,image_side,3),
                                               include_top=False,
                                               weights='imagenet')

    model = tf.keras.Sequential()
    model.add(data_augmentation)
    model.add(backbone)
    model.add(Flatten())
    model.add(Dropout(dropOut1))
    model.add(Dense(dense1, kernel_regularizer=regularizers.l2(l2reg), activation = 'relu'))
    model.add(Dropout(dropOut2))
    model.add(Dense(len(class_names), activation = 'softmax'))

    model.compile(loss="categorical_crossentropy",
              optimizer= SGD(learning_rate=lrate1),
              metrics=['accuracy'])

    return model

# NOTE: `epochs` is the value set in the training cell and is reused here as the Hyperband budget
tuner = kt.Hyperband(model_builder,
                    objective='val_accuracy',
                    max_epochs=epochs,
                    factor=3,
                    directory='my_dir',
                    project_name='intro_to_kt')

tuner.search(train_ds, validation_data = validation_ds, epochs=epochs, callbacks=[checkpointer])


In [None]:
# Keep the single best hyperparameter set reported by the tuner (first element of a 1-item list)
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Validation vs. training loss per epoch for the last fit stored in `history`
for curve in ('val_loss', 'loss'):
    plt.plot(history.history[curve])
plt.title("Loss")
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['val_loss','loss'])
plt.show()

In [None]:
# Validation vs. training accuracy per epoch for the last fit stored in `history`
for curve in ('val_accuracy', 'accuracy'):
    plt.plot(history.history[curve])
plt.title("Accuracy")
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['val_accuracy','accuracy'])
plt.show()

In [None]:
# Save the current model under a descriptive name, in both the .keras and the .h5 format
name="letters_new5_model_192x192_74.0pc_inceptionv3"
for extension in (".keras", ".h5"):
    model.save(name + extension)

In [None]:
## load model from zero
# "model" is the path the ModelCheckpoint callbacks in this notebook write to (filepath='model');
# loading it replaces the in-memory `model` with what was saved there.
model = tf.keras.models.load_model("model")

In [None]:
# Evaluate the current `model` on the validation dataset; the result is shown as the cell output
model.evaluate(validation_ds)

In [None]:
# Show confusion matrix

from sklearn.metrics import ConfusionMatrixDisplay
import numpy as np


## generate the predictions

y_pred = []  # predicted class indices, one array per batch
y_true = []  # one-hot true labels, one tensor per batch

# iterate over the dataset once, so labels and predictions stay aligned even though it is shuffled
for image_batch, label_batch in validation_ds:   # use dataset.unbatch() with repeat
   # append true labels
   y_true.append(label_batch)
   # compute predictions (verbose=0 avoids printing one progress bar per batch)
   preds = model.predict(image_batch, verbose=0)
   # append predicted labels
   y_pred.append(np.argmax(preds, axis = - 1))

# convert the true and predicted labels into tensors
correct_labels = tf.concat(y_true, axis = 0)
predicted_labels = tf.concat(y_pred, axis = 0)

# convert the one-hot labels into the class indices sklearn expects (vectorized argmax)
final_correct_labels = np.argmax(correct_labels.numpy(), axis = -1)

ConfusionMatrixDisplay.from_predictions(final_correct_labels, predicted_labels.numpy(), display_labels=class_names, cmap=plt.cm.Blues)

In [None]:
# Collect the misclassified samples so they can be inspected afterwards
failed_predictions = []

# Walk the dataset batch by batch (train_ds or validation_ds)
for image_batch, label_batch in train_ds:

    # predictions for the whole batch
    preds = model.predict(image_batch)

    # keep [image, true label, prediction] whenever the predicted class differs from the true one
    for image, label, pred in zip(image_batch, label_batch, preds):
        if np.argmax(pred) == np.argmax(label):
            continue
        # to restrict to one letter, also require: class_names[np.argmax(label)] == 'N'
        failed_predictions.append([image, label, pred])

In [None]:
# Cursor into failed_predictions; the viewer cell below increments it before displaying an entry
rindex = 0

In [None]:
# Show the next image from the failed predictions (each run of this cell advances by one)

if not failed_predictions:
    # Nothing to display; indexing an empty list would raise IndexError
    print("No failed predictions to show")
else:
    rindex += 1

    # wrap around to the first entry once the end of the list is passed
    if rindex >= len(failed_predictions):
        rindex = 0

    print("Index of failed prediction: ", rindex)
    # each entry is [image, true one-hot label, predicted probabilities]
    failed_image, true_label, predicted_probs = failed_predictions[rindex]
    # show image with plt
    plt.title("True: "+class_names[np.argmax(true_label)]+", Pred: "+class_names[np.argmax(predicted_probs)])
    plt.imshow(failed_image)

In [10]:
#model_tester
# Candidate backbones from tf.keras.applications to compare.
# Each entry is the constructor itself (not an instance); it is called later with the
# project's input shape, include_top=False and ImageNet weights.
# The EfficientNet / EfficientNetV2 variants are commented out (reason not recorded here).

model_list = [

    tf.keras.applications.ConvNeXtBase,
    tf.keras.applications.ConvNeXtLarge,
    tf.keras.applications.ConvNeXtSmall,
    tf.keras.applications.ConvNeXtTiny,
    tf.keras.applications.ConvNeXtXLarge,
    tf.keras.applications.DenseNet121,
    tf.keras.applications.DenseNet169,
    tf.keras.applications.DenseNet201,
    # tf.keras.applications.EfficientNetB0,
    # tf.keras.applications.EfficientNetB1,
    # tf.keras.applications.EfficientNetB2,
    # tf.keras.applications.EfficientNetB3,
    # tf.keras.applications.EfficientNetB4,
    # tf.keras.applications.EfficientNetB5,
    # tf.keras.applications.EfficientNetB6,
    # tf.keras.applications.EfficientNetB7,
    # tf.keras.applications.EfficientNetV2B0,
    # tf.keras.applications.EfficientNetV2B1,
    # tf.keras.applications.EfficientNetV2B2,
    # tf.keras.applications.EfficientNetV2B3,
    # tf.keras.applications.EfficientNetV2L,
    # tf.keras.applications.EfficientNetV2M,
    # tf.keras.applications.EfficientNetV2S,
    tf.keras.applications.InceptionResNetV2,
    tf.keras.applications.InceptionV3,
    tf.keras.applications.MobileNet,
    tf.keras.applications.MobileNetV2,
    tf.keras.applications.MobileNetV3Large,
    tf.keras.applications.MobileNetV3Small,
    tf.keras.applications.NASNetLarge,
    tf.keras.applications.NASNetMobile,
    tf.keras.applications.RegNetX002,
    tf.keras.applications.RegNetX004,
    tf.keras.applications.RegNetX006,
    tf.keras.applications.RegNetX008,
    tf.keras.applications.RegNetX016,
    tf.keras.applications.RegNetX032,
    tf.keras.applications.RegNetX040,
    tf.keras.applications.RegNetX064,
    tf.keras.applications.RegNetX080,
    tf.keras.applications.RegNetX120,
    tf.keras.applications.RegNetX160,
    tf.keras.applications.RegNetX320,
    tf.keras.applications.RegNetY002,
    tf.keras.applications.RegNetY004,
    tf.keras.applications.RegNetY006,
    tf.keras.applications.RegNetY008,
    tf.keras.applications.RegNetY016,
    tf.keras.applications.RegNetY032,
    tf.keras.applications.RegNetY040,
    tf.keras.applications.RegNetY064,
    tf.keras.applications.RegNetY080,
    tf.keras.applications.RegNetY120,
    tf.keras.applications.RegNetY160,
    tf.keras.applications.RegNetY320,
    tf.keras.applications.ResNet101,
    tf.keras.applications.ResNet152,
    tf.keras.applications.ResNet50,
    tf.keras.applications.ResNetRS101,
    tf.keras.applications.ResNetRS152,
    tf.keras.applications.ResNetRS200,
    tf.keras.applications.ResNetRS270,
    tf.keras.applications.ResNetRS350,
    tf.keras.applications.ResNetRS420,
    tf.keras.applications.ResNetRS50,
    tf.keras.applications.ResNet101V2,
    tf.keras.applications.ResNet152V2,
    tf.keras.applications.ResNet50V2,
    tf.keras.applications.VGG16,
    tf.keras.applications.VGG19,
    tf.keras.applications.Xception

]


In [None]:
def build(base_model, model_params):
    """Assemble the classifier: augmentation -> `base_model` -> dense classification head.

    Args:
        base_model: the backbone model placed right after the augmentation pipeline.
        model_params: dict with the keys "dropout1", "dropout2", "dense" and "l2reg".

    Returns:
        The Sequential model, already built for inputs of shape (None, image_side, image_side, 3).
    """
    # Classification head stacked on top of the backbone features
    head_layers = [
        Flatten(),
        Dropout(model_params["dropout1"]),
        Dense(model_params["dense"], kernel_regularizer=regularizers.l2(model_params["l2reg"]), activation = 'relu'),
        Dropout(model_params["dropout2"]),
        Dense(len(class_names), activation = 'softmax'),
    ]

    model = tf.keras.Sequential([data_augmentation, base_model, *head_layers])
    model.build((None, image_side, image_side, 3))
    return model

def train(base_model, model, epochs, learning_rate, log_model_version_url):
    """Compile `model` with SGD and fit it on train_ds / validation_ds, logging to Neptune when enabled.

    Args:
        base_model: backbone used inside `model`; only its name is logged.
        model: the Keras model to compile and train.
        epochs: maximum number of epochs (early stopping on val_loss may end training sooner).
        learning_rate: learning rate passed to the SGD optimizer.
        log_model_version_url: URL of the Neptune model version, stored with the run.

    Raises:
        KeyboardInterrupt and any other training error are re-raised after the run status is recorded.
        tf.errors.ResourceExhaustedError is deliberately NOT re-raised, so that a caller looping
        over several models can carry on with the next one.
    """
    # Neptune logging is gated on `using_neptune` throughout, like the rest of the notebook
    run = None
    if using_neptune:
        run = neptune.init_run(project="SignLanguageRecognition/AlphabetRecognition", api_token=neptune_api_token, capture_hardware_metrics=True, capture_stdout=True, capture_stderr=True)

    def epochCallback(epoch, logs):
        # Forward the end-of-epoch metrics to the run
        if run is not None:
            run["train/loss"].log(logs["loss"])
            run["train/accuracy"].log(logs["accuracy"])
            run["eval/loss"].log(logs["val_loss"])
            run["eval/accuracy"].log(logs["val_accuracy"])

    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5) # this stop training when val_loss is not improving

    try:
        if run is not None:
            params = {
                "learning_rate": learning_rate,
                "optimizer": "SGD",
                "base_model": base_model.name,
                "image_side": image_side,
                "epochs": epochs,
                "batch_size": bs
                }

            run["parameters"] = params
            run["model_version_url"] = log_model_version_url
            run["status"] = "running"

        model.compile(loss="categorical_crossentropy", optimizer= SGD(learning_rate=learning_rate), metrics=['accuracy'])

        model.fit(train_ds, validation_data = validation_ds, epochs=epochs,
                                    callbacks = [
                                        tf.keras.callbacks.LambdaCallback(on_epoch_end=epochCallback),
                                        early_stop
                                    ])
        if run is not None:
            run["status"] = "finished"
            model.save("current_run_model.h5")
            run["model_weights"].upload("current_run_model.h5")

    except tf.errors.ResourceExhaustedError:
        # Out of memory for this model: record it and return normally
        print("Skipping " + base_model.name + ": ResourceExhaustedError during training")
        if run is not None:
            run["status"] = "crashed-ResourceExhausted"
    except KeyboardInterrupt:
        if run is not None:
            run["status"] = "Interrupted keyboard"
        raise
    except Exception:
        # Any other failure: mark the run before propagating the error
        if run is not None:
            run["status"] = "crashed"
        raise
    finally:
        # Single exit point: the run is stopped whatever happened above
        if run is not None:
            run.stop()

import gc

# Train and log every candidate backbone from model_list with the same head and settings.
# These settings do not change between iterations, so they are defined once.

#config model params
model_params = {
    "dropout1": 0.1,
    "dropout2": 0.25,
    "dense": 64,
    "l2reg": 0.015
}
learning_rate = 0.001
epochs = 50

for base_model_func in model_list:

    base_model = base_model_func(input_shape=(image_side,image_side,3), include_top=False, weights='imagenet')

    # Register a Neptune model version for this candidate (only when Neptune is enabled)
    log_model_version_url = None
    if using_neptune:
        log_model_version = neptune.init_model_version(
            model="AL-ALPH1",
            project="SignLanguageRecognition/AlphabetRecognition",
            api_token=neptune_api_token
        )
        log_model_version["params/model"] = model_params

        log_model_version_url = log_model_version.get_url()

        log_model_version.stop()


    model = build(base_model, model_params)

    train(base_model, model, epochs, learning_rate, log_model_version_url)

    # Release this candidate before creating the next one: dropping the names alone does not
    # clear the global state Keras accumulates when many models are created in a loop.
    del model
    del base_model
    tf.keras.backend.clear_session()
    gc.collect()
    