In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pylab as plt
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split

import os

2024-04-26 18:03:02.320684: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-26 18:03:02.320800: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-26 18:03:02.447208: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
pip install pyyaml h5py 

# **Formatting the Data:**

In [None]:
# Path to the Kaggle dataset's training folder; it holds one sub-folder per class label.
train_folder = '/kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train'

# Collect one {'label', 'path'} record per image file.
# os.listdir() returns entries in arbitrary order, so both levels are sorted to keep
# the row order (and therefore the seeded splits made from it) reproducible.
all_data = []
for folder in sorted(os.listdir(train_folder)):
    label_folder = os.path.join(train_folder, folder)
    if not os.path.isdir(label_folder):
        # A stray top-level file is not a class folder; listing it would raise.
        continue
    for file_name in sorted(os.listdir(label_folder)):
        file_path = os.path.join(label_folder, file_name)
        if os.path.isfile(file_path):
            all_data.append({'label': folder, 'path': file_path})

# Data frame with one row per image: its label and its path.
# The columns are named explicitly so they exist even if no file was found.
data_df = pd.DataFrame(all_data, columns=['label', 'path'])
data_df

In [None]:
# Two-stage split, stratified on the class label and seeded for repeatability:
#   1) set aside 10% of all images as the holdout set;
#   2) split the remaining 90% into 75% training / 25% x_test
#      (x_test is what the validation generator is built from).
split_seed = 42

x_remaining, x_holdout = train_test_split(
    data_df,
    test_size=0.10,
    random_state=split_seed,
    stratify=data_df[['label']],
)

x_train, x_test = train_test_split(
    x_remaining,
    test_size=0.25,
    random_state=split_seed,
    stratify=x_remaining[['label']],
)

# **Model 1 (no data aug):**

In [None]:
# Shared settings for the image generators of model 1 (no augmentation).
img_width, img_height = 64, 64
batch_size = 256
y_col = 'label'
x_col = 'path'
no_of_classes = len(data_df[y_col].unique())

# Arguments common to the train / validation / holdout generators.
flow_kwargs = dict(
    x_col=x_col,
    y_col=y_col,
    target_size=(img_height, img_width),  # Keras expects (height, width)
    class_mode='categorical',
    batch_size=batch_size,
)

# Training data: only rescaled to [0, 1]. Shuffling is enabled so the batches
# differ from epoch to epoch; the seed keeps the shuffling repeatable.
train_datagen = ImageDataGenerator(rescale=1/255.0)
train_generator = train_datagen.flow_from_dataframe(
    dataframe=x_train,
    shuffle=True,
    seed=42,
    **flow_kwargs,
)

# Validation data: never shuffled, so predictions stay aligned with
# validation_generator.classes when the confusion matrix is computed.
validation_datagen = ImageDataGenerator(rescale=1/255.0)
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=x_test,
    shuffle=False,
    **flow_kwargs,
)

# Holdout data: never shuffled, for the same alignment reason.
holdout_datagen = ImageDataGenerator(rescale=1/255.0)
holdout_generator = holdout_datagen.flow_from_dataframe(
    dataframe=x_holdout,
    shuffle=False,
    **flow_kwargs,
)

In [None]:
# Baseline CNN: four (5x5 convolution -> 2x2 max-pool) stages followed by a dense head.
model = Sequential()

# Only the first layer declares the input shape; every later layer infers its
# input from the previous layer's output, so no input_shape is passed to them.
model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='same', activation='relu',
                 input_shape=(img_height, img_width, 3)))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.3))

model.add(Conv2D(filters=64, kernel_size=(5, 5), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.3))

model.add(Conv2D(filters=64, kernel_size=(5, 5), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.3))

model.add(Conv2D(filters=128, kernel_size=(5, 5), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

model.add(Flatten())
# NOTE(review): this layer has no activation, so it is purely linear. It is left
# unchanged so that weights stay compatible with any script that rebuilds the
# same architecture -- confirm whether a ReLU was intended.
model.add(Dense(256))
# One output unit per class found in the data (29 for the full dataset).
model.add(Dense(no_of_classes, activation="softmax"))

In [None]:
# Adam with a 1e-3 learning rate; categorical cross-entropy matches the
# one-hot labels produced by class_mode='categorical'.
adam_optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=adam_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Stop when validation loss has not improved for 5 epochs, and keep the weights
# of the best epoch rather than those of the last one.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

epochs = 10

# The batch size is fixed by the generators, so none is passed to fit().
history = model.fit(
    train_generator,
    epochs=epochs,
    verbose=1,
    validation_data=validation_generator,
    callbacks=[early_stop],
)

In [None]:
# Convergence plot: per-epoch training and validation loss of the baseline model.
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix

# Class names ordered by the index the generator assigned to them, so tick
# labels always line up with the model's output units (one name per class).
class_indices = validation_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)

# Generate predictions for the validation dataset
predictions = model.predict(validation_generator)

# Convert predictions from probabilities to class indices
predicted_labels = np.argmax(predictions, axis=1)

# True class indices; aligned with the predictions because the validation
# generator is created with shuffle=False.
true_labels = validation_generator.classes

# Compute the confusion matrix. Passing `labels` keeps it at one row/column per
# class even when a class never appears among the true or predicted labels.
conf_matrix = confusion_matrix(
    true_labels,
    predicted_labels,
    labels=np.arange(len(class_names)),
)

# Visualize confusion matrix
plt.figure(figsize=(10, 8))
plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.show()


In [None]:
# Persist the baseline model's trained weights next to the notebook.
model_one_weights_path = './model_one_weights.weights.h5'
model.save_weights(model_one_weights_path)

# **Model 2 (with data aug):**

In [None]:
import random 

def augment_data(image):
    """Randomly augment one image; meant as an ImageDataGenerator preprocessing_function.

    Each augmentation below is applied independently with probability 0.05.

    Args:
        image: a single rank-3 image (height, width, channels) with pixel
            values on the 0-255 scale, i.e. before the generator's `rescale`
            is applied.

    Returns:
        A float32 NumPy array of the same shape, still on the 0-255 scale, so
        the generator's `rescale = 1/255.0` remains correct.
    """
    # The tf.image colour operations expect float images in [0, 1], so work on
    # that scale and convert back to 0-255 just before returning.
    image = tf.cast(image, tf.float32) / 255.0

    if tf.random.uniform((), minval=0, maxval=1) < 0.05:
        image = tf.image.flip_left_right(image)  # flip left right

    if tf.random.uniform((), minval=0, maxval=1) < 0.05:
        image = tf.image.adjust_saturation(image, 0)  # make grayscale

    if tf.random.uniform((), minval=0, maxval=1) < 0.05:
        # Use the randomly drawn factor (it was previously drawn but a fixed
        # factor of 5 was applied instead).
        saturation_factor = random.randint(1, 5)
        image = tf.image.adjust_saturation(image, saturation_factor)  # adjusts saturation

    if tf.random.uniform((), minval=0, maxval=1) < 0.05:
        brightness_delta = random.uniform(0.01, 0.1)
        image = tf.image.adjust_brightness(image, brightness_delta)  # adjusts brightness

    if tf.random.uniform((), minval=0, maxval=1) < 0.05:
        min_quality = random.randint(10, 70)
        max_quality = random.randint(80, 100)
        # The result must be assigned back; otherwise this step has no effect.
        image = tf.image.random_jpeg_quality(image, min_quality, max_quality, seed=None)  # adjusts jpeg quality

    if tf.random.uniform((), minval=0, maxval=1) < 0.05:
        max_hue_delta = random.uniform(0.01, 0.05)
        image = tf.image.random_hue(image, max_hue_delta, seed=None)  # adjusts hue

    # Brightness/saturation changes can leave [0, 1]; clip, then restore 0-255.
    image = tf.clip_by_value(image, 0.0, 1.0) * 255.0

    # preprocessing_function is documented to return a NumPy array.
    return image.numpy()

In [None]:
# Shared settings for the image generators of model 2 (with augmentation).
img_width, img_height = 64, 64
batch_size = 256
y_col = 'label'
x_col = 'path'
no_of_classes = len(data_df[y_col].unique())

# Arguments common to the train / validation / holdout generators.
flow_kwargs = dict(
    x_col=x_col,
    y_col=y_col,
    target_size=(img_height, img_width),  # Keras expects (height, width)
    class_mode='categorical',
    batch_size=batch_size,
)

# Training data: augmented with augment_data, then rescaled to [0, 1].
# Shuffling is enabled so the batches differ from epoch to epoch; the seed
# keeps the shuffling repeatable.
train_datagen = ImageDataGenerator(rescale=1/255.0, preprocessing_function=augment_data)
train_generator = train_datagen.flow_from_dataframe(
    dataframe=x_train,
    shuffle=True,
    seed=42,
    **flow_kwargs,
)

# Validation data: not augmented and never shuffled, so predictions stay aligned
# with validation_generator.classes when the confusion matrix is computed.
validation_datagen = ImageDataGenerator(rescale=1/255.0)
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=x_test,
    shuffle=False,
    **flow_kwargs,
)

# Holdout data: not augmented and never shuffled, for the same alignment reason.
holdout_datagen = ImageDataGenerator(rescale=1/255.0)
holdout_generator = holdout_datagen.flow_from_dataframe(
    dataframe=x_holdout,
    shuffle=False,
    **flow_kwargs,
)

In [None]:
# CNN trained on augmented data: same architecture as the baseline model --
# four (5x5 convolution -> 2x2 max-pool) stages followed by a dense head.
aug_model = Sequential()

# Only the first layer declares the input shape; every later layer infers its
# input from the previous layer's output, so no input_shape is passed to them.
aug_model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='same', activation='relu',
                     input_shape=(img_height, img_width, 3)))
aug_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
aug_model.add(Dropout(0.3))

aug_model.add(Conv2D(filters=64, kernel_size=(5, 5), padding='same', activation='relu'))
aug_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
aug_model.add(Dropout(0.3))

aug_model.add(Conv2D(filters=64, kernel_size=(5, 5), padding='same', activation='relu'))
aug_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
aug_model.add(Dropout(0.3))

aug_model.add(Conv2D(filters=128, kernel_size=(5, 5), padding='same', activation='relu'))
aug_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

aug_model.add(Flatten())
# NOTE(review): this layer has no activation, so it is purely linear. It is left
# unchanged so that weights stay compatible with any script that rebuilds the
# same architecture -- confirm whether a ReLU was intended.
aug_model.add(Dense(256))
# One output unit per class found in the data (29 for the full dataset).
aug_model.add(Dense(no_of_classes, activation="softmax"))

In [None]:
# Adam with a 1e-3 learning rate; categorical cross-entropy matches the
# one-hot labels produced by class_mode='categorical'.
aug_optimizer = Adam(learning_rate=0.001)
aug_model.compile(
    optimizer=aug_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
aug_model.summary()

In [None]:
# Stop when validation loss has not improved for 5 epochs, and keep the weights
# of the best epoch rather than those of the last one.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

epochs = 10

# The batch size is fixed by the generators, so none is passed to fit().
aug_history = aug_model.fit(
    train_generator,
    epochs=epochs,
    verbose=1,
    validation_data=validation_generator,
    callbacks=[early_stop],
)

In [None]:
# Convergence plot: per-epoch training and validation loss of the augmented model.
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(aug_history.history[history_key], label=curve_label)
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Class names ordered by the index the generator assigned to them, so tick
# labels always line up with the model's output units (one name per class).
class_indices = validation_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)

# Generate predictions for the validation dataset
predictions = aug_model.predict(validation_generator)

# Convert predictions from probabilities to class indices
predicted_labels = np.argmax(predictions, axis=1)

# True class indices; aligned with the predictions because the validation
# generator is created with shuffle=False.
true_labels = validation_generator.classes

# Compute the confusion matrix. Passing `labels` keeps it at one row/column per
# class even when a class never appears among the true or predicted labels.
conf_matrix = confusion_matrix(
    true_labels,
    predicted_labels,
    labels=np.arange(len(class_names)),
)

# Visualize confusion matrix
plt.figure(figsize=(10, 8))
plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.show()



In [None]:
# Persist the augmented model's trained weights next to the notebook.
aug_model_weights_path = './aug_model_weights.weights.h5'
aug_model.save_weights(aug_model_weights_path)

# **Old Archive:**

In [None]:
# model = Sequential()

# model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same',activation ='relu', input_shape = (64,64,3)))
# model.add(MaxPooling2D(pool_size = (2,2), strides = (2,2)))
# model.add(Dropout(0.3))

# model.add(Conv2D(filters = 64, kernel_size = (5,5),padding = 'Same',activation ='relu', input_shape = (150,150,3))) # why does input shape change to 150
# model.add(MaxPooling2D(pool_size = (2,2), strides = (2,2)))
# model.add(Dropout(0.3))

# model.add(Conv2D(filters = 64, kernel_size = (5,5),padding = 'Same',activation ='relu', input_shape = (150,150,3)))
# model.add(MaxPooling2D(pool_size = (2,2), strides = (2,2)))
# model.add(Dropout(0.3))

# model.add(Conv2D(filters = 128, kernel_size = (5,5),padding = 'Same',activation ='relu', input_shape = (150,150,3)))
# model.add(MaxPooling2D(pool_size = (2,2), strides = (2,2)))

# model.add(Flatten())
# model.add(Dense(256))
# model.add(Dense(29, activation = "softmax"))

In [None]:
# model.compile(loss='categorical_crossentropy',optimizer=Adam(learning_rate=0.001),metrics=['accuracy'])
# model.summary()

In [None]:
# early_stop = EarlyStopping(monitor='val_loss',patience=5)

# batch_size=128
# epochs=1

# history = model.fit(train_generator,
#                     epochs=epochs,
#                     verbose=1,
#                     validation_data=validation_generator,
#                     callbacks = [early_stop],
#                     validation_freq=5)

In [None]:
# history.history['loss']

In [None]:
# plt.plot(history.history['loss'], label='Training Loss')
# plt.plot(history.history['val_loss'], label='Validation Loss')
# plt.title('Training and Validation Loss')
# plt.xlabel('Epochs')
# plt.ylabel('Loss')
# plt.legend()
# plt.show()

# **Using the Model (testing)**

In [None]:
def idx_to_letter(idx):
    """Translate a predicted class index into a readable label.

    The models have 29 output units. This mapping assumes the class order that
    results from sorting the dataset's folder names: 'A'..'Z' (indices 0-25),
    then 'del' (26), 'nothing' (27) and 'space' (28).

    Args:
        idx: an indexable sequence (for example the array returned by
            np.argmax(prediction, axis=1)); only its first element is used.

    Returns:
        A lowercase letter for 0-25, '(del)' for 26, 'nothing' for 27,
        '(space)' for 28, and None for any other index.
    """
    class_labels = list('abcdefghijklmnopqrstuvwxyz') + ['(del)', 'nothing', '(space)']
    first_idx = int(idx[0])
    if 0 <= first_idx < len(class_labels):
        return class_labels[first_idx]
    # Unknown index: keep returning None instead of raising.
    return None

In [None]:
# prediction = model.predict(input_data)
# print(prediction)

# classes = np.argmax(prediction, axis = 1)
# print(idx_to_letter(classes))

# **Saving the Model Weights**

https://www.tensorflow.org/tutorials/keras/save_and_load#:~:text=To%20save%20weights%20manually%2C%20use,Save%20and%20load%20models%20guide.

In [None]:
# pip install pyyaml h5py 

In [None]:
# model.save_weights('./weights.weights.h5')

# in new script:

In [None]:
# def make_cool_model():
#     model = Sequential()

#     model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same',activation ='relu', input_shape = (64,64,3)))
#     model.add(MaxPooling2D(pool_size = (2,2), strides = (2,2)))
#     model.add(Dropout(0.3))

#     model.add(Conv2D(filters = 64, kernel_size = (5,5),padding = 'Same',activation ='relu', input_shape = (150,150,3))) # why does input shape change to 150
#     model.add(MaxPooling2D(pool_size = (2,2), strides = (2,2)))
#     model.add(Dropout(0.3))

#     model.add(Conv2D(filters = 64, kernel_size = (5,5),padding = 'Same',activation ='relu', input_shape = (150,150,3)))
#     model.add(MaxPooling2D(pool_size = (2,2), strides = (2,2)))
#     model.add(Dropout(0.3))

#     model.add(Conv2D(filters = 128, kernel_size = (5,5),padding = 'Same',activation ='relu', input_shape = (150,150,3)))
#     model.add(MaxPooling2D(pool_size = (2,2), strides = (2,2)))

#     model.add(Flatten())
#     model.add(Dense(256))
#     # model.add(Activation('relu'))
#     model.add(Dense(29, activation = "softmax"))
#     model.compile(loss='categorical_crossentropy',optimizer=Adam(learning_rate=0.001),metrics=['accuracy'])
#     return model

In [None]:
# new_model = make_cool_model()
# new_model.load_weights('./weights.weights.h5')

In [None]:
# image_path = '/kaggle/input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/C_test.jpg'
# image = keras.utils.load_img(image_path, target_size = (64,64))
# input_data = keras.utils.img_to_array(image)
# input_data = np.expand_dims(input_data, axis=0)
# prediction = new_model.predict(input_data)

# classes = np.argmax(prediction, axis = 1)
# print(idx_to_letter(classes))