### Imports

In [None]:
import os
import pandas as pd
import numpy as np
import math
import tensorflow as tf

from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Rescaling

### Constants

In [None]:
# Locations of the ASL alphabet dataset, relative to the notebook.
DATASET_DIR = "../input/asl-alphabet"
TRAIN_DIR = os.path.join(DATASET_DIR, "asl_alphabet_train/asl_alphabet_train")
TEST_DIR = os.path.join(DATASET_DIR, "asl_alphabet_test/asl_alphabet_test")

# Target (height, width) every image is resized to, and images per batch.
IMAGE_SIZE = (200, 200)
BATCH_SIZE = 32

# One class per sub-directory of TRAIN_DIR. os.listdir() returns entries in
# arbitrary order and also lists plain files, so keep directories only and
# sort them: this keeps CLASSES stable between runs and stops stray files
# from inflating NUM_CLASSES (which sizes the model's output layer).
CLASSES = sorted(
    entry
    for entry in os.listdir(TRAIN_DIR)
    if os.path.isdir(os.path.join(TRAIN_DIR, entry))
)
NUM_CLASSES = len(CLASSES)

# Seed passed to the dataset loader for the train/validation split.
SEED = 17

### Load data

In [None]:
# Options common to both subsets: the training and validation loaders read
# the same directory with the same split fraction, seed, image size and
# batch size, and differ only in which subset they request.
_split_options = dict(
    labels="inferred",
    validation_split=0.2,
    seed=SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# 80% of the images in TRAIN_DIR, used for fitting.
train_ds = image_dataset_from_directory(
    TRAIN_DIR, subset="training", **_split_options
)

# The remaining 20%, held out for validation.
val_ds = image_dataset_from_directory(
    TRAIN_DIR, subset="validation", **_split_options
)

### Visualize

In [None]:
# Preview the first nine images of a single training batch in a 3x3 grid,
# each titled with its class name. This reads `train_ds.class_names`, so it
# has to run on the dataset object returned by the loader.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
for images, labels in train_ds.take(1):
    for position, ax in enumerate(axes.flat):
        # Pixel values are floats at this stage; cast for display.
        ax.imshow(images[position].numpy().astype("uint8"))
        ax.set_title(train_ds.class_names[labels[position]])
        ax.axis("off")

### Normalize Images

In [None]:
# Layer that multiplies every pixel value by 1/255.
normalize = Rescaling(1./255)


def _rescale_batch(x, y):
    """Return the batch with its images rescaled and its labels untouched."""
    return normalize(x), y


# NOTE: both names are rebound to the mapped datasets, so running this cell
# a second time without reloading the data applies the rescaling twice.
train_ds = train_ds.map(_rescale_batch)
val_ds = val_ds.map(_rescale_batch)

### Define the CNN Architecture

In [None]:
# Input: IMAGE_SIZE images with three channels. The batch dimension is left
# unspecified (instead of being pinned to BATCH_SIZE) so the same model
# accepts a shorter final batch during training/validation and arbitrary
# batch sizes at inference time.
inputs = Input(shape=IMAGE_SIZE + (3,))

# Convolutional base: three Conv2D + MaxPooling2D blocks whose filter count
# doubles at each level (64 -> 128 -> 256). Every convolution uses a 3x3
# kernel with "same" padding and ReLU; every block ends with a pool of size 2.
x = Conv2D(64, kernel_size=3, padding="same", activation="relu")(inputs)
x = MaxPooling2D(2)(x)

x = Conv2D(128, kernel_size=3, padding="same", activation="relu")(x)
x = MaxPooling2D(2)(x)

x = Conv2D(256, kernel_size=3, padding="same", activation="relu")(x)
x = MaxPooling2D(2)(x)

# Head (classifier): flatten the feature maps, one hidden dense layer, then
# a softmax over NUM_CLASSES outputs.
x = Flatten()(x)
x = Dense(128, activation="relu")(x)
x = Dense(NUM_CLASSES, activation="softmax")(x)

model = tf.keras.Model(inputs=inputs, outputs=x, name="ASL_CNN")

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

### Compile The Model

In [None]:
# Adam optimizer with sparse categorical cross-entropy (labels are class
# indices, not one-hot vectors). `metrics` is passed as a list, the form the
# Keras API expects; a bare string is rejected by newer Keras versions.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Draw the model graph, annotating each layer with its shapes.
tf.keras.utils.plot_model(model, show_shapes=True)

### Train The Model

In [None]:
# Number of full passes over the training dataset.
epochs = 15

# `train_ds` and `val_ds` are tf.data datasets that already yield batches of
# BATCH_SIZE, so `batch_size` and `shuffle` are not passed to fit(): Keras
# documents that `batch_size` must not be given for dataset inputs and that
# `shuffle` is ignored for them.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)

### Model Architecture Conclusions

I experimented with various model architectures, achieving a maximum accuracy of 99.41 percent on the validation dataset.
<br/>Experiment strategy used:
1. Define a base network architecture
2. Find a configuration that overfits
3. Eliminate the overfitting
4. Optimize the network by reducing the number of parameters while still reaching near-ideal results.
    
The best results were achieved with a three-level-deep architecture using a stack of Conv2D + MaxPooling blocks with Conv2D widths of 128, 256 and 512 (165,334,301 parameters).
<br/>The same architecture with fewer nodes in the layers (64, 128, 256) (41,338,525 parameters) performs about the same (99.16 percent accuracy).
<br/>From my observations, adding a second Conv2D layer to each block leaves the model unable to get above 5 percent accuracy.

### Plot Training History and Save The Model

In [None]:
# One row per epoch, one column per metric recorded during training.
df = pd.DataFrame(history.history)

# Draw training vs. validation curves: first the loss, then the accuracy.
for curve_columns in (["loss", "val_loss"], ["accuracy", "val_accuracy"]):
    df[curve_columns].plot()

In [None]:
# Persist the trained model to a single .h5 file under /kaggle/working.
# NOTE(review): the absolute path presumably targets Kaggle's output
# directory; adjust it when running elsewhere.
model.save("/kaggle/working/asl_alphabet.h5")