In [1]:
import tensorflow as tf
# Keep TensorFlow's logger quiet (errors only), then report the GPUs it can see.
tf_logger = tf.get_logger()
tf_logger.setLevel('ERROR')
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
import os
import numpy as np
import random
import matplotlib.pyplot as plt
import itertools
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras import layers, models, callbacks, optimizers, regularizers, mixed_precision
from tensorflow.keras.preprocessing import image_dataset_from_directory
import tensorflow_addons as tfa
# Activate the 'mixed_float16' dtype policy for every layer created from here on,
# then report the TensorFlow version in use.
global_policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(global_policy)
print('TensorFlow', tf.__version__)

TensorFlow 2.10.0


In [None]:
DATA_DIR = '../data'   # change to the parent folder that contains 'train' and 'test' or 'train' only
TRAIN_SUBDIR = 'train' # or 'images_train'
TEST_SUBDIR = 'test'   # optional if you have a separate test folder
IMG_SIZE = (100, 100)  # input size used previously
BATCH_SIZE = 32
SEED = 1337
AUTOTUNE = tf.data.AUTOTUNE
NUM_EPOCHS = 30
NUM_CLASSES = None     # placeholder; set once the class folders have been read

# Seed Python's `random`, NumPy and TensorFlow in one call so that augmentation,
# weight initialisation and the random sample picks further down are repeatable
# across "Restart & Run All".
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Resolve the dataset folders. `test_dir` stays None when no separate test
# folder exists, which later triggers a validation split of the training data.
train_dir = os.path.join(DATA_DIR, TRAIN_SUBDIR)
test_dir_candidate = os.path.join(DATA_DIR, TEST_SUBDIR)
test_dir = test_dir_candidate if os.path.isdir(test_dir_candidate) else None

In [5]:
# Loader options shared by every dataset: one-hot labels inferred from the
# folder names, single-channel images resized to IMG_SIZE, fixed seed.
common_kwargs = dict(
    labels='inferred',
    label_mode='categorical',
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    color_mode='grayscale',
    seed=SEED,
)

if test_dir is not None:
    # A separate test folder exists: train on all of `train_dir` and validate on it.
    # NOTE: in this branch the test folder doubles as the validation set.
    train_ds = image_dataset_from_directory(train_dir, **common_kwargs)
    val_ds = image_dataset_from_directory(test_dir, **common_kwargs)
else:
    # If you don't have a separate test folder, we use validation_split.
    # Both calls share the same seed and split fraction.
    split_kwargs = dict(common_kwargs, validation_split=0.2)
    train_ds = image_dataset_from_directory(train_dir, subset='training', **split_kwargs)
    val_ds = image_dataset_from_directory(train_dir, subset='validation', **split_kwargs)

Found 28800 files belonging to 36 classes.
Found 7236 files belonging to 36 classes.


In [6]:
# Class names are read from the training dataset; their order defines the
# meaning of each position in the one-hot label vectors.
class_names = train_ds.class_names
NUM_CLASSES = len(class_names)

# The validation data must use exactly the same class list, otherwise label
# index k would refer to different classes in the two datasets.
if val_ds.class_names != class_names:
    raise ValueError(
        'Training and validation folders define different classes: '
        f'{class_names} vs {val_ds.class_names}'
    )

print('Classes:', class_names)

Classes: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [7]:
# Build a normalization layer (scales to [0,1])
normalization_layer = layers.Rescaling(1./255)

# Random augmentations. These layers are used as the first stage of the model
# (see build_improved_model), where Keras runs them during training only.
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.25),  # Increased rotation
    layers.RandomZoom(0.2),        # Increased zoom
    layers.RandomContrast(0.2),    # Increased contrast
    layers.RandomBrightness(0.2),  # Increased brightness
    layers.RandomTranslation(0.15, 0.15),  # More translation
], name='data_augmentation')

# Do NOT also map `data_augmentation` over train_ds here:
#   * the model already applies it, so every image would be augmented twice;
#   * a random map placed before .cache() is evaluated once and then replayed,
#     so every later epoch would see the same "augmented" images.
# Cache the raw decoded images instead; augmentation then stays fresh per step.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [8]:
def build_improved_model(input_shape=(100, 100, 1), num_classes=36):
    """Build the CNN classifier: augmentation -> rescaling -> 3 conv stages -> dense head.

    Uses the module-level ``data_augmentation`` and ``normalization_layer``
    objects as the first two stages of the network.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
        num_classes: Number of output classes (units of the softmax layer).

    Returns:
        An uncompiled ``keras.Model`` named ``'improved_cnn_isl'``.
    """
    reg = regularizers.l2(1e-4)

    def conv_stage(tensor, filters, dropout_rate):
        # Two (3x3 conv + ReLU -> batch norm) pairs, then 2x2 max-pool and dropout.
        for _ in range(2):
            tensor = layers.Conv2D(filters, 3, padding='same', activation='relu',
                                   kernel_regularizer=reg)(tensor)
            tensor = layers.BatchNormalization()(tensor)
        tensor = layers.MaxPooling2D()(tensor)
        return layers.Dropout(dropout_rate)(tensor)

    def dense_stage(tensor, units):
        # Fully connected layer + batch norm + heavy dropout.
        tensor = layers.Dense(units, activation='relu', kernel_regularizer=reg)(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.Dropout(0.5)(tensor)

    inputs = layers.Input(shape=input_shape)
    x = data_augmentation(inputs)
    x = normalization_layer(x)

    # Convolutional feature extractor: filters double per stage.
    x = conv_stage(x, 64, 0.25)    # Block 1 - More filters to capture features
    x = conv_stage(x, 128, 0.25)   # Block 2
    x = conv_stage(x, 256, 0.4)    # Block 3

    # Dense layers
    x = layers.GlobalAveragePooling2D()(x)
    x = dense_stage(x, 512)
    x = dense_stage(x, 256)

    # The notebook enables the 'mixed_float16' policy. Keep the classification
    # layer in float32 so the softmax probabilities (and the loss computed from
    # them) are not produced in half precision.
    outputs = layers.Dense(num_classes, activation='softmax', dtype='float32')(x)
    model = models.Model(inputs, outputs, name='improved_cnn_isl')
    return model

# Derive the model geometry from the notebook configuration rather than the
# function defaults, so changing IMG_SIZE or the dataset keeps the model in sync.
# The trailing 1 is the single channel of the grayscale images.
model = build_improved_model(input_shape=(*IMG_SIZE, 1), num_classes=NUM_CLASSES)
model.summary()

Model: "improved_cnn_isl"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 100, 100, 1)]     0         
                                                                 
 data_augmentation (Sequenti  (None, 100, 100, 1)      0         
 al)                                                             
                                                                 
 rescaling (Rescaling)       (None, 100, 100, 1)       0         
                                                                 
 conv2d (Conv2D)             (None, 100, 100, 64)      640       
                                                                 
 batch_normalization (BatchN  (None, 100, 100, 64)     256       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 100, 100, 64) 

In [9]:
# Optimizer: AdamW from TensorFlow Addons, which adds a small weight decay.
initial_learning_rate = 1e-3
decay_steps = 50 * len(train_ds)  # batches in 50 epochs; not consumed by anything in this cell

optimizer = tfa.optimizers.AdamW(
    weight_decay=1e-4,
    learning_rate=initial_learning_rate,
)

# Every callback tracks validation accuracy, where larger is better.
monitor_kwargs = {'monitor': 'val_accuracy', 'mode': 'max'}

# Keep only the best-scoring model on disk.
checkpoint_cb = callbacks.ModelCheckpoint(
    'best_model.keras',
    save_best_only=True,
    **monitor_kwargs,
)

# Stop after 15 epochs without improvement and roll back to the best weights.
earlystop_cb = callbacks.EarlyStopping(
    patience=15,
    restore_best_weights=True,
    **monitor_kwargs,
)

# Halve the learning rate after 7 stagnant epochs, never going below 1e-7.
reduce_lr = callbacks.ReduceLROnPlateau(
    factor=0.5,
    patience=7,
    min_lr=1e-7,
    **monitor_kwargs,
)

cbs = [checkpoint_cb, earlystop_cb, reduce_lr]

In [10]:
# One-hot labels (label_mode='categorical') pair with categorical cross-entropy.
compile_kwargs = {
    'optimizer': optimizer,
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_kwargs)

In [11]:
# Train for at most NUM_EPOCHS epochs; the callbacks may stop earlier or lower
# the learning rate. `history` keeps the per-epoch metrics for plotting.
fit_kwargs = dict(validation_data=val_ds, epochs=NUM_EPOCHS, callbacks=cbs)
history = model.fit(train_ds, **fit_kwargs)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Plot training & validation accuracy and loss
# (matplotlib is already imported in the imports cell at the top of the notebook)
hist = history.history

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (history key, legend suffix, panel title / y-axis label) for each panel
panels = [
    ('loss', 'loss', 'Loss'),
    ('accuracy', 'acc', 'Accuracy'),
]
for ax, (metric, short_name, title) in zip(axes, panels):
    ax.plot(hist[metric], label=f'train_{short_name}')
    ax.plot(hist[f'val_{metric}'], label=f'val_{short_name}')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(title)
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Evaluate on validation/test set.
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
eval_metrics = model.evaluate(val_ds)
val_loss, val_acc = eval_metrics
print(f'Validation loss: {val_loss:.4f}, Validation accuracy: {val_acc:.4f}')

In [None]:
# Get predictions and build confusion matrix
# Collect labels and predictions batch by batch so both lists are guaranteed
# to be in the same order, however val_ds happens to iterate.
y_true = []
y_pred = []
for x_batch, y_batch in val_ds:
    # Call the model directly (inference mode) instead of model.predict():
    # predict() is meant for whole datasets and would print one progress bar
    # and pay its per-call setup cost for every single batch.
    probs = model(x_batch, training=False).numpy()
    y_true.extend(np.argmax(y_batch.numpy(), axis=1).tolist())
    y_pred.extend(np.argmax(probs, axis=1).tolist())

# Pass the full label range explicitly so row/column k always corresponds to
# class_names[k], even if some class never shows up in y_true or y_pred.
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))
print('Confusion matrix shape:', cm.shape)

In [None]:
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap='Blues'):
    """Draw a confusion matrix as an annotated heat map on a new figure.

    Args:
        cm: Square 2-D array of counts; rows are true labels, columns predictions.
        classes: Tick labels, one per row/column of ``cm``.
        normalize: If True, divide each row by its sum so cells show per-class
            fractions (printed with two decimals) instead of raw counts.
        title: Figure title.
        cmap: Matplotlib colormap. The default is a light-to-dark map so that the
            cell annotation rule below (white text on high values, black on low)
            stays readable.

    The function only draws; call ``plt.show()`` afterwards to display the figure.
    """
    cm = np.asarray(cm)
    if normalize:
        # Small epsilon avoids division by zero for classes with no samples.
        cm = cm.astype('float') / (cm.sum(axis=1)[:, np.newaxis] + 1e-12)

    plt.figure(figsize=(10, 8))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum are dark in a light-to-dark colormap.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        val = cm[i, j]
        cell_text = '{:.2f}'.format(val) if normalize else str(int(val))
        plt.text(j, i, cell_text, horizontalalignment='center',
                 color='white' if val > thresh else 'black', fontsize=6)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so it accounts for the axis labels as well.
    plt.tight_layout()

plot_confusion_matrix(cm, class_names, normalize=False)
plt.show()

In [None]:
# Classification report (per-class precision/recall/f1)
# `labels` pins report row k to class_names[k] even when a class is missing
# from y_true/y_pred (otherwise target_names and labels can disagree in length).
print(classification_report(
    y_true,
    y_pred,
    labels=list(range(len(class_names))),
    target_names=class_names,
    zero_division=0,
))

# Visualize misclassifications
mis_idx = [i for i, (true_lbl, pred_lbl) in enumerate(zip(y_true, y_pred)) if true_lbl != pred_lbl]
if mis_idx:
    sample_idx = random.choice(mis_idx)
    # Walk the un-batched dataset up to the chosen sample instead of loading
    # every validation image into a Python list.
    # NOTE: assumes val_ds yields samples in the same order as when
    # y_true / y_pred were collected (it is cached, so the order should be stable).
    sample_image, _ = next(itertools.islice(iter(val_ds.unbatch()), sample_idx, None))
    plt.imshow(sample_image.numpy().squeeze(), cmap='gray')
    plt.title(f"True: {class_names[y_true[sample_idx]]}, Pred: {class_names[y_pred[sample_idx]]}")
    plt.show()
else:
    print('No misclassified samples to display.')

In [None]:
def predict_with_tta(model, dataset, num_augmentations=5, augmenter=None):
    """Predict class indices, optionally averaging over augmented copies (TTA).

    Keras random preprocessing layers only act when called with
    ``training=True``, so ``model.predict`` alone never augments its input and
    repeated passes give the same output. To get real test-time augmentation,
    pass an ``augmenter`` that is applied to the images before each pass.

    Args:
        model: Trained Keras model.
        dataset: ``tf.data.Dataset`` yielding image batches, or tuples whose
            first element is the image batch (e.g. ``(images, labels)``).
        num_augmentations: Number of augmented passes to average. Must be >= 1.
        augmenter: Optional callable/layer invoked as
            ``augmenter(images, training=True)``. If None, a single plain
            prediction pass is made.

    Returns:
        1-D NumPy array with the predicted class index of every sample.

    Raises:
        ValueError: If ``num_augmentations`` is smaller than 1.
    """
    if num_augmentations < 1:
        raise ValueError('num_augmentations must be at least 1')

    if augmenter is None:
        # Without augmentation all passes are identical; one pass is enough.
        return np.argmax(model.predict(dataset), axis=1)

    def _augment(*batch):
        # Augment only the images; pass labels / weights through untouched.
        return (augmenter(batch[0], training=True),) + tuple(batch[1:])

    augmented_ds = dataset.map(_augment)

    # Sum in float32 so that float16 model outputs do not lose precision.
    prob_sum = None
    for _ in range(num_augmentations):
        probs = np.asarray(model.predict(augmented_ds), dtype=np.float32)
        prob_sum = probs if prob_sum is None else prob_sum + probs

    # Average predictions
    final_preds = prob_sum / num_augmentations
    return np.argmax(final_preds, axis=1)

# Use for evaluation. The training augmentation pipeline is reused here; pass a
# milder augmenter if the strong training transforms hurt TTA accuracy.
y_pred_tta = predict_with_tta(model, val_ds, augmenter=data_augmentation)

In [None]:
# Analyze confusion specifically for problematic classes
problem_classes = ['c', 'f', 'm', 'o']
TOP_K_CONFUSIONS = 5

# Ignore requested classes that are not part of this dataset instead of
# failing with a ValueError from list.index().
problem_indices = [class_names.index(c) for c in problem_classes if c in class_names]

for idx in problem_indices:
    # Row `idx` of the confusion matrix: how samples of this class were predicted.
    class_predictions = np.array(cm[idx])
    # Zero the diagonal entry (correct predictions) so it cannot take one of
    # the top-k slots.
    class_predictions[idx] = 0
    # Most frequent confusion first; the stable sort keeps ties in class order.
    confused_with = np.argsort(-class_predictions, kind='stable')[:TOP_K_CONFUSIONS]

    print(f"\nClass '{class_names[idx]}' confused with:")
    for conf_idx in confused_with:
        if class_predictions[conf_idx] == 0:
            break  # remaining entries were never predicted for this class
        print(f"  {class_names[conf_idx]}: {class_predictions[conf_idx]} times")