Este é o notebook que foi utilizado no colab (para processamento e memória) para treinar os modelos de fato

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Copy the file from your Google Drive's main folder
!cp /content/drive/MyDrive/data2.zip .

# Unzip the file
!unzip data2.zip

#### Data formatting for training and eval

In [None]:
import tensorflow as tf

# --- Configuration ---
BATCH_SIZE = 32
IMG_HEIGHT = 30
IMG_WIDTH = 40
IMG_SHAPE = (IMG_HEIGHT, IMG_WIDTH)

# --- Define paths ---
caminho_train = 'data/augmented_data'
caminho_eval = 'data/processed_eval'

# --- Load Training and Validation Data ---
train_ds = tf.keras.utils.image_dataset_from_directory(
    caminho_train,
    labels='inferred',
    label_mode='int',
    color_mode='grayscale',
    image_size=IMG_SHAPE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=123
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    caminho_eval,
    labels='inferred',
    label_mode='int',
    color_mode='grayscale',
    image_size=IMG_SHAPE,
    batch_size=BATCH_SIZE,
    shuffle=False
)

# --- THE FIX: Get class_names BEFORE you modify the dataset ---
class_names = train_ds.class_names
print(f"✅ Found {len(class_names)} classes: {class_names}")
# -------------------------------------------------------------

# --- Normalize and Optimize ---
normalization_layer = tf.keras.layers.Rescaling(1./255)
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y), num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y), num_parallel_calls=tf.data.AUTOTUNE)

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

print("✅ Data preparation complete and optimized.")

Found 251471 files belonging to 35 classes.
Found 10650 files belonging to 35 classes.
✅ Found 35 classes: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
✅ Data preparation complete and optimized.


### Model construction, training and eval

#### modular CNN function

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout

def create_model(input_shape, num_classes, filters_per_layer=[32, 64], dense_units=128):
    """Creates a flexible CNN model."""

    model = Sequential()
    model.add(tf.keras.Input(shape=input_shape))

    # Convolutional blocks
    for filters in filters_per_layer:
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu', padding='same'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head
    model.add(Flatten())
    model.add(Dense(dense_units, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5)) # Dropout is a key regularizer
    model.add(Dense(num_classes, activation='softmax', dtype='float32')) # Output layer

    return model

#### Find best learning rate

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

# --- Setup ---
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 1)
NUM_CLASSES = len(class_names) # Get number of classes from the loaded data

# --- Define Your Experiments ---
experiments = [
    {'id': 'lr_1e-3', 'learning_rate': 1e-3},
    {'id': 'lr_5e-4', 'learning_rate': 5e-4},
    {'id': 'lr_1e-4', 'learning_rate': 1e-4},
]

for config in experiments:
    print(f"\n--- Running Experiment: {config['id']} ---")

    model = create_model(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=config['learning_rate']),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    model.summary()

    log_dir = os.path.join("logs", config['id'])
    model_path = os.path.join("models", f"{config['id']}_best_model.keras")
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    callbacks = [
        TensorBoard(log_dir=log_dir),
        ModelCheckpoint(filepath=model_path, save_best_only=True, monitor='val_accuracy', mode='max'),
        EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
    ]

    history = model.fit(
        train_ds,
        epochs=100, # Set high, EarlyStopping will handle it
        validation_data=val_ds,
        callbacks=callbacks
    )


--- Running Experiment: lr_1e-3 ---


Epoch 1/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 7ms/step - accuracy: 0.9280 - loss: 0.2808 - val_accuracy: 0.9952 - val_loss: 0.0192
Epoch 2/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 5ms/step - accuracy: 0.9929 - loss: 0.0237 - val_accuracy: 0.9941 - val_loss: 0.0204
Epoch 3/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 5ms/step - accuracy: 0.9959 - loss: 0.0134 - val_accuracy: 0.9945 - val_loss: 0.0227
Epoch 4/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 4ms/step - accuracy: 0.9968 - loss: 0.0102 - val_accuracy: 0.9962 - val_loss: 0.0156
Epoch 5/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 4ms/step - accuracy: 0.9976 - loss: 0.0074 - val_accuracy: 0.9960 - val_loss: 0.0240
Epoch 6/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 4ms/step - accuracy: 0.9979 - loss: 0.0072 - val_accuracy: 0.9959 - val_loss: 0.0243
Epoc

Epoch 1/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 5ms/step - accuracy: 0.9095 - loss: 0.3489 - val_accuracy: 0.9957 - val_loss: 0.0163
Epoch 2/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 5ms/step - accuracy: 0.9938 - loss: 0.0218 - val_accuracy: 0.9954 - val_loss: 0.0177
Epoch 3/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 5ms/step - accuracy: 0.9963 - loss: 0.0120 - val_accuracy: 0.9938 - val_loss: 0.0228
Epoch 4/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 4ms/step - accuracy: 0.9975 - loss: 0.0079 - val_accuracy: 0.9950 - val_loss: 0.0209
Epoch 5/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4ms/step - accuracy: 0.9981 - loss: 0.0063 - val_accuracy: 0.9963 - val_loss: 0.0155
Epoch 6/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 4ms/step - accuracy: 0.9985 - loss: 0.0048 - val_accuracy: 0.9952 - val_loss: 0.0208
Epoc

Epoch 1/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 5ms/step - accuracy: 0.8377 - loss: 0.6569 - val_accuracy: 0.9947 - val_loss: 0.0246
Epoch 2/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 5ms/step - accuracy: 0.9917 - loss: 0.0386 - val_accuracy: 0.9948 - val_loss: 0.0199
Epoch 3/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 5ms/step - accuracy: 0.9962 - loss: 0.0170 - val_accuracy: 0.9961 - val_loss: 0.0151
Epoch 4/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 5ms/step - accuracy: 0.9974 - loss: 0.0108 - val_accuracy: 0.9956 - val_loss: 0.0186
Epoch 5/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4ms/step - accuracy: 0.9983 - loss: 0.0071 - val_accuracy: 0.9964 - val_loss: 0.0138
Epoch 6/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 4ms/step - accuracy: 0.9987 - loss: 0.0053 - val_accuracy: 0.9954 - val_loss: 0.0191
Epoc

#### Learning the best capacity and dropout rate

In [None]:
# --- Updated create_model function to accept variable dropout rates ---
def create_model(input_shape, num_classes, filters_per_layer=[32, 64], dense_units=128, dropout_rate=0.5):
    """Creates a flexible CNN model with adjustable dropout."""
    model = Sequential()
    model.add(tf.keras.Input(shape=input_shape))

    for filters in filters_per_layer:
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu', padding='same'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(dense_units, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax', dtype='float32'))

    return model

# --- Experiment Set A: Find Ideal Capacity ---
BEST_LEARNING_RATE = 5e-4

capacity_experiments = [
    {'id': 'capacity_small', 'filters': [16, 32], 'dense': 64},
    {'id': 'capacity_medium', 'filters': [32, 64], 'dense': 128}, # Baseline
    {'id': 'capacity_large2', 'filters': [64, 128], 'dense': 256},
]

for config in capacity_experiments:
    print(f"\n--- Running Capacity Experiment: {config['id']} ---")
    model = create_model(
        input_shape=INPUT_SHAPE,
        num_classes=NUM_CLASSES,
        filters_per_layer=config['filters'],
        dense_units=config['dense']
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=BEST_LEARNING_RATE),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    model.summary()

    log_dir = os.path.join("logs", config['id'])
    model_path = os.path.join("models", f"{config['id']}_best_model.keras")
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    callbacks = [
        TensorBoard(log_dir=log_dir),
        ModelCheckpoint(filepath=model_path, save_best_only=True, monitor='val_accuracy', mode='max'),
        EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
    ]

    history = model.fit(
        train_ds,
        epochs=100, # Set high, EarlyStopping will handle it
        validation_data=val_ds,
        callbacks=callbacks
    )

[1m6060/7859[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m9s[0m 5ms/step - accuracy: 0.9985 - loss: 0.0049

KeyboardInterrupt: 

#### Find best dropout rate

In [None]:
# --- Experiment Set B: Find Ideal Dropout Rate ---
BEST_LEARNING_RATE = 5e-4
BEST_CAPACITY = {'filters': [32,64], 'dense': 128}

dropout_experiments = [
    {'id': 'dropout_0.3', 'rate': 0.3},
    {'id': 'dropout_0.4', 'rate': 0.4},
    {'id': 'dropout_0.5', 'rate': 0.5},
]

for config in dropout_experiments:
    print(f"\n--- Running Dropout Experiment: {config['id']} ---")
    model = create_model(
        input_shape=INPUT_SHAPE,
        num_classes=NUM_CLASSES,
        filters_per_layer=BEST_CAPACITY['filters'],
        dense_units=BEST_CAPACITY['dense'],
        dropout_rate=config['rate']
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(BEST_LEARNING_RATE),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    model.summary()

    log_dir = os.path.join("logs", config['id'])
    model_path = os.path.join("models", f"{config['id']}_best_model.keras")
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    callbacks = [
        TensorBoard(log_dir=log_dir),
        ModelCheckpoint(filepath=model_path, save_best_only=True, monitor='val_accuracy', mode='max'),
        EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
    ]

    history = model.fit(
        train_ds,
        epochs=100,
        validation_data=val_ds,
        callbacks=callbacks
    )




--- Running Dropout Experiment: dropout_0.3 ---


Epoch 1/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 5ms/step - accuracy: 0.9408 - loss: 0.2471 - val_accuracy: 0.9892 - val_loss: 0.0364
Epoch 2/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 4ms/step - accuracy: 0.9960 - loss: 0.0136 - val_accuracy: 0.9948 - val_loss: 0.0189
Epoch 3/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 5ms/step - accuracy: 0.9978 - loss: 0.0075 - val_accuracy: 0.9965 - val_loss: 0.0165
Epoch 4/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 4ms/step - accuracy: 0.9986 - loss: 0.0049 - val_accuracy: 0.9962 - val_loss: 0.0199
Epoch 5/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 5ms/step - accuracy: 0.9987 - loss: 0.0038 - val_accuracy: 0.9955 - val_loss: 0.0254
Epoch 6/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 5ms/step - accuracy: 0.9991 - loss: 0.0031 - val_accuracy: 0.9961 - val_loss: 0.0240
Epoc

Epoch 1/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 5ms/step - accuracy: 0.9307 - loss: 0.2794 - val_accuracy: 0.9918 - val_loss: 0.0269
Epoch 2/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 6ms/step - accuracy: 0.9956 - loss: 0.0154 - val_accuracy: 0.9940 - val_loss: 0.0236
Epoch 3/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 5ms/step - accuracy: 0.9971 - loss: 0.0096 - val_accuracy: 0.9951 - val_loss: 0.0202
Epoch 4/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 5ms/step - accuracy: 0.9985 - loss: 0.0055 - val_accuracy: 0.9968 - val_loss: 0.0131
Epoch 5/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 5ms/step - accuracy: 0.9985 - loss: 0.0049 - val_accuracy: 0.9964 - val_loss: 0.0163
Epoch 6/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 5ms/step - accuracy: 0.9990 - loss: 0.0035 - val_accuracy: 0.9969 - val_loss: 0.0183
Epoc

Epoch 1/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 5ms/step - accuracy: 0.9157 - loss: 0.3383 - val_accuracy: 0.9907 - val_loss: 0.0316
Epoch 2/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 5ms/step - accuracy: 0.9940 - loss: 0.0211 - val_accuracy: 0.9949 - val_loss: 0.0162
Epoch 3/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 5ms/step - accuracy: 0.9963 - loss: 0.0124 - val_accuracy: 0.9964 - val_loss: 0.0149
Epoch 4/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 5ms/step - accuracy: 0.9976 - loss: 0.0080 - val_accuracy: 0.9971 - val_loss: 0.0137
Epoch 5/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 5ms/step - accuracy: 0.9979 - loss: 0.0071 - val_accuracy: 0.9962 - val_loss: 0.0155
Epoch 6/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 5ms/step - accuracy: 0.9982 - loss: 0.0053 - val_accuracy: 0.9961 - val_loss: 0.0178
Epoc

#### Test adamW

In [15]:
import os
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

BEST_LEARNING_RATE = 5e-4
BEST_FILTERS = [32,64]
BEST_DENSE_UNITS = 128
BEST_DROPOUT_RATE = 0.4

# -----------------------------------------------------------------

print("--- Running Experiment: AdamW Optimizer ---")

# 2. Create your best model architecture
model_adamw = create_model(
    input_shape=INPUT_SHAPE,
    num_classes=NUM_CLASSES,
    filters_per_layer=BEST_FILTERS,
    dense_units=BEST_DENSE_UNITS,
    dropout_rate=BEST_DROPOUT_RATE
)

# 3. Compile the model using the AdamW optimizer
model_adamw.compile(
    optimizer=tf.keras.optimizers.AdamW(
        learning_rate=BEST_LEARNING_RATE,
        weight_decay=1e-4  # A common default for weight decay
    ),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model_adamw.summary()

# 4. Set up callbacks to save the best version of this model
model_path = 'models/adamw_best_model.keras'
os.makedirs(os.path.dirname(model_path), exist_ok=True)

callbacks_adamw = [
    ModelCheckpoint(filepath=model_path, save_best_only=True, monitor='val_accuracy', mode='max'),
    EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
]

# 5. Train the model
history_adamw = model_adamw.fit(
    train_ds,
    epochs=100, # Set high, EarlyStopping will handle it
    validation_data=val_ds,
    callbacks=callbacks_adamw
)

--- Running Experiment: AdamW Optimizer ---


Epoch 1/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4ms/step - accuracy: 0.9318 - loss: 0.2777 - val_accuracy: 0.9929 - val_loss: 0.0219
Epoch 2/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.9960 - loss: 0.0150 - val_accuracy: 0.9955 - val_loss: 0.0175
Epoch 3/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 4ms/step - accuracy: 0.9976 - loss: 0.0084 - val_accuracy: 0.9937 - val_loss: 0.0263
Epoch 4/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 4ms/step - accuracy: 0.9981 - loss: 0.0063 - val_accuracy: 0.9959 - val_loss: 0.0189
Epoch 5/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.9986 - loss: 0.0042 - val_accuracy: 0.9954 - val_loss: 0.0164
Epoch 6/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.9987 - loss: 0.0037 - val_accuracy: 0.9946 - val_loss: 0.0285
Epoc

#### Final model training
Given the best parameterst found, train the best model with increased patience and decreasing learning rate

In [17]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# --- Step 4a: Re-train with Learning Rate Decay & More Patience ---
BEST_LR = 5e-4
BEST_FILTERS = [32,64]
BEST_DENSE = 128
BEST_DROPOUT = 0.4

print("--- Starting final training with LR decay ---")

final_model = create_model(
    input_shape=INPUT_SHAPE,
    num_classes=NUM_CLASSES,
    filters_per_layer=BEST_FILTERS,
    dense_units=BEST_DENSE,
    dropout_rate=BEST_DROPOUT
)

final_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=BEST_LR),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Set up callbacks with more patience and the LR scheduler
final_callbacks = [
    ModelCheckpoint(filepath='models/model_with_decay.keras', save_best_only=True, monitor='val_accuracy', mode='max'),
    EarlyStopping(monitor='val_accuracy', patience=10), # Increased patience
    ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3) # Add scheduler
]
final_model.fit(train_ds, epochs=100, validation_data=val_ds, callbacks=final_callbacks)

--- Starting final training with LR decay ---
Epoch 1/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 4ms/step - accuracy: 0.9293 - loss: 0.2889 - val_accuracy: 0.9936 - val_loss: 0.0207 - learning_rate: 5.0000e-04
Epoch 2/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.9950 - loss: 0.0170 - val_accuracy: 0.9952 - val_loss: 0.0174 - learning_rate: 5.0000e-04
Epoch 3/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.9973 - loss: 0.0093 - val_accuracy: 0.9940 - val_loss: 0.0249 - learning_rate: 5.0000e-04
Epoch 4/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.9984 - loss: 0.0056 - val_accuracy: 0.9961 - val_loss: 0.0157 - learning_rate: 5.0000e-04
Epoch 5/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.9987 - loss: 0.0043 - val_accuracy: 0.9961 - val_loss: 0.0164 - learning_r

<keras.src.callbacks.history.History at 0x7da24e2a0a50>

#### If needed (based on training logs), implement a custom callback to stop training at peak performance

In [21]:
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

class HaltOnThreshold(tf.keras.callbacks.Callback):
  """Callback that halts training when validation accuracy reaches a threshold."""
  def __init__(self, threshold):
    super(HaltOnThreshold, self).__init__()
    self.threshold = threshold

  def on_epoch_end(self, epoch, logs=None):
    val_accuracy = logs.get("val_accuracy")
    if val_accuracy is not None and val_accuracy >= self.threshold:
      print(f"\\nReached {self.threshold:.4f} validation accuracy. Halting training.")
      self.model.stop_training = True

# --- Step 4a: Re-train with Learning Rate Decay & More Patience ---
BEST_LR = 5e-4
BEST_FILTERS = [32, 64]
BEST_DENSE = 128
BEST_DROPOUT = 0.4

print("--- Starting final training with LR decay ---")

final_model = create_model(
    input_shape=INPUT_SHAPE,
    num_classes=NUM_CLASSES,
    filters_per_layer=BEST_FILTERS,
    dense_units=BEST_DENSE,
    dropout_rate=BEST_DROPOUT
)

final_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=BEST_LR),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model_checkpoint = ModelCheckpoint(filepath='models/model_with_decay3.keras', save_best_only=True, monitor='val_accuracy', mode='max')
early_stopper = EarlyStopping(monitor='val_accuracy', patience=10)
lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3)
halt_on_target = HaltOnThreshold(threshold=0.9978)

final_callbacks = [
    model_checkpoint,
    early_stopper,
    lr_scheduler,
    halt_on_target
]

# Train the model
final_model.fit(train_ds, epochs=100, validation_data=val_ds, callbacks=final_callbacks)

--- Starting final training with LR decay ---
Epoch 1/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 4ms/step - accuracy: 0.9312 - loss: 0.2754 - val_accuracy: 0.9949 - val_loss: 0.0176 - learning_rate: 5.0000e-04
Epoch 2/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 4ms/step - accuracy: 0.9954 - loss: 0.0165 - val_accuracy: 0.9920 - val_loss: 0.0240 - learning_rate: 5.0000e-04
Epoch 3/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.9974 - loss: 0.0089 - val_accuracy: 0.9958 - val_loss: 0.0231 - learning_rate: 5.0000e-04
Epoch 4/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.9981 - loss: 0.0061 - val_accuracy: 0.9964 - val_loss: 0.0192 - learning_rate: 5.0000e-04
Epoch 5/100
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.9984 - loss: 0.0048 - val_accuracy: 0.9954 - val_loss: 0.0190 - learning_r

<keras.src.callbacks.history.History at 0x7da251034710>

In [41]:
# Backing up the data, just in case

from google.colab import drive
import os

print("Mounting Google Drive...")
drive.mount('/content/drive')

backup_filename = "ocr_project_backup_final.zip"
drive_path = f"/content/drive/MyDrive/{backup_filename}"

print("\nZipping 'models' and 'logs' directories...")
!zip -r {backup_filename} ./models ./logs

print(f"\nCopying {backup_filename} to Google Drive...")
!cp {backup_filename} /content/drive/MyDrive/

print(f"\n✅ Backup complete! '{backup_filename}' is now saved in your Google Drive.")

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Zipping 'models' and 'logs' directories...
  adding: models/ (stored 0%)
  adding: models/model_with_decay2.keras (deflated 7%)
  adding: models/dropout_0.4_best_model.keras (deflated 7%)
  adding: models/capacity_large2_best_model.keras (deflated 7%)
  adding: models/final_model_quant.tflite (deflated 13%)
  adding: models/capacity_large_best_model.keras (deflated 7%)
  adding: models/model_with_decay3.keras (deflated 7%)
  adding: models/final_model.keras (deflated 7%)
  adding: models/lr_1e-4_best_model.keras (deflated 8%)
  adding: models/adamw_best_model.keras (deflated 7%)
  adding: models/model_with_decay.keras (deflated 7%)
  adding: models/lr_1e-3_best_model.keras (deflated 7%)
  adding: models/capacity_medium_best_model.keras (deflated 7%)
  adding: models/capacity_small_best_model.keras (deflated 10%)
  adding: models/.ipy

After training it, set a very small learning rate to find the absolute minimum

In [28]:

# --- Step 4b: Final Polish with Tiny Learning Rate ---
print("\n--- Starting final polish with tiny learning rate ---")

# Load the best model from the previous step
polished_model = tf.keras.models.load_model('models/model_with_decay3.keras')

# Re-compile with a very small learning rate
polished_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
halt_on_target = HaltOnThreshold(threshold=0.998)

# Train for just a few more epochs
polished_model.fit(train_ds, epochs=5, validation_data=val_ds, callbacks=[halt_on_target])

# Save your final, fully-trained model
polished_model.save('models/final_model.keras')
print("✅ Final model saved as models/final_model.keras")


--- Starting final polish with tiny learning rate ---
Epoch 1/5
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 4ms/step - accuracy: 1.0000 - loss: 7.7927e-05 - val_accuracy: 0.9978 - val_loss: 0.0133
Epoch 2/5
[1m7854/7859[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 3.8357e-05\nReached 0.9980 validation accuracy. Halting training.
[1m7859/7859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 4ms/step - accuracy: 1.0000 - loss: 3.8354e-05 - val_accuracy: 0.9980 - val_loss: 0.0132
✅ Final model saved as models/final_model.keras


### Evaluation

In [30]:
import tensorflow as tf

# --- Evaluate the Best Model ---
best_model_path = 'models/final_model.keras'

print(f"Loading best model from: {best_model_path}")
loaded_model = tf.keras.models.load_model(best_model_path)

# Evaluate the loaded model on your actual evaluation data
print("\nEvaluating on the processed evaluation dataset:")
loss, accuracy = loaded_model.evaluate(val_ds, verbose=2)

print(f"\nEvaluation Accuracy: {accuracy*100:.5f}%")

Loading best model from: models/final_model.keras

Evaluating on the processed evaluation dataset:
333/333 - 2s - 5ms/step - accuracy: 0.9980 - loss: 0.0132

Evaluation Accuracy: 99.80282%


### Post-training quantization

#### Benchmark final model

In [36]:
import tensorflow as tf
import numpy as np
import time

fp32_model = tf.keras.models.load_model('models/final_model.keras')

# --- 1. Measure Accuracy ---
print("Evaluating FP32 model accuracy...")
loss, fp32_accuracy = fp32_model.evaluate(val_ds, verbose=0)
print(f"FP32 Model Accuracy: {fp32_accuracy * 100:.4f}%")

# --- 2. Measure Inference Speed ---
inference_batch = next(iter(val_ds.unbatch().batch(1)))

# warm up
_ = fp32_model.predict(inference_batch, verbose=0)

# Time the inference
print("\nTiming FP32 model inference...")
start_time = time.time()
_ = fp32_model.predict(inference_batch, verbose=0)
end_time = time.time()
fp32_inference_time = (end_time - start_time) * 1000 # in milliseconds
print(f"FP32 Model Inference Time: {fp32_inference_time:.4f} ms")

Evaluating FP32 model accuracy...
FP32 Model Accuracy: 99.8028%


Expected: input_layer_14
Received: inputs=('Tensor(shape=(1, 30, 40, 1))', 'Tensor(shape=(1,))')



Timing FP32 model inference...
FP32 Model Inference Time: 70.4129 ms


#### Apply quantization

In [37]:
# --- 1. Create a representative dataset generator ---
# This helps the converter figure out the data ranges for activations.
def representative_dataset_gen():
  # Use a small sample from your training data (e.g., ~100-200 images)
  for images, _ in train_ds.take(100):
    yield [images]

# --- 2. Convert the model ---
converter = tf.lite.TFLiteConverter.from_keras_model(fp32_model)

# This enables full integer quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
# This ensures the model is strictly integer-only for max performance
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

# Convert and save the quantized model
tflite_quant_model = converter.convert()
with open('models/final_model_quant.tflite', 'wb') as f:
  f.write(tflite_quant_model)

print("\n✅ Quantized INT8 model saved as 'final_model_quant.tflite'")

Saved artifact at '/tmp/tmpko0k80v5'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 30, 40, 1), dtype=tf.float32, name='input_layer_14')
Output Type:
  TensorSpec(shape=(None, 35), dtype=tf.float32, name=None)
Captures:
  138134946069200: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138134946068240: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138134946069968: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138134946065168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138134946068816: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138134946067664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138134946066320: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138134946065936: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138134946067088: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138134946067472: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1381349460690




✅ Quantized INT8 model saved as 'final_model_quant.tflite'


#### Benchmark the final quantized model

In [38]:
# --- 1. Load the TFLite model and allocate tensors ---
interpreter = tf.lite.Interpreter(model_path='models/final_model_quant.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

# --- 2. Evaluate Accuracy on the validation set ---
print("\nEvaluating INT8 TFLite model accuracy...")
correct_predictions = 0
total_images = 0
for images, labels in val_ds:
  for i in range(images.shape[0]):
    # Get a single image and check if it needs to be quantized
    image = images[i:i+1]
    if input_details['dtype'] == np.int8:
      input_scale, input_zero_point = input_details["quantization"]
      image = tf.cast((image / input_scale + input_zero_point), dtype=tf.int8)

    interpreter.set_tensor(input_details['index'], image)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details['index'])

    if np.argmax(output) == labels[i]:
      correct_predictions += 1
    total_images += 1

int8_accuracy = correct_predictions / total_images
print(f"INT8 TFLite Model Accuracy: {int8_accuracy * 100:.4f}%")

# --- 3. Measure Inference Speed ---
print("\nTiming INT8 TFLite model inference...")
interpreter.set_tensor(input_details['index'], image) # Use the last image for timing
# Warm up
interpreter.invoke()
# Time it
start_time = time.time()
interpreter.invoke()
end_time = time.time()
int8_inference_time = (end_time - start_time) * 1000 # in milliseconds
print(f"INT8 TFLite Model Inference Time: {int8_inference_time:.4f} ms")


Evaluating INT8 TFLite model accuracy...
INT8 TFLite Model Accuracy: 99.7840%

Timing INT8 TFLite model inference...
INT8 TFLite Model Inference Time: 0.4947 ms
