In [1]:
# Mount Google Drive so that files under the mount point can be read and
# written from this notebook.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [1]:
!pip install tensorflow tensorflow-datasets matplotlib gdown



In [3]:
import os, json
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models

# Drive locations for the saved model and its label mapping
DRIVE_BASE = "/content/drive/MyDrive/HandwrittenCharacterRecognition"
MODEL_PATH = os.path.join(DRIVE_BASE, "models", "cnn_emnist.h5")
LABELS_PATH = os.path.join(DRIVE_BASE, "labels", "label_names.json")

# Create the parent folder of each artifact up front (no error if it exists)
for _artifact_path in (MODEL_PATH, LABELS_PATH):
    os.makedirs(os.path.dirname(_artifact_path), exist_ok=True)

# Load the EMNIST "balanced" train and test splits as (image, label) pairs,
# together with the dataset metadata object.
_emnist_splits, ds_info = tfds.load(
    name="emnist/balanced",
    split=["train", "test"],
    with_info=True,
    as_supervised=True,
)
ds_train, ds_test = _emnist_splits

# The class count comes from the label feature's metadata
_label_feature = ds_info.features['label']
num_classes = _label_feature.num_classes
print("Number of classes:", num_classes)

# Get label names
# Best-effort lookup: use the names exposed by the label feature, and fall
# back to the stringified class indices (one per class) if that lookup fails.
try:
    label_names = ds_info.features['label'].names
except Exception:
    # Catch Exception instead of a bare `except:` so that KeyboardInterrupt
    # and SystemExit are not swallowed by the fallback.
    label_names = [str(i) for i in range(num_classes)]

# Preprocess images
def preprocess(image, label):
    """Turn one (image, label) example into tensors ready for the model.

    The image is cast to float32 and divided by 255; if it has static rank 2
    a trailing channel axis is appended. The label is cast to int32 and
    one-hot encoded with ``num_classes`` entries.

    Returns:
        A ``(image, one_hot_label)`` tuple.
    """
    # NOTE(review): the TFDS catalog describes EMNIST images as stored
    # transposed relative to upright text; nothing here re-orients them —
    # confirm that inference inputs use the same orientation.
    scaled = tf.cast(image, tf.float32) / 255.0
    if scaled.shape.rank == 2:
        # Only rank-2 (no channel axis) images need the extra dimension
        scaled = tf.expand_dims(scaled, -1)
    class_index = tf.cast(label, tf.int32)
    one_hot_label = tf.one_hot(class_index, depth=num_classes)
    return scaled, one_hot_label

# Batch size shared by the training and evaluation pipelines
BATCH = 128
AUTOTUNE = tf.data.AUTOTUNE

# Seed the Python, NumPy and TensorFlow RNGs before any stochastic op is
# created, so the shuffle order below and the weight initialisation of the
# model built later are repeatable across "Restart & Run All".
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Training pipeline: preprocess -> shuffle (10k-element buffer) -> batch -> prefetch
train_ds = (
    ds_train
    .map(preprocess, num_parallel_calls=AUTOTUNE)
    .shuffle(10000, seed=SEED)
    .batch(BATCH)
    .prefetch(AUTOTUNE)
)

# Evaluation pipeline: same preprocessing and batching, no shuffling
test_ds = (
    ds_test
    .map(preprocess, num_parallel_calls=AUTOTUNE)
    .batch(BATCH)
    .prefetch(AUTOTUNE)
)

# Build CNN model
def build_model(input_shape=(28,28,1), n_classes=num_classes):
    """Create and compile the convolutional classifier.

    Architecture: two Conv2D(3x3, ReLU) + 2x2 max-pool stages (32 then 64
    filters), flattened into a 256-unit ReLU dense layer with 40% dropout,
    followed by a softmax over ``n_classes``.

    Args:
        input_shape: Shape of a single input image.
        n_classes: Width of the softmax output layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    net = models.Sequential()
    net.add(layers.Input(shape=input_shape))

    # Convolutional feature extractor
    for n_filters in (32, 64):
        net.add(layers.Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu'))
        net.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Classifier head
    net.add(layers.Flatten())
    net.add(layers.Dense(units=256, activation='relu'))
    net.add(layers.Dropout(rate=0.4))
    net.add(layers.Dense(units=n_classes, activation='softmax'))

    net.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Instantiate the network with its default arguments and print its summary
model = build_model()
model.summary()

# Train model
EPOCHS = 3  # Change to more epochs later for better accuracy
# The test split is passed as validation data, so the val_* metrics reported
# each epoch are computed on test_ds.
history = model.fit(
    x=train_ds,
    epochs=EPOCHS,
    validation_data=test_ds,
)

# Save model and label mapping to Drive
model.save(MODEL_PATH)

# Write the labels as UTF-8 explicitly: ensure_ascii=False emits non-ASCII
# characters verbatim, so relying on the platform's default encoding could
# raise on write or yield a file that reads back incorrectly elsewhere.
with open(LABELS_PATH, 'w', encoding='utf-8') as f:
    json.dump(label_names, f, ensure_ascii=False)

print("Model saved to:", MODEL_PATH)
print("Label mapping saved to:", LABELS_PATH)


Number of classes: 47


Epoch 1/3
882/882 ━━━━━━━━━━━━━━━━━━━━ 19s 18ms/step - accuracy: 0.5827 - loss: 1.4740 - val_accuracy: 0.8449 - val_loss: 0.4668
Epoch 2/3
882/882 ━━━━━━━━━━━━━━━━━━━━ 8s 9ms/step - accuracy: 0.8266 - loss: 0.5208 - val_accuracy: 0.8594 - val_loss: 0.4021
Epoch 3/3
882/882 ━━━━━━━━━━━━━━━━━━━━ 8s 9ms/step - accuracy: 0.8489 - loss: 0.4394 - val_accuracy: 0.8713 - val_loss: 0.3757




Model saved to: /content/drive/MyDrive/HandwrittenCharacterRecognition/models/cnn_emnist.h5
Label mapping saved to: /content/drive/MyDrive/HandwrittenCharacterRecognition/labels/label_names.json
