In [None]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Load MNIST dataset in Colab
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize images: pixel values between 0 and 1
X_train = X_train.reshape(-1, 28, 28, 1) / 255.0
X_test = X_test.reshape(-1, 28, 28, 1) / 255.0

# Create a label map to convert numbers 0-9 to letters A-Z
label_map = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J'}
# Convert numerical labels to their corresponding characters
y_train = np.array([label_map[i] for i in y_train])
y_test = np.array([label_map[i] for i in y_test])

# Convert characters to integers for `to_categorical`
num_classes = 26  # Letters A-Z
y_train = [ord(c) - ord('A') for c in y_train]  # Convert 'A'-'Z' to 0-25
y_test = [ord(c) - ord('A') for c in y_test]  # Convert 'A'-'Z' to 0-25

# One-hot encode labels
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

print("Data loaded and preprocessed successfully!")

# Build the CNN model
def build_cnn_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(num_classes, activation='softmax')  # 26 output classes for letters A-Z
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',  # Use categorical_crossentropy for one-hot encoded labels
                  metrics=['accuracy'])
    return model

# Train the model
cnn_model = build_cnn_model()
history = cnn_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64)

# Save the model
model_path = "handwritten_text_recognition_model.h5"  # Use .h5 extension
cnn_model.save(model_path)
print(f"Model saved at: {model_path}")

# Download the model
from google.colab import files
!zip -r handwritten_text_recognition_model.zip handwritten_text_recognition_model.h5
files.download('handwritten_text_recognition_model.zip')


Data loaded and preprocessed successfully!
Epoch 1/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 53ms/step - accuracy: 0.8194 - loss: 0.5908 - val_accuracy: 0.9808 - val_loss: 0.0565
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 52ms/step - accuracy: 0.9707 - loss: 0.1011 - val_accuracy: 0.9852 - val_loss: 0.0422
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 52ms/step - accuracy: 0.9795 - loss: 0.0710 - val_accuracy: 0.9887 - val_loss: 0.0325
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 52ms/step - accuracy: 0.9841 - loss: 0.0549 - val_accuracy: 0.9914 - val_loss: 0.0276
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 51ms/step - accuracy: 0.9867 - loss: 0.0442 - val_accuracy: 0.9899 - val_loss: 0.0294
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 53ms/step - accuracy: 0.9880 - loss: 0.0391 - val_accuracy:



Model saved at: handwritten_text_recognition_model.h5
  adding: handwritten_text_recognition_model.h5 (deflated 7%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import tensorflow as tf
from PIL import Image
import numpy as np
import os

# Load the trained model
model_path = 'handwritten_text_recognition_model.h5'
model = tf.keras.models.load_model(model_path)

# Function to preprocess the uploaded image
def preprocess_image(image_path):
    # Load image, resize to 28x28 and convert to grayscale
    image = Image.open(image_path).convert('L')
    image = image.resize((28, 28))
    # Normalize pixel values
    image = np.array(image) / 255.0
    # Add batch dimension
    image = np.expand_dims(image, axis=0)
    return image

# Function to predict the text from the image
def predict(input_image_path):
    # Preprocess the image
    image = preprocess_image(input_image_path)

    # Predict
    prediction = model.predict(image)

    # Get the predicted class index for each time step
    predicted_classes = np.argmax(prediction, axis=1)  # Corrected axis to 1

    # Create a label map
    label_map = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J'}
    # Initialize an empty list for recognized characters
    recognized_text = [label_map[char_id] for char_id in predicted_classes if char_id in label_map]

    # Join the characters to form the recognized text
    recognized_text = ''.join(recognized_text)

    # Print the recognized text
    print(f"Recognized Text: {recognized_text}")

# Example usage
input_image_path = os.path.join("/content/drive/MyDrive", "handwritten.jpg")  # Full path to the input image

# Make prediction and print the recognized text
predict(input_image_path)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
Recognized Text: B


In [69]:
import tensorflow as tf
from PIL import Image, ImageOps
import numpy as np
import cv2
import os

# Load the trained model
model_path = 'handwritten_text_recognition_model.h5'
model = tf.keras.models.load_model(model_path)

# Function to preprocess each individual character image
def preprocess_image(image):
    # Convert image to grayscale if it's not already
    image = ImageOps.grayscale(image)
    # Resize to 28x28 to match model input
    image = image.resize((28, 28))
    # Normalize pixel values
    image = np.array(image) / 255.0
    # Add batch dimension
    image = np.expand_dims(image, axis=0)
    return image

# Function to detect and segment characters in the image
def segment_characters(image_path):
    # Load the image and convert to grayscale
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Thresholding to binarize the image
    _, thresh_image = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY_INV)

    # Find contours of the characters
    contours, _ = cv2.findContours(thresh_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Sort contours based on their x-coordinate (left to right)
    contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])

    # List to store segmented character images
    char_images = []

    for contour in contours:
        # Get bounding box for each character
        x, y, w, h = cv2.boundingRect(contour)
        if w * h > 100:  # Filter out too small contours (noise)
            # Extract character image
            char_image = image[y:y + h, x:x + w]
            char_images.append(char_image)

    return char_images

# Function to predict the text from the image
def predict(input_image_path):
    # Segment characters in the image
    char_images = segment_characters(input_image_path)

    # Initialize the recognized text list
    recognized_text = []

    # Predict each character
    for char_image in char_images:
        # Preprocess each character image
        processed_image = preprocess_image(Image.fromarray(char_image))

        # Predict the character using the model
        prediction = model.predict(processed_image)

        # Get the predicted class index
        predicted_class = np.argmax(prediction, axis=1)[0]

        # Map the predicted class index to a character
        label_map = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J'}

        # Append the recognized character to the list
        recognized_text.append(label_map.get(predicted_class, 'Unknown'))

    # Join the characters to form the recognized text
    recognized_text = ''.join(recognized_text)

    # Print the recognized text
    print(f"Recognized Text: {recognized_text}")

# Example usage
input_image_path = os.path.join("/content/drive/MyDrive", "handwritten.jpg")  # Full path to the input image

# Make prediction and print the recognized text
predict(input_image_path)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4