In [None]:
%pip install tensorflow torch opencv-python pandas scikit-learn scipy

In [None]:
import tensorflow as tf
import os

# Location of the serialized Keras model on disk
model_save_dir = 'saved_ocr_model'
model_save_path = os.path.join(model_save_dir, 'combined_ocr_model.keras')

# `loaded_model` stays None unless the file exists AND deserialization succeeds,
# so later cells can test `loaded_model is not None`.
loaded_model = None

if not os.path.exists(model_save_path):
    # Nothing to load: report it and leave `loaded_model` as None
    print(f"Error: Model file not found at {model_save_path}. Please ensure the model has been saved.")
else:
    print(f"Loading the model from: {model_save_path}")
    try:
        loaded_model = tf.keras.models.load_model(model_save_path)
        print("Model loaded successfully.")
        # Uncomment to inspect the restored architecture:
        # loaded_model.summary()
    except Exception as e:
        # Best effort: report the failure and fall back to "no model"
        print(f"An error occurred while loading the model: {e}")
        loaded_model = None

# Announce readiness only when a model object is actually available
if loaded_model is not None:
    print("\nThe loaded model is ready for use.")

In [None]:
import pandas as pd
import os

# Define the path to the test CSV file
test_csv_file_path = 'extracted_archive/testdata.csv'

# Check if the CSV file exists
if not os.path.exists(test_csv_file_path):
    print(f"Error: The file {test_csv_file_path} was not found.")
else:
    try:
        # Read the comma-separated file, replacing its header row with fixed
        # column names (the names in the file might not be consistent).
        #
        # dtype=str + keep_default_na=False keep every cell as literal text.
        # With pandas' defaults, words such as "NA", "NULL" or "nan" are parsed
        # as missing values, which would silently turn a valid ground-truth word
        # into NaN and later break string operations like .lower().
        test_df = pd.read_csv(
            test_csv_file_path,
            sep=',',
            header=0,
            names=['ImgName', 'GroundTruth', 'smallLexi', 'mediumLexi'],
            dtype=str,
            keep_default_na=False,
        )

        # --- Filter DataFrame to include only 'test' images ---
        initial_rows_test = len(test_df)
        # na=False: a missing image name counts as "not a test image" instead of
        # producing a NaN entry in the mask (which boolean indexing rejects).
        is_test_image = test_df['ImgName'].str.startswith('test/', na=False)
        # reset_index keeps index labels equal to row positions after filtering,
        # so label-based and position-based lookups in later cells agree.
        test_df = test_df[is_test_image].reset_index(drop=True)
        filtered_rows_test = len(test_df)
        print(f"Filtered test data: Kept {filtered_rows_test} rows starting with 'test/' out of {initial_rows_test}.")
        # --- End filtering ---

        # Show the first few rows of the DataFrame
        print("Test CSV data loaded successfully:")
        display(test_df.head())

        # Print DataFrame information
        print("\nTest DataFrame Info:")
        test_df.info()

    except Exception as e:
        print(f"An error occurred while reading the test CSV file: {e}")

In [None]:
import os
import cv2
import pandas as pd
import numpy as np

# Notebook state this cell relies on (defined in other cells):
#   - grayscale_image, resize_image, normalize_pixels: preprocessing helpers
#   - test_df: DataFrame produced by the test-CSV loading cell

# Accumulators for the preprocessed images and their text labels
processed_test_images = []
original_test_labels = []

# Root folder that the relative 'ImgName' paths are joined onto (same as training data)
base_image_dir = 'extracted_archive/IIIT5K-Word_V3.0/IIIT5K'

# Resize target, given as (width, height) - same as training
target_size = (128, 32)

if 'test_df' not in locals() or test_df.empty:
    # Nothing to preprocess without the test DataFrame
    print("DataFrame 'test_df' not found or is empty. Please run the cell to load the test CSV first.")
else:
    for index, row in test_df.iterrows():
        image_path_relative = row['ImgName']
        text_label = row['GroundTruth']
        full_image_path = os.path.join(base_image_dir, image_path_relative)

        # Skip rows whose image is missing on disk
        if not os.path.exists(full_image_path):
            print(f"Warning: Image file not found: {full_image_path}")
            continue

        # cv2.imread returns None when the file cannot be decoded
        img = cv2.imread(full_image_path)
        if img is None:
            print(f"Warning: Could not load image file: {full_image_path}")
            continue

        # grayscale -> resize -> normalize
        gray_img = grayscale_image(img)
        resized_img = resize_image(gray_img, target_size)
        normalized_img = normalize_pixels(resized_img)

        # Images and labels are appended together so they stay aligned by position
        processed_test_images.append(normalized_img)
        original_test_labels.append(text_label)

    # Stack the images into one array and wrap the labels in a Series
    processed_test_images = np.array(processed_test_images)
    original_test_labels = pd.Series(original_test_labels)

    # Report the resulting sizes for a quick sanity check
    print("Preprocessing of test images complete.")
    print("Shape of processed_test_images array:", processed_test_images.shape)
    print("Length of original_test_labels list:", len(original_test_labels))

In [None]:
import nltk
from nltk.metrics.distance import edit_distance

# Download the NLTK resources requested by this notebook.
# nltk.download() reports a failed download through its boolean return value
# rather than by raising, so every result is checked before claiming success.
# NOTE(review): neither resource appears to be used by the metric functions
# defined below - confirm whether these downloads are still needed.
nltk_resources = ('averaged_perceptron_tagger', 'punkt')
try:
    failed_nltk_resources = [
        resource for resource in nltk_resources
        if not nltk.download(resource, quiet=True)
    ]
except Exception as e:
    print(f"Error downloading NLTK data: {e}")
else:
    if failed_nltk_resources:
        print(f"Error downloading NLTK data: {failed_nltk_resources}")
    else:
        print("NLTK data ('averaged_perceptron_tagger', 'punkt') downloaded successfully.")


def calculate_cer(ground_truth, prediction):
    """
    Character Error Rate (CER) of a prediction against its reference.

    The rate is the character-level edit distance between the two strings
    divided by the length of the reference.

    Args:
        ground_truth: The reference string.
        prediction: The predicted string.

    Returns:
        The CER as a float; 0.0 when ground_truth is empty (this avoids a
        division by zero, whatever the prediction contains).
    """
    reference_length = len(ground_truth)

    # An empty reference has no characters to normalise by
    if not reference_length:
        return 0.0

    # Edit operations needed to turn the reference into the prediction,
    # normalised by the reference length
    return edit_distance(ground_truth, prediction) / reference_length

def calculate_wer(ground_truth, prediction):
    """
    Word Error Rate (WER) of a prediction against its reference.

    Both strings are tokenised with a plain whitespace split; the rate is the
    word-level edit distance divided by the number of reference words.

    Args:
        ground_truth: The reference string.
        prediction: The predicted string.

    Returns:
        The WER as a float; 0.0 when the reference contains no words, which
        mirrors how calculate_cer treats an empty reference.
    """
    reference_tokens = ground_truth.split()
    hypothesis_tokens = prediction.split()

    # No reference words: nothing to normalise by, report 0.0
    if not reference_tokens:
        return 0.0

    # edit_distance accepts token lists, giving a word-level distance
    word_errors = edit_distance(reference_tokens, hypothesis_tokens)
    return word_errors / len(reference_tokens)

print("Character Error Rate (CER) and Word Error Rate (WER) calculation functions defined.")

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd

# Redefine the decode_predictions function using a greedy approach
def decode_predictions_greedy(predictions, int_to_char):
    """
    Greedily decode CTC-style model outputs into text.

    For every time step the most probable class is taken (argmax). Consecutive
    repeats of the same class are then collapsed and blank tokens are dropped.

    Args:
        predictions: Array-like (NumPy array or eager TensorFlow tensor) of
            shape (time_steps, num_classes) for a single sample, or
            (batch_size, time_steps, num_classes) for a batch.
        int_to_char: A dictionary mapping integer class indices to characters.
            The blank token is taken to be index ``len(int_to_char) - 1``.
            NOTE(review): this assumes the mapping includes the blank as its
            last entry - confirm against how int_to_char is built.

    Returns:
        A single decoded string when ``predictions`` was 2-D (one sample), or
        a list of decoded strings when it was 3-D (a batch). A batch always
        yields a list, including a batch of size 1.

    Raises:
        ValueError: If ``predictions`` is neither 2-D nor 3-D.
    """
    probabilities = np.asarray(predictions)

    # Remember the input rank up front: the return type depends on whether the
    # caller passed one sample or a batch, not on how many samples there are.
    is_single_sample = probabilities.ndim == 2
    if is_single_sample:
        probabilities = probabilities[np.newaxis, ...]  # Add batch dimension
    elif probabilities.ndim != 3:
        raise ValueError(
            "predictions must have shape (time_steps, num_classes) or "
            f"(batch_size, time_steps, num_classes); got {probabilities.shape}"
        )

    # Most probable class at each time step -> shape (batch_size, time_steps)
    predicted_indices = probabilities.argmax(axis=-1)

    # Integer value used for the blank token
    blank_int = len(int_to_char) - 1

    decoded_texts = []
    for sample_indices in predicted_indices:
        decoded_sequence = []
        previous_index = -1  # Sentinel: no token seen yet

        for index in sample_indices:
            index = int(index)  # plain int for the dictionary lookup
            # Emit a character only when it is not blank and differs from the
            # previous time step. A blank in between resets previous_index, so
            # a repeated character after a blank is emitted again.
            if index != blank_int and index != previous_index:
                decoded_sequence.append(int_to_char[index])
            previous_index = index

        decoded_texts.append("".join(decoded_sequence))

    return decoded_texts[0] if is_single_sample else decoded_texts

print("Greedy decode predictions function defined.")

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import random # Import random to select random samples
import cv2
from google.colab.patches import cv2_imshow # For displaying the image with regions

# Notebook state this cell relies on (none of it is defined in this cell):
#   - processed_test_images, original_test_labels: built by the test-image preprocessing cell
#   - test_df: the test CSV DataFrame (used for the visualization part)
#   - combined_model (model trained in this session) or loaded_model (model restored from disk)
#   - decode_predictions_greedy and int_to_char
#   - calculate_cer and calculate_wer
#   - grayscale_image, resize_image, normalize_pixels and target_size = (128, 32)

# --- Configuration ---
# Define the base directory for images (same as used for loading test data)
base_image_dir = 'extracted_archive/IIIT5K-Word_V3.0/IIIT5K'

# Number of samples to visualize
num_samples_to_visualize = 10 # You can change this number

# Batch size for the single bulk prediction pass over the test set
eval_batch_size = 64

# --- Evaluate Model on Test Set ---
print("Starting evaluation on test set (sample by sample) with greedy decoding...")

# np.asarray guards the case where processed_test_images is still the empty
# Python list left behind when the preprocessing cell found no test DataFrame
# (a list has no .size attribute).
if 'processed_test_images' in globals() and np.asarray(processed_test_images).size > 0:
    processed_test_images = np.asarray(processed_test_images)
    if processed_test_images.ndim == 3: # (num_samples, height, width)
        processed_test_images = np.expand_dims(processed_test_images, axis=-1) # Add channel dimension

    test_images_tensor = tf.constant(processed_test_images, dtype=tf.float32)

    # Prefer the model trained in this session; otherwise fall back to the model
    # restored from disk by the loading cell.
    ocr_model = globals().get('combined_model')
    if ocr_model is None:
        ocr_model = globals().get('loaded_model')
    if ocr_model is None:
        raise NameError(
            "No model available: define 'combined_model' or load 'loaded_model' before running evaluation."
        )

    total_samples = test_images_tensor.shape[0]
    if len(original_test_labels) != total_samples:
        # Images and labels are appended together during preprocessing, so a
        # mismatch means stale notebook state.
        raise ValueError(
            f"Found {total_samples} test images but {len(original_test_labels)} labels; "
            "re-run the preprocessing cell."
        )
    print(f"Processing {total_samples} test samples...")

    # One batched forward pass instead of one predict() call per image; the
    # decoding below still handles one sample at a time.
    test_predictions = ocr_model.predict(test_images_tensor, batch_size=eval_batch_size, verbose=0)

    # Per-sample error rates
    all_cer = []
    all_wer = []

    for i in range(total_samples):
        # test_predictions[i] has shape (time_steps, num_classes)
        decoded_text_single = decode_predictions_greedy(test_predictions[i], int_to_char)

        # Labels were collected in the same order as the images
        ground_truth_label = original_test_labels[i]

        # Case-insensitive comparison; str() protects against non-string labels
        ground_truth_lower = str(ground_truth_label).lower()
        prediction_lower = decoded_text_single.lower()

        all_cer.append(calculate_cer(ground_truth_lower, prediction_lower))
        all_wer.append(calculate_wer(ground_truth_lower, prediction_lower))

        if (i + 1) % 100 == 0:
            print(f"Processed {i + 1}/{total_samples} samples.")

    # Calculate the average CER and WER
    average_cer = np.mean(all_cer)
    average_wer = np.mean(all_wer)

    # Print the results
    print("\n--- Evaluation Results ---")
    print(f"Average Character Error Rate (CER): {average_cer:.4f}")
    print(f"Average Word Error Rate (WER): {average_wer:.4f}")

    print("Evaluation completed.")

    # --- Visualize Predictions on Random Samples ---
    if 'test_df' in globals() and not test_df.empty:
        # Sample row POSITIONS (not index labels): after filtering, the labels
        # of test_df need not match positions, and rows are read with .iloc.
        total_rows = len(test_df)
        if total_rows >= num_samples_to_visualize:
            samples_shown = num_samples_to_visualize
            sample_indices = random.sample(range(total_rows), samples_shown)
        else:
            # Not enough rows: visualize all of them, in order
            samples_shown = total_rows
            sample_indices = list(range(total_rows))

        print(f"\nVisualizing predictions for {samples_shown} random samples from the test set.")
        print("-" * 30)

        for i in sample_indices:
            # Image path and ground truth label for the selected row
            row = test_df.iloc[i]
            image_path_relative = row['ImgName']
            ground_truth_label = row['GroundTruth']

            full_image_path = os.path.join(base_image_dir, image_path_relative)

            # Load the original image for display
            original_image = cv2.imread(full_image_path)

            if original_image is not None:
                # Same preprocessing as used for the evaluation images
                gray_img = grayscale_image(original_image)
                resized_img = resize_image(gray_img, target_size)
                normalized_img = normalize_pixels(resized_img)
                # Add channel and batch dimensions
                preprocessed_img_tensor = tf.constant(
                    np.expand_dims(np.expand_dims(normalized_img, axis=-1), axis=0),
                    dtype=tf.float32,
                )

                model_prediction = ocr_model.predict(preprocessed_img_tensor, verbose=0)
                decoded_text = decode_predictions_greedy(model_prediction[0], int_to_char)

                # --- Display Results ---
                print(f"Sample Index: {i}")
                print(f"Ground Truth: {ground_truth_label}")
                print(f"Model Prediction: {decoded_text}")

                # Display the original image, resized so every sample has the same size
                display_original_img = cv2.resize(original_image, (200, 50))
                cv2_imshow(display_original_img)

            else:
                print(f"Warning: Could not load image file for visualization: {full_image_path}")

            print("-" * 30)

        print("Visualization complete.")

    else:
        print("\nDataFrame 'test_df' not found or is empty. Skipping visualization.")

else:
    print("NumPy array 'processed_test_images' not found or is empty. Skipping evaluation and visualization.")