In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt


from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, MaxPooling2D, Dense

In [None]:
# Encoder that maps class-folder names to integer class ids.
le = LabelEncoder()

# Accumulators filled by the loading cell: one preprocessed image and one
# label (the class-folder name) per sample.
images, labels = [], []

# Root folders to load; each is expected to contain one sub-folder per class.
training_paths = [
    'data/raw/character_set1/training_data/',
    'data/raw/character_set3/',
]
#testing_path = 'data/raw/character_set1/testing_data/'


In [None]:
# Load every image under each training root. The name of the sub-folder an
# image lives in is used as its label.
#
# The accumulators are reset here so that re-running this cell does not
# append a second copy of the dataset.
images = []
labels = []

for training_path in training_paths:
    # sorted(): os.listdir order is arbitrary, and the seeded shuffle later on
    # is only reproducible if the samples arrive in a stable order.
    for class_name in sorted(os.listdir(training_path)):
        class_dir = os.path.join(training_path, class_name)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders (e.g. .DS_Store).
            continue
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None instead of raising when the file
                # is missing, unreadable, or not an image.
                print(f"Skipping unreadable image: {file_path}")
                continue
            img = cv2.resize(img, (64, 64))
            # Scale pixel values to [0, 1] as float32.
            img = np.array(img, dtype=np.float32) / 255
            images.append(img)
            labels.append(class_name)

## Print out details of X and y

In [None]:
# Stack the per-sample lists into arrays and report their sizes.
X = np.array(images)
y = np.array(labels)

print(f"len(X):  {len(X)}")
print(f"X.shape:  {X.shape}")

print(f"len(y):  {len(y)}")
print(f"y.shape:  {y.shape}")

In [None]:
# Encode the string labels as integer class ids.
# The encoder is always fit on the raw `labels` list rather than on `y`:
# `y` is overwritten with the integer codes below, so fitting on `y` would,
# on a second run of this cell, make `le.classes_` hold integers and break
# `le.inverse_transform` at prediction time.
le = LabelEncoder()
y = le.fit_transform(labels)

# Shuffle images and labels together with a fixed seed.
X_sh, y_sh = shuffle(X, y, random_state=42)

In [None]:
# Hold out 20% of the shuffled data for testing; stratify on the labels so
# that each class keeps the same proportion in both parts.
split_parts = train_test_split(
    X_sh,
    y_sh,
    test_size=0.2,
    random_state=42,
    stratify=y_sh,
)
X_train, X_test, y_train, y_test = split_parts

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")

# Create Keras model
Create a neural network model.

In [None]:
# CNN classifier: four Conv2D + MaxPooling2D stages with 16, 32, 64 and 128
# filters, followed by two dense layers and a softmax output.
model = Sequential()

# First stage also declares the input: 64x64 images with 3 channels.
model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D())

for n_filters in (32, 64, 128):
    model.add(Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D())

model.add(Flatten())
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=64, activation='relu'))

# One output unit per class known to the fitted label encoder.
model.add(Dense(units=len(le.classes_), activation='softmax'))


In [None]:
# The sparse loss is used because the labels are integer class ids, not one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train on the training split only. Fitting on the full shuffled set
# (X_sh, y_sh) would include the held-out test samples, and the test metrics
# would then be measured on data the model has already seen.
# validation_split holds back 20% of X_train for per-epoch validation.
history = model.fit(X_train, y_train, validation_split=0.2, batch_size=25, epochs=10)

# Quick look at the loss curves.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['loss', 'val_loss'])
plt.show()

In [None]:
# Evaluate the model on the testing set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Separate Evaluation for Uppercase and Lowercase Characters

# Determine the number of uppercase letters
# This assumes that the first 'num_uppercase' classes are uppercase letters
# Adjust 'num_uppercase' based on your actual label encoding
# NOTE(review): the encoded ids follow the order of `le.classes_`; if the
# dataset also contains other classes (e.g. digits), the uppercase letters may
# not occupy ids 0..25 - confirm against `le.classes_` before trusting this split.
num_uppercase = 26  # Typically, A-Z

# Find indices for uppercase and lowercase characters
uppercase_indices = np.where(y_test < num_uppercase)
lowercase_indices = np.where(y_test >= num_uppercase)

# Evaluate the model on uppercase characters (skipped when the test set has
# none, because model.evaluate cannot run on an empty array)
if len(uppercase_indices[0]) > 0:
    test_loss_upper, test_acc_upper = model.evaluate(X_test[uppercase_indices], y_test[uppercase_indices], verbose=0)
    print(f"Uppercase Test Loss: {test_loss_upper:.4f}")
    print(f"Uppercase Test Accuracy: {test_acc_upper:.4f}")
else:
    test_loss_upper = test_acc_upper = None
    print("No uppercase samples in the test set; skipping uppercase evaluation.")

# Evaluate the model on lowercase characters (same empty-subset guard)
if len(lowercase_indices[0]) > 0:
    test_loss_lower, test_acc_lower = model.evaluate(X_test[lowercase_indices], y_test[lowercase_indices], verbose=0)
    print(f"Lowercase Test Loss: {test_loss_lower:.4f}")
    print(f"Lowercase Test Accuracy: {test_acc_lower:.4f}")
else:
    test_loss_lower = test_acc_lower = None
    print("No lowercase samples in the test set; skipping lowercase evaluation.")



In [None]:
# Plot training & validation curves, one figure per metric.
# Each entry: (training key, validation key, display name, legend position).
curve_specs = (
    ('loss', 'val_loss', 'Loss', 'upper right'),
    ('accuracy', 'val_accuracy', 'Accuracy', 'lower right'),
)

for train_key, val_key, metric_name, legend_loc in curve_specs:
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[train_key], label=f'Training {metric_name}')
    plt.plot(history.history[val_key], label=f'Validation {metric_name}')
    plt.title(f'Model {metric_name} Over Epochs')
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    plt.legend(loc=legend_loc)
    plt.grid(True)
    plt.show()


## Visualization

### Visualize Regions

In [None]:
def visualize_regions(image, regions):
    """Show `image` with a green rectangle drawn around every region.

    Args:
        image: Image array; it is copied, so the caller's array is not modified.
        regions: Iterable of (x, y, w, h) bounding boxes.

    The window stays open until a key is pressed, after which all OpenCV
    windows are closed.
    """
    annotated = image.copy()
    box_color = (0, 255, 0)
    box_thickness = 2

    for left, top, width, height in regions:
        top_left = (left, top)
        bottom_right = (left + width, top + height)
        cv2.rectangle(annotated, top_left, bottom_right, box_color, box_thickness)

    cv2.imshow("Detected Text Regions", annotated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

### Visualize Processed Image

In [None]:
def display_processed_image(window_name, processed_image):
    """Show `processed_image` in an OpenCV window titled `window_name`.

    Args:
        window_name: Title of the window.
        processed_image: Image array to display.

    Blocks until a key is pressed, then closes all OpenCV windows.
    """
    cv2.imshow(window_name, processed_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

## Image Processing Operations

In [None]:
def image_processing_operations_visualization(image, operation):
    """Binarise `image`, optionally erode or dilate it, display and return it.

    The image is converted to grayscale and thresholded with an inverted
    binary threshold at 100 (pixels above 100 become 0, the rest 255).

    Args:
        image: BGR image array.
        operation: One of 'threshold', 'erosion' or 'dilation'.

    Returns:
        A tuple (threshold_value, processed_image), where threshold_value is
        the first value returned by cv2.threshold.

    Raises:
        ValueError: If `operation` is not one of the supported names.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    if operation not in ('threshold', 'erosion', 'dilation'):
        raise ValueError("Operation must be 'threshold', 'erosion', or 'dilation'")

    # Every supported operation starts from the same binary image.
    threshold_value, binary = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY_INV)

    if operation == 'threshold':
        window_name = "Thresholded Image"
        processed_image = binary
    elif operation == 'erosion':
        window_name = "Eroded Image"
        erosion_kernel = np.ones((2, 1), np.uint8)
        processed_image = cv2.erode(binary, erosion_kernel, iterations=1)
    else:
        window_name = "Dilated Image"
        # NOTE(review): a 1x1 kernel should leave the image unchanged, which
        # would make this branch equivalent to 'threshold' - confirm intended.
        dilation_kernel = np.ones((1, 1), np.uint8)
        processed_image = cv2.dilate(binary, dilation_kernel, iterations=1)

    display_processed_image(window_name, processed_image)

    return threshold_value, processed_image

## Italic Characters Detection

### Compute Skew Angle

In [None]:
def compute_skew_angle(region):
    """Estimate a skew angle from the minimum-area rectangle of the non-zero pixels.

    Args:
        region: Array whose pixels greater than 0 are treated as foreground.

    Returns:
        The rectangle angle reported by cv2.minAreaRect, remapped:
        -(90 + angle) when the reported angle is below -45, otherwise -angle.

    NOTE(review): the remapping assumes cv2.minAreaRect reports angles in
    [-90, 0); the reported range may differ between OpenCV releases - confirm
    for the installed version.
    """
    # Coordinates are stacked in the axis order np.nonzero yields them.
    pixel_coords = np.column_stack(np.nonzero(region > 0))
    raw_angle = cv2.minAreaRect(pixel_coords)[-1]
    return -(90 + raw_angle) if raw_angle < -45 else -raw_angle

### Deskew Region

In [None]:
def deskew_region(region, angle):
    """Rotate `region` by `angle` about its centre, keeping the original size.

    Args:
        region: Image array; its first two dimensions are height and width.
        angle: Rotation angle passed to cv2.getRotationMatrix2D.

    Returns:
        The rotated image, interpolated bicubically. Areas exposed by the
        rotation are filled with the constant value 255.
    """
    height, width = region.shape[:2]
    pivot = (width // 2, height // 2)
    transform = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        region,
        transform,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=255,
    )

## Detect Text Regions

In [None]:
def detect_text_regions(image):
    """Find candidate character bounding boxes in `image`.

    The image is binarised via the 'dilation' processing operation, external
    contours are extracted, and each contour's bounding box is kept when both
    its width and height are strictly between 0 and 100 pixels. The kept
    boxes are drawn on the image and displayed before returning.

    Args:
        image: BGR image array.

    Returns:
        List of (x, y, w, h) tuples in the order the contours were found.
    """
    _, binary = image_processing_operations_visualization(image, 'dilation')

    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Per-region deskewing (compute_skew_angle / deskew_region) is currently
    # disabled; boxes are taken directly from the contours.
    candidate_boxes = (cv2.boundingRect(contour) for contour in contours)
    regions = [
        (x, y, w, h)
        for (x, y, w, h) in candidate_boxes
        if 0 < w < 100 and 0 < h < 100  # Size filter
    ]

    visualize_regions(image, regions)

    return regions

## Sorting Bounding Boxes by Rows and Columns

In [None]:
def sort_bounding_boxes(regions, row_threshold=20):
    """Order bounding boxes in reading order: rows top-to-bottom, then left-to-right.

    Boxes are sorted by their `y` coordinate and grouped into rows: a box
    joins the current row when its `y` differs from the `y` of the box most
    recently added to that row by less than `row_threshold`; otherwise it
    starts a new row. Each row is then sorted by `x`.

    Args:
        regions: Iterable of (x, y, w, h) boxes. May be empty.
        row_threshold: Maximum vertical distance (exclusive) between
            consecutive boxes of the same row. Adjust based on character
            spacing.

    Returns:
        A new list with the boxes in reading order; an empty list when
        `regions` is empty.
    """
    by_top = sorted(regions, key=lambda box: box[1])
    if not by_top:
        # Nothing detected: avoid indexing into an empty list below.
        return []

    # Group bounding boxes into rows
    rows = [[by_top[0]]]
    for box in by_top[1:]:
        current_row = rows[-1]
        if abs(box[1] - current_row[-1][1]) < row_threshold:
            current_row.append(box)
        else:
            rows.append([box])

    # Sort each row left-to-right
    sorted_regions = []
    for row in rows:
        sorted_regions.extend(sorted(row, key=lambda box: box[0]))

    return sorted_regions


## Resize Image Operation

In [None]:
def resize_image(image, target_size=(64, 64), mode='normal'):
    """Resize `image` to `target_size`, optionally preserving its aspect ratio.

    Args:
        image: Image array, grayscale (H, W) or multi-channel (H, W, C).
        target_size: (width, height) of the output in pixels.
        mode: 'normal' stretches the image to `target_size` and ignores the
            aspect ratio. 'aspect_ratio' scales the image to fit inside
            `target_size` and pads the remainder with white (255) so the
            output is exactly `target_size`.

    Returns:
        The resized image.

    Raises:
        ValueError: If `mode` is not 'normal' or 'aspect_ratio', or if
            'aspect_ratio' mode is given an image with zero width or height.
    """
    if mode == 'normal':
        # Normal resize (ignores aspect ratio)
        return cv2.resize(image, target_size)

    if mode != 'aspect_ratio':
        raise ValueError("Mode must be either 'normal' or 'aspect_ratio'")

    target_w, target_h = target_size
    # shape[:2] works for both grayscale and multi-channel images.
    src_h, src_w = image.shape[:2]
    if src_h == 0 or src_w == 0:
        raise ValueError("Cannot resize an empty image")

    # Fit the image inside the target box. Integer arithmetic avoids float
    # rounding; max(1, ...) keeps very thin images from collapsing to 0 px.
    if src_h * target_w <= src_w * target_h:
        # Width is the limiting dimension.
        new_w = target_w
        new_h = max(1, (src_h * target_w) // src_w)
    else:
        # Height is the limiting dimension.
        new_h = target_h
        new_w = max(1, (src_w * target_h) // src_h)
    resized_image = cv2.resize(image, (new_w, new_h))

    # Pad to exactly target_size. The two sides are computed separately so an
    # odd gap does not leave the result one pixel short.
    pad_left = (target_w - new_w) // 2
    pad_right = target_w - new_w - pad_left
    pad_top = (target_h - new_h) // 2
    pad_bottom = target_h - new_h - pad_top
    resized_image = cv2.copyMakeBorder(
        resized_image, pad_top, pad_bottom, pad_left, pad_right,
        cv2.BORDER_CONSTANT, value=[255, 255, 255],
    )

    return resized_image

## Recognize Characters

In [None]:
def recognize_characters(image, model, label_encoder):
    """Detect, order and classify the characters in `image`.

    Text regions are detected and sorted into reading order. Each region is
    cropped, padded with a 1-pixel white border, resized to 64x64, scaled to
    [0, 1] as float32 (the same preprocessing the training images receive)
    and classified with `model`. Each preprocessed crop is shown in an OpenCV
    window that waits for a key press.

    Args:
        image: BGR image array, e.g. the result of cv2.imread.
        model: Classifier exposing `predict` on a batch of 64x64x3 images.
        label_encoder: Fitted encoder exposing `classes_` and
            `inverse_transform`, used to map class ids back to labels.

    Returns:
        List of predicted labels in reading order; empty if no regions are
        detected.

    Raises:
        ValueError: If `image` is None (e.g. cv2.imread failed).
    """
    if image is None:
        raise ValueError("image is None; check that the input file exists and is a readable image")

    regions = detect_text_regions(image)
    if not regions:
        # Nothing to sort or classify.
        return []

    sorted_regions = sort_bounding_boxes(regions)
    # print(sorted_regions)

    num_classes = len(label_encoder.classes_)
    pad = 1

    characters = []
    for (x, y, w, h) in sorted_regions:
        char_image = image[y:y+h, x:x+w]

        # Check for an empty crop BEFORE padding: once the border is added the
        # array is never empty, so the check would be useless afterwards.
        if char_image.size == 0:
            print(f"Skipping empty character region at ({x}, {y}, {w}, {h})")
            continue

        # Add padding
        char_image = cv2.copyMakeBorder(char_image, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=[255, 255, 255])

        # Print the shape of the character image for debugging
        print(f"Character image shape: {char_image.shape}")

        # Resize and scale the character image
        char_image_resized = resize_image(char_image, target_size=(64, 64), mode='normal')
        char_image_normalized = char_image_resized.astype(np.float32) / 255

        cv2.imshow('image after resized', char_image_normalized)
        cv2.waitKey(0)  # Wait indefinitely for a key press
        cv2.destroyAllWindows()

        prediction = model.predict(np.expand_dims(char_image_normalized, axis=0))
        predicted_class = np.argmax(prediction, axis=1)

        if not 0 <= predicted_class[0] < num_classes:
            print(f"Unrecognized label: {predicted_class[0]}")
            predicted_char = "?"  # Placeholder for unrecognized labels
        else:
            predicted_char = label_encoder.inverse_transform(predicted_class)[0]

        # TODO: when the current character is 'i' or 'j', drop the previous
        # element (expected to be the dot above the letter):
        # if predicted_char == 'i' or predicted_char == 'j':
        #     characters.pop()

        characters.append(predicted_char)

    return characters

# Driver Code

In [None]:
# Run the recognition pipeline on a sample image.
imggg = 'data/raw/character_set1/Test_1.png'
image = cv2.imread(imggg)
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Could not read image: {imggg}")

y_pred = recognize_characters(image, model, le)
print(y_pred)
print(len(y_pred))