In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing import image_dataset_from_directory
from PIL import Image
from itertools import groupby



In [3]:

# 1. Load EMNIST Dataset
(ds_train, ds_test), ds_info = tfds.load(
    'emnist/balanced',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)

In [4]:
# Normalize and preprocess the dataset
def preprocess(image, label):
    image = tf.image.resize(image, [28, 28])
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

ds_train = ds_train.map(preprocess).batch(128).prefetch(tf.data.AUTOTUNE)
ds_test = ds_test.map(preprocess).batch(128).prefetch(tf.data.AUTOTUNE)

# Convert labels to categorical
num_classes = ds_info.features['label'].num_classes

In [11]:
# 2. Model Building
model = Sequential()

# Convolutional layers
model.add(Conv2D(32, kernel_size=(5,5), padding="Same", activation="relu", input_shape=(28,28,1)))
model.add(Conv2D(32, kernel_size=(5,5), padding="Same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, kernel_size=(3,3), padding="Same", activation="relu"))
model.add(Conv2D(64, kernel_size=(3,3), padding="Same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# Fully connected layers
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.25))
model.add(Dense(num_classes, activation="softmax"))  # Adjusted for EMNIST classes

# Compile the model
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])


# Learning rate reduction
learning_rate_reduction = ReduceLROnPlateau(monitor="val_accuracy", patience=3, verbose=1, factor=0.5, min_lr=0.0001)

# Train the model
epochs = 20
history = model.fit(
    ds_train,
    epochs=epochs,
    validation_data=ds_test,
    verbose=2,
    callbacks=[learning_rate_reduction]
)

# Save the model
model.save("emnist_recognition_model.h5")



Epoch 1/20
882/882 - 62s - 70ms/step - accuracy: 0.7620 - loss: 0.7639 - val_accuracy: 0.8592 - val_loss: 0.4289 - learning_rate: 0.0010
Epoch 2/20
882/882 - 60s - 68ms/step - accuracy: 0.8599 - loss: 0.4074 - val_accuracy: 0.8755 - val_loss: 0.3699 - learning_rate: 0.0010
Epoch 3/20
882/882 - 59s - 67ms/step - accuracy: 0.8760 - loss: 0.3545 - val_accuracy: 0.8774 - val_loss: 0.3519 - learning_rate: 0.0010
Epoch 4/20
882/882 - 54s - 61ms/step - accuracy: 0.8829 - loss: 0.3286 - val_accuracy: 0.8871 - val_loss: 0.3423 - learning_rate: 0.0010
Epoch 5/20
882/882 - 53s - 60ms/step - accuracy: 0.8881 - loss: 0.3125 - val_accuracy: 0.8875 - val_loss: 0.3435 - learning_rate: 0.0010
Epoch 6/20
882/882 - 61s - 69ms/step - accuracy: 0.8925 - loss: 0.3021 - val_accuracy: 0.8883 - val_loss: 0.3434 - learning_rate: 0.0010
Epoch 7/20
882/882 - 71s - 80ms/step - accuracy: 0.8940 - loss: 0.2952 - val_accuracy: 0.8896 - val_loss: 0.3327 - learning_rate: 0.0010
Epoch 8/20
882/882 - 56s - 64ms/step - ac



In [12]:
# Load and preprocess the dataset
from emnist import extract_training_samples, extract_test_samples
#from tensorflow.keras.utils import to_categorical
import numpy as np

def load_emnist_data():
    try:
        # Extract training and test samples from EMNIST dataset
        X_train, y_train = extract_training_samples('balanced')
        X_test, y_test = extract_test_samples('balanced')
        
        # Normalize pixel values
        X_train = X_train.astype('float32') / 255.0
        X_test = X_test.astype('float32') / 255.0
        
        # Reshape data for CNN input
        X_train = X_train.reshape(-1, 28, 28, 1)
        X_test = X_test.reshape(-1, 28, 28, 1)
        
        # Convert labels to categorical format
        y_train = to_categorical(y_train, num_classes=47)
        y_test = to_categorical(y_test, num_classes=47)
        
        return X_train, X_test, y_train, y_test
    except Exception as e:
        print(f"An error occurred while loading the EMNIST dataset: {e}")
        print("Please ensure you have manually downloaded the dataset as instructed.")
        return None, None, None, None

# Load the data
X_train, X_test, y_train, y_test = load_emnist_data()

if X_train is not None:
    print("Dataset loaded successfully.")
    print(f"Training data shape: {X_train.shape}")
    print(f"Test data shape: {X_test.shape}")
else:
    print("Failed to load the dataset. Please check the error message above.")



Dataset loaded successfully.
Training data shape: (112800, 28, 28, 1)
Test data shape: (18800, 28, 28, 1)


In [13]:
# 3. Prediction Function
def predict_equation(image_path):
    # Load and preprocess the image
    image = Image.open(image_path).convert("L")
    w, h = image.size
    new_h = 28
    new_w = int((w / h) * 28)
    image = image.resize((new_w, new_h))
    image_arr = np.array(image)
    image_arr = 255 - image_arr
    image_arr = image_arr / 255.0

    # Split image into individual characters
    m = image_arr.any(0)
    chars = [image_arr[:,[*g]] for k, g in groupby(np.arange(len(m)), lambda x: m[x] != 0) if k]

    # Preprocess each character
    char_arrays = []
    for char in chars:
        width = char.shape[1]
        filler = (28 - width) // 2
        char_padded = np.pad(char, ((0,0), (filler, 28-width-filler)), mode='constant')
        char_arrays.append(char_padded.reshape(28, 28, 1))

    char_arrays = np.array(char_arrays)

    # Predict
    predictions = model.predict(char_arrays)
    predicted_classes = np.argmax(predictions, axis=1)

    return predicted_classes



In [14]:
# 4. Equation Evaluation
def evaluate_equation(predicted_classes):
    operators = {10: "/", 11: "+", 12: "-", 13: "*"}
    equation = ""
    for cls in predicted_classes:
        if cls < 10:
            equation += str(cls)
        else:
            equation += operators[cls]
    
    try:
        result = eval(equation)
        return f"{equation} = {result}"
    except:
        return f"Invalid equation: {equation}"



In [24]:
# Main Loop
image_path = "C:/Users/joana/OneDrive/Desktop/HSLU/3rd_semester/CV/c_vision_ocr/data/img_one.png"  # replace with your actual file path

predicted_classes = predict_equation(image_path)
result = evaluate_equation(predicted_classes)
print(result)

ValueError: index can't contain negative values