In [3]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf

# Maps each dataset sub-folder name to the LaTeX-style string used as its class label.
# Raw strings keep every backslash literal without relying on invalid escape
# sequences such as '\s', '\d', '\(' or '\ ', which newer Python versions warn about.
# NOTE(review): several values contain a backslash followed by a space
# (e.g. '\ frac', '\ ax+b=0'), which does not look like intended LaTeX. The
# values are kept exactly as they were because they act as class names;
# confirm with any code that consumes these labels before changing them.
label_map = {
    'equation1': 'x^2',
    'equation2': r'\sqrt(x)',
    'equation3': r'\sqrt[3]{x}',
    'equation4': r'\ frac{x}{y}',
    'equation5': r'\ frac{1}{2}',
    'equation6': r'\ ax+b=0',
    'equation7': r'\ ax^2+bx+c=0',
    'equation8': r'\delta=b^2-4ac',
    'equation9': r'\(ab)^n=a^nb^n',
    'equation10': '(a^m)^n=a^mn',
}

# Load and preprocess the dataset with padding
def _fit_to_canvas(img, desired_h, desired_w):
    """Place a 2-D grayscale image on a zero-filled canvas and scale it to [0, 1].

    Each axis is handled independently: an axis shorter than the target keeps
    its size (give or take one pixel when the difference is odd) and is
    centred with zero padding, while an axis at least as long as the target is
    resized to the target length.
    NOTE(review): because the axes are treated separately, the aspect ratio is
    not preserved when an image exceeds the target on either axis.
    """
    h, w = img.shape

    # Padding is only non-zero on axes where the image is smaller than the target.
    pad_h = max(0, (desired_h - h) // 2)
    pad_w = max(0, (desired_w - w) // 2)

    # cv2.resize takes the size as (width, height).
    img_resized = cv2.resize(img, (desired_w - 2 * pad_w, desired_h - 2 * pad_h))

    canvas = np.zeros((desired_h, desired_w), dtype=np.uint8)
    canvas[pad_h:pad_h + img_resized.shape[0], pad_w:pad_w + img_resized.shape[1]] = img_resized

    # Normalize pixel values from [0, 255] to [0, 1].
    return canvas / 255.0


def load_data(data_dir, target_size=(128, 128)):
    """Load every labelled image under *data_dir* as a fixed-size grayscale array.

    *data_dir* is expected to contain one sub-folder per class, named after a
    key of ``label_map``; the images inside each sub-folder are read in
    grayscale, fitted onto a ``target_size`` canvas and scaled to [0, 1].

    Args:
        data_dir: Path of the dataset root folder.
        target_size: ``(height, width)`` of the produced images.

    Returns:
        A tuple ``(images, labels)`` where ``images`` has shape
        ``(n, height, width, 1)`` and ``labels`` holds the ``label_map`` value
        of each image's folder, in the same order.

    Entries that cannot be used (plain files in *data_dir*, folders missing
    from ``label_map``, files OpenCV cannot decode) are skipped; the latter two
    are reported on stdout instead of aborting the whole load.
    """
    desired_h, desired_w = target_size
    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split done by the caller) reproducible.
    for label in sorted(os.listdir(data_dir)):
        label_path = os.path.join(data_dir, label)
        if not os.path.isdir(label_path):
            continue
        if label not in label_map:
            print(f"Skipping folder with no entry in label_map: {label_path}")
            continue

        for img_file in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, img_file)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread returns None instead of raising when a file
                # cannot be read or decoded as an image.
                print(f"Skipping unreadable image: {img_path}")
                continue

            images.append(_fit_to_canvas(img, desired_h, desired_w))
            labels.append(label_map[label])

    # Add the channel dimension; the explicit shape also keeps an empty
    # dataset well-formed as (0, height, width, 1).
    images = np.array(images, dtype=np.float64).reshape(len(images), desired_h, desired_w, 1)
    return images, np.array(labels)


# Define the main function
def main():
    """Train a small CNN on the prepared equation images and save it as HDF5.

    Loads the dataset with ``load_data``, integer-encodes the string labels,
    holds out 20% of the samples (used as validation data during training),
    fits the network for 55 epochs and writes it to
    'handwritten_equation_model.h5' in the current working directory.
    """
    dataset_root = r'C:\Users\Harshitha D\OneDrive\Desktop\Kakracholi\dataset\data_prepared'  # Path to your dataset folder
    features, targets = load_data(dataset_root)

    # Turn the string labels into integer class ids
    targets_encoded = LabelEncoder().fit_transform(targets)

    # 80/20 split with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        targets_encoded,
        test_size=0.2,
        random_state=42,
    )

    # Assemble the CNN: three Conv2D + MaxPooling2D stages with a growing
    # number of filters, followed by a dense classification head
    keras_layers = tf.keras.layers
    stack = [keras_layers.Input(shape=(128, 128, 1))]
    for n_filters in (32, 64, 128):
        stack.append(keras_layers.Conv2D(n_filters, (3, 3), activation='relu'))
        stack.append(keras_layers.MaxPooling2D((2, 2)))
    stack.append(keras_layers.Flatten())
    stack.append(keras_layers.Dense(128, activation='relu'))
    # One softmax output per entry of label_map
    stack.append(keras_layers.Dense(len(label_map), activation='softmax'))
    classifier = tf.keras.Sequential(stack)

    # Integer targets, hence the sparse variant of categorical cross-entropy
    classifier.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Fit, using the held-out split as validation data
    classifier.fit(
        X_train,
        y_train,
        epochs=55,
        batch_size=16,
        validation_data=(X_test, y_test),
    )

    # Persist the trained network
    classifier.save('handwritten_equation_model.h5')
    print("Model saved as 'handwritten_equation_model.h5'")

# Run the main function
# Training starts only when this file is executed as a script (or notebook
# cell), not when it is imported as a module.
if __name__ == '__main__':
    main()


Epoch 1/55
Epoch 2/55
Epoch 3/55
Epoch 4/55
Epoch 5/55
Epoch 6/55
Epoch 7/55
Epoch 8/55
Epoch 9/55
Epoch 10/55
Epoch 11/55
Epoch 12/55
Epoch 13/55
Epoch 14/55
Epoch 15/55
Epoch 16/55
Epoch 17/55
Epoch 18/55
Epoch 19/55
Epoch 20/55
Epoch 21/55
Epoch 22/55
Epoch 23/55
Epoch 24/55
Epoch 25/55
Epoch 26/55
Epoch 27/55
Epoch 28/55
Epoch 29/55
Epoch 30/55
Epoch 31/55
Epoch 32/55
Epoch 33/55
Epoch 34/55
Epoch 35/55
Epoch 36/55
Epoch 37/55
Epoch 38/55
Epoch 39/55
Epoch 40/55
Epoch 41/55
Epoch 42/55
Epoch 43/55
Epoch 44/55
Epoch 45/55
Epoch 46/55
Epoch 47/55
Epoch 48/55
Epoch 49/55
Epoch 50/55
Epoch 51/55
Epoch 52/55
Epoch 53/55
Epoch 54/55
Epoch 55/55
Model saved as 'handwritten_equation_model.h5'
