In [11]:
import os
import cv2
import numpy as np
#from tensorflow.keras.utils import to_categorical
import tensorflow as tf





In [12]:
def load_and_preprocess_data(data_path):
    """Load every symbol image under ``data_path`` and build one-hot labels.

    ``data_path`` must contain one sub-directory per symbol class. Every file
    inside a class directory is read as a grayscale image, resized to 32x32
    and scaled to the range [0, 1]. Files that OpenCV cannot decode are
    skipped instead of aborting the whole load.

    Args:
        data_path: Directory whose entries are the per-class image folders.

    Returns:
        Tuple ``(images, labels)``. ``images`` is a float32 NumPy array of
        shape ``(n_images, 32, 32)``; ``labels`` is the ``tf.one_hot``
        encoding of the class indices with depth equal to the number of
        entries in ``data_path``.
    """
    images = []
    labels = []

    # NOTE: the class index is the position in os.listdir() order. That order
    # is kept untouched on purpose because the index -> description table used
    # elsewhere in this notebook was written against it.
    class_names = os.listdir(data_path)
    class_to_index = {class_name: idx for idx, class_name in enumerate(class_names)}

    skipped = 0
    for symbol_class in class_names:
        class_path = os.path.join(data_path, symbol_class)
        class_index = class_to_index[symbol_class]
        for image_file in os.listdir(class_path):
            image_path = os.path.join(class_path, image_file)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable / non-image file by
                # returning None; resizing it would raise an opaque error.
                skipped += 1
                continue
            image = cv2.resize(image, (32, 32))  # Resize to a consistent size
            # float32 halves the memory of the default float64 result and is
            # the precision Keras trains in anyway.
            images.append(image.astype(np.float32) / 255.0)
            labels.append(class_index)

    if skipped:
        print(f"Skipped {skipped} file(s) that could not be read as images")

    images = np.array(images, dtype=np.float32)

    # One-hot encode the integer class indices.
    labels = tf.one_hot(labels, depth=len(class_names))

    return images, labels

In [13]:
import warnings
# Suppress every DeprecationWarning for the rest of the kernel session.
# NOTE(review): a blanket filter can also hide deprecations that point at real
# problems in this notebook's own code — consider narrowing it to a module.
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [15]:
def build_symbol_detection_model(input_shape, num_classes):
    """Assemble and compile the CNN that classifies a single symbol image.

    The network is two Conv2D + MaxPooling2D stages followed by a 128-unit
    dense layer and a softmax output with one unit per class.

    Args:
        input_shape: Shape handed to the first convolution as ``input_shape``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    layer_stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        # Softmax turns the final layer into a per-class probability vector.
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(layer_stack)

    # Categorical cross-entropy matches the one-hot encoded targets.
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [16]:
from sklearn.model_selection import train_test_split

In [17]:
# Build the path with os.path.join: the literal 'data\extracted_images' relies
# on the invalid escape sequence '\e' and only resolves on Windows.
images, labels = load_and_preprocess_data(os.path.join('data', 'extracted_images'))


In [None]:
input_shape = (32, 32, 1)  # Assuming images are grayscale
# One class per entry of the dataset directory. os.path.join avoids the
# invalid '\e' escape of a hand-written backslash path and is portable.
num_classes = len(os.listdir(os.path.join('data', 'extracted_images')))

In [None]:
# Show how many class entries were counted in the dataset directory.
num_classes

82

In [None]:
# Inspect the shape of the loaded image array.
images.shape

(375974, 32, 32)

In [None]:
# Convert the tf.one_hot result to a NumPy array. The hasattr guard keeps this
# cell idempotent: re-running it on an ndarray would otherwise raise
# AttributeError because ndarrays have no .numpy() method.
labels = labels.numpy() if hasattr(labels, 'numpy') else labels

In [None]:
# Inspect the shape of the one-hot label array.
labels.shape

(375974, 82)

In [None]:
# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# build_symbol_detection_model() already compiles the model with Adam,
# categorical cross-entropy and the accuracy metric, so no second compile()
# call is needed here (a duplicate could silently drift from the builder).
model = build_symbol_detection_model(input_shape, num_classes)

# The held-out split is used as validation data during training.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1d44c975f00>

In [None]:
# Print the class directory names from index 16 onwards. The directory is
# listed once instead of twice per loop iteration, and the path is built with
# os.path.join to avoid the invalid '\e' escape of a backslash literal.
for class_name in os.listdir(os.path.join('data', 'extracted_images'))[16:]:
    print(class_name)
        

=
A
alpha
ascii_124
b
beta
C
cos
d
Delta
div
e
exists
f
forall
forward_slash
G
gamma
geq
gt
H
i
in
infty
int
j
k
l
lambda
ldots
leq
lim
log
lt
M
mu
N
neq
o
p
phi
pi
pm
prime
q
R
rightarrow
S
sigma
sin
sqrt
sum
T
tan
theta
times
u
v
w
X
y
z
[
]
{
}


In [None]:
# Display the one-hot label array (NumPy abbreviates the printed rows).
labels

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [None]:
# Spoken description for every class index.
# NOTE: indices follow the os.listdir() order of the dataset directory that
# was used for training (indices 16-81 are taken from the directory listing
# printed earlier in this notebook). Re-check the table if the dataset
# folders change.
_CLASS_DESCRIPTIONS = {
    0: 'exclamatory mark',
    1: 'Opening round bracket',
    2: 'closing round bracket',
    3: 'Plus',
    4: 'Comma',
    5: 'minus',
    6: 'Zero',
    7: 'One',
    8: 'Two',
    9: 'Three',
    10: 'Four',
    11: 'Five',
    12: 'Six',
    13: 'Seven',
    14: 'Eight',
    15: 'Nine',
    16: 'Equal to',
    17: 'a',
    18: 'alpha',
    19: 'vertical bar',  # class folder "ascii_124"; ASCII code 124 is '|'
    20: 'b',
    21: 'beta',
    22: 'c',
    23: 'cos',
    24: 'd',
    25: 'delta',
    26: 'divide by',
    27: 'e',
    28: 'there exists',
    29: 'f',
    30: 'for all',
    31: 'forward slash',
    32: 'g',
    33: 'gamma',
    34: 'greater than or equal to',
    35: 'greater than',
    36: 'h',
    37: 'i',
    38: 'belongs to',
    39: 'infinity',
    40: 'integral',
    41: 'j',
    42: 'k',
    43: 'l',
    44: 'lambda',
    45: 'dots',
    46: 'less than or equal to',
    47: 'limit',
    48: 'log',
    49: 'less than',
    50: 'm',
    51: 'mu',
    52: 'n',
    53: 'not equal to',
    54: 'o',
    55: 'p',
    56: 'phi',
    57: 'pi',
    58: 'plus or minus',
    59: 'prime',
    60: 'q',
    61: 'r',
    62: 'right arrow',
    63: 's',
    64: 'sigma',
    65: 'sine',
    66: 'square root',
    67: 'sum',
    68: 't',
    69: 'tan',
    70: 'Theta',
    71: 'times',
    72: 'u',
    73: 'v',
    74: 'w',
    75: 'x',
    76: 'y',
    77: 'z',
    78: 'opening square bracket',
    79: 'closing square bracket',
    80: 'opening curly bracket',
    81: 'closing curly bracket',
}


def get_symbol_description(predicted_class):
    """Return the spoken description for a predicted class index.

    Args:
        predicted_class: Integer class index (a plain int or a NumPy integer
            such as the result of ``np.argmax``).

    Returns:
        The description string for the class, or ``'Unknown Symbol'`` when
        the index is not in the table.
    """
    return _CLASS_DESCRIPTIONS.get(predicted_class, 'Unknown Symbol')


In [None]:
# Example using gTTS (Google Text-to-Speech) library
from gtts import gTTS
import IPython.display as ipd

def generate_audio_description(text):
    """Synthesize ``text`` to speech and show an audio player for it.

    The speech is produced with gTTS (English, normal speed) and written to
    ``audio_description.mp3`` in the working directory, overwriting any
    previous file.

    Args:
        text: The text to speak. Empty or whitespace-only text is ignored
            because gTTS refuses to synthesize it.

    Returns:
        None.
    """
    if not text or not text.strip():
        return

    tts = gTTS(text=text, lang='en', slow=False)
    tts.save('audio_description.mp3')
    # An Audio object that is merely constructed inside a function is thrown
    # away; it has to be passed to display() for the player to appear.
    ipd.display(ipd.Audio('audio_description.mp3'))


In [None]:
def detect_and_describe_symbols(image_path):
    """Classify the symbol stored in an image file and speak its description.

    NOTE: a later cell of this notebook defines another function with the
    same name that takes an image array; once that cell has run it replaces
    this path-based version.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        The predicted class index (the ``np.argmax`` of the model output).

    Raises:
        ValueError: If OpenCV cannot read an image from ``image_path``.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None instead of raising for missing/corrupt files.
        raise ValueError(f"Could not read an image from {image_path!r}")

    # Same preprocessing as the training data: 32x32, scaled to [0, 1].
    image = cv2.resize(image, (32, 32))
    image = image / 255.0
    image = np.expand_dims(image, axis=0)  # add the batch dimension

    prediction = model.predict(image)
    predicted_class = np.argmax(prediction)

    symbol_description = get_symbol_description(predicted_class)
    generate_audio_description(symbol_description)

    return predicted_class


In [None]:
import IPython.display as ipd

In [None]:
def detect_and_describe_symbols(symbol_image):
    """Classify one symbol image array and look up its description.

    Args:
        symbol_image: Image array of a single symbol, as accepted by
            ``cv2.resize``.

    Returns:
        Tuple ``(predicted_class, symbol_description)``: the ``np.argmax`` of
        the model output and the text from ``get_symbol_description``.
    """
    resized = cv2.resize(symbol_image, (32, 32))
    # Scale to [0, 1] and prepend the batch axis expected by model.predict.
    batch = np.expand_dims(resized / 255.0, axis=0)

    class_scores = model.predict(batch)
    predicted_class = np.argmax(class_scores)

    return predicted_class, get_symbol_description(predicted_class)


In [None]:
def detect_and_describe_formula(formula_image_path):
    """Describe a whole formula image symbol by symbol, in text and audio.

    The image is split into individual symbols, each symbol is classified,
    and the per-symbol descriptions are joined with spaces. The joined text
    is spoken via ``generate_audio_description``.

    Args:
        formula_image_path: Path of the formula image file.

    Returns:
        The space-separated formula description; an empty string when no
        symbol was found in the image.
    """
    # Split the formula image into individual symbols
    symbol_images = split_formula_into_symbols(formula_image_path)

    # Detect and describe each symbol
    descriptions = []
    for symbol_image in symbol_images:
        predicted_class, symbol_description = detect_and_describe_symbols(symbol_image)
        descriptions.append(symbol_description)
        print(f"Predicted Class: {predicted_class}, Description: {symbol_description}")

    # Combine individual symbol descriptions into a formula description
    formula_description = ' '.join(descriptions)

    # Only synthesize audio when there is something to say: the TTS backend
    # rejects empty text.
    if formula_description:
        generate_audio_description(formula_description)

    return formula_description


In [None]:
def split_formula_into_symbols(formula_image_path):
    """Cut a formula image into one crop per external contour, left to right.

    The image is read as grayscale, binarized with an inverted threshold at
    128, and the bounding rectangle of every external contour is cropped from
    the grayscale image.

    Args:
        formula_image_path: Path of the formula image file.

    Returns:
        List of grayscale crops ordered by the x-coordinate of their
        bounding rectangle.

    Raises:
        ValueError: If OpenCV cannot read an image from ``formula_image_path``.
    """
    # Load the formula image
    formula_image = cv2.imread(formula_image_path, cv2.IMREAD_GRAYSCALE)
    if formula_image is None:
        # cv2.imread returns None instead of raising for missing/corrupt files.
        raise ValueError(f"Could not read an image from {formula_image_path!r}")

    # Threshold the image to create a binary image
    _, binary_image = cv2.threshold(formula_image, 128, 255, cv2.THRESH_BINARY_INV)

    # findContours returns (contours, hierarchy) in OpenCV 2/4 but
    # (image, contours, hierarchy) in OpenCV 3; [-2] is the contours in both.
    contours = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Compute each bounding rectangle once, then order them left to right.
    # sorted() is stable, so rectangles sharing an x keep the contour order.
    bounding_boxes = sorted(
        (cv2.boundingRect(contour) for contour in contours),
        key=lambda box: box[0],
    )

    # Extract individual symbols based on bounding rectangles
    return [formula_image[y:y + h, x:x + w] for x, y, w, h in bounding_boxes]

In [None]:
# Run the full pipeline on a sample formula image.
formula_image_path = "inp.jpg"
# NOTE(review): detect_and_describe_formula() splits the image itself, so this
# separate split only serves to keep the crops around for inspection.
detected_symbols = split_formula_into_symbols(formula_image_path)
# NOTE(review): the recorded output of this cell is a NameError for cv2, which
# suggests the import cell had not been run in that kernel session.
detect_and_describe_formula(formula_image_path)

NameError: name 'cv2' is not defined

In [None]:
# Describe the same sample image again via the formula pipeline.
# NOTE(review): this repeats the previous cell's call on 'inp.jpg'.
image_path = 'inp.jpg'
detect_and_describe_formula(image_path)

NameError: name 'cv2' is not defined