In [None]:
import os
import re

import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing.image import load_img, img_to_array
from skimage.feature import hog
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sympy import sympify

In [None]:
data_dir = "data"
img_size = 45

# Every sub-directory of data_dir is treated as one symbol class; sorting the
# names makes the label -> index assignment stable between runs.
symbol_labels = sorted(
    d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))
)

# Label mappings (class name <-> integer index)
symbol_to_index = {label: idx for idx, label in enumerate(symbol_labels)}
index_to_label = {idx: label for label, idx in symbol_to_index.items()}

X_symbols, y_symbols = [], []

# Load every .jpg of every class as a grayscale img_size x img_size image
# with pixel values divided by 255.
for label in symbol_labels:
    folder_path = os.path.join(data_dir, label)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/test split) reproducible.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".jpg"):
            continue
        # Bound before the try so the error message below can always use it.
        img_path = os.path.join(folder_path, file)
        try:
            img = load_img(img_path, color_mode="grayscale", target_size=(img_size, img_size))
            img = img_to_array(img) / 255.0
        except Exception as e:
            # Best effort: report the unreadable file and keep loading the rest.
            print(f"Error loading {img_path}: {e}")
            continue
        X_symbols.append(img)
        y_symbols.append(symbol_to_index[label])

# Convert to numpy arrays
X_all = np.array(X_symbols)
y_all = np.array(y_symbols)

print("Dataset loaded:", X_all.shape, y_all.shape)
print("Classes:", index_to_label)


In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# partition repeatable for a given sample order.
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    random_state=42,
)

# Extract HOG features
def extract_hog_features(images):
    """Compute one HOG descriptor per image.

    Each image has its singleton dimensions squeezed away before being passed
    to ``hog`` with 9 orientations, 4x4-pixel cells, 2x2-cell blocks and
    L2-Hys block normalisation.

    Args:
        images: iterable of array-like images (each must support ``.squeeze()``).

    Returns:
        ``np.ndarray`` built from the per-image descriptors, in input order.
    """
    return np.array([
        hog(
            sample.squeeze(),
            orientations=9,
            pixels_per_cell=(4, 4),
            cells_per_block=(2, 2),
            block_norm="L2-Hys",
        )
        for sample in images
    ])

# Describe both partitions with the same extractor (train first, then test).
X_train_hog, X_test_hog = (
    extract_hog_features(split) for split in (X_train, X_test)
)

print("HOG feature shape (per image):", X_train_hog.shape[1])

In [None]:
# Random forest on HOG descriptors; the seed makes training repeatable and
# n_jobs=-1 uses every available core.
rf_hog = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    n_jobs=-1,
    min_samples_split=2,
    max_depth=None
)

rf_hog.fit(X_train_hog, y_train)

# Evaluate on the held-out split
y_pred_hog = rf_hog.predict(X_test_hog)
print("Accuracy:", accuracy_score(y_test, y_pred_hog))

# Pass `labels` explicitly: without it, classification_report derives the label
# set from y_test/y_pred_hog and raises ValueError when that set is smaller than
# `target_names` (e.g. a rare class that did not land in the test split).
class_indices = sorted(index_to_label)
print("\nClassification report:\n",
      classification_report(y_test, y_pred_hog,
                            labels=class_indices,
                            target_names=[index_to_label[i] for i in class_indices]))

In [None]:
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before persisting the trained forest.
os.makedirs("models", exist_ok=True)
joblib.dump(rf_hog, "models/rf_hog.pkl")

In [None]:
# Persist the index -> label mapping alongside the model so predictions can be
# decoded later; create the output folder first in case this cell runs alone.
os.makedirs("models", exist_ok=True)
joblib.dump(index_to_label, "models/index_to_label.pkl")

In [None]:
# Round-trip check: reload the persisted forest and re-score it on the same
# held-out HOG features.
rf_hog = joblib.load("models/rf_hog.pkl")
y_pred = rf_hog.predict(X_test_hog)

reloaded_accuracy = accuracy_score(y_test, y_pred)
print("Loaded model accuracy:", reloaded_accuracy)

In [None]:
def segment_expression(img_path, img_size=45, show_steps=False, min_size=0):
    """Split an expression image into individual symbol crops, left to right.

    The image is read as grayscale, binarised with a fixed threshold of 128
    (inverted, so dark ink becomes white), and every external contour becomes
    one symbol. Each crop is centred on a black square canvas, resized to
    ``img_size`` x ``img_size`` and scaled to ``float32`` values in [0, 1].

    Args:
        img_path: path of the image to segment.
        img_size: side length, in pixels, of each returned crop.
        show_steps: if True, plot the thresholded image and the ordered crops.
        min_size: contours whose bounding box is narrower or shorter than this
            many pixels are skipped. The default of 0 keeps every contour.

    Returns:
        List of arrays of shape ``(img_size, img_size, 1)``, ordered by the left
        edge of each symbol's bounding box.

    Raises:
        FileNotFoundError: if OpenCV cannot load ``img_path``.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of inside cv2.threshold.
        raise FileNotFoundError(
            f"Could not load image (missing or unreadable file): {img_path}"
        )

    # Threshold (invert so text is white)
    _, thresh = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)

    # Only outer contours: one per connected stroke group
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Order bounding boxes by their left edge. sorted() is stable, so boxes
    # sharing the same x keep the order in which OpenCV returned them.
    boxes = sorted((cv2.boundingRect(cnt) for cnt in contours), key=lambda box: box[0])

    sorted_symbols = []
    for x, y, w, h in boxes:
        if w < min_size or h < min_size:
            continue

        roi = thresh[y:y+h, x:x+w]

        # Centre the crop on a square black canvas so resizing keeps its aspect ratio
        side = max(w, h)
        top, left = (side - h) // 2, (side - w) // 2
        square = np.zeros((side, side), dtype=np.uint8)
        square[top:top+h, left:left+w] = roi

        roi_resized = cv2.resize(square, (img_size, img_size), interpolation=cv2.INTER_AREA)

        # Scale to [0, 1] and add a trailing channel axis
        roi_resized = np.expand_dims(roi_resized.astype("float32") / 255.0, axis=-1)
        sorted_symbols.append(roi_resized)

    if show_steps:
        plt.imshow(thresh, cmap="gray")
        plt.title("Thresholded image")
        plt.show()

        for i, s in enumerate(sorted_symbols):
            plt.subplot(1, len(sorted_symbols), i+1)
            plt.imshow(s.squeeze(), cmap="gray")
            plt.axis("off")
        plt.show()

    return sorted_symbols


In [None]:
def classify_expression(img_path, show_steps=True, img_size=45):
    """Recognise the expression in an image and return it as one string.

    The image is segmented with ``segment_expression``; each crop is classified
    with ``predict_symbol_rf_hog`` and the predicted labels are concatenated in
    segment order.

    Args:
        img_path: path of the expression image.
        show_steps: forwarded to ``segment_expression``; True (the default)
            plots the intermediate segmentation results.
        img_size: side length of each symbol crop, forwarded to
            ``segment_expression``.

    Returns:
        The predicted labels joined without a separator.
    """
    symbols = segment_expression(img_path, img_size=img_size, show_steps=show_steps)
    return "".join(predict_symbol_rf_hog(symbol) for symbol in symbols)


In [None]:
# Predicting with hog instead of just RF
def predict_symbol_rf_hog(img, verbose=True):
    """Classify one symbol crop with the HOG random forest.

    Args:
        img: a single symbol image (array supporting ``.squeeze()``).
        verbose: if True (the default), print the probability of every class.

    Returns:
        The label that ``index_to_label`` maps the predicted class index to.
    """
    # Reuse the training-time extractor so inference features cannot drift
    # from the HOG configuration the forest was fitted on.
    hog_feat = extract_hog_features([img])

    probs = rf_hog.predict_proba(hog_feat)[0]

    if verbose:
        for cls, p in zip(rf_hog.classes_, probs):
            print(f"{cls}: {p:.2f}")

    # The forest predicts the class with the highest mean probability, so the
    # argmax of `probs` gives the same answer without a second pass over the trees.
    pred_idx = rf_hog.classes_[np.argmax(probs)]
    return index_to_label[pred_idx]

In [None]:
# Sanity check: a segmented crop must yield a HOG vector of the same length as
# the vectors the forest was trained on.
segments = segment_expression("test_images/math_img.png", img_size=45)
assert segments, "segment_expression found no symbols in the sanity-check image"

test_symbol = segments[0]
print("Segment shape:", test_symbol.shape)

# Same extractor as training, so the HOG parameters are defined in one place.
feat = extract_hog_features([test_symbol])
print("HOG length:", feat.shape[1])
assert feat.shape[1] == X_train_hog.shape[1], "HOG length differs from training features"

In [None]:
def solving(expr_str):
    """Evaluate a recognised expression string numerically.

    Word-style operator tokens are rewritten into syntax SymPy can parse:

    * ``times`` -> ``*``
    * ``div`` -> ``/``
    * ``sqrt`` directly followed by a number (``sqrt16``) -> ``sqrt(16)``
    * ``sqrt`` directly followed by ``(`` is left as a function call
    * any remaining ``sqrt`` (e.g. the postfix form ``9sqrt``) -> ``**0.5``

    Args:
        expr_str: expression text, e.g. ``"2times3"`` or ``"sqrt16+2"``.

    Returns:
        The result of ``evalf()`` on the parsed expression, or the string
        ``"Error: <message>"`` if parsing or evaluation fails.
    """
    expr_str = expr_str.replace("times", "*")
    expr_str = expr_str.replace("div", "/")

    # Prefix square root: wrap a bare numeric operand in parentheses. A blanket
    # replacement with "**0.5" would turn "sqrt16" into "**0.516".
    expr_str = re.sub(r"sqrt(\d+(?:\.\d+)?)", r"sqrt(\1)", expr_str)
    # Anything still not written as a call is treated as a postfix root.
    expr_str = re.sub(r"sqrt(?!\()", "**0.5", expr_str)

    try:
        # NOTE(review): sympify evaluates its input with eval(); only feed it
        # strings assembled from trusted classifier labels.
        expr = sympify(expr_str)
        return expr.evalf()
    except Exception as e:
        return f"Error: {e}"

# End-to-end run on a sample image: recognise the symbols, then evaluate them.
expr_img = "test_images/math_img.png"
expr_str = classify_expression(expr_img)
result = solving(expr_str)

print("Recognized expression:", expr_str)
print("Result:", result)


In [None]:
# Example: feed an image
expr_img = "test_images/math.png"

expr_str = classify_expression(expr_img)
print("Recognized expression:", expr_str)

# The evaluator defined in this notebook is `solving`; `solve_expression` is not
# defined in the visible cells and raised NameError here.
result = solving(expr_str)
print("Result:", result)

In [None]:
# Recognise and evaluate the expression in math.png.
expr_img = "test_images/math.png"

expr_str = classify_expression(expr_img)
print("Recognized expression:", expr_str)

# The evaluator defined in this notebook is `solving`; `solve_expression` is not
# defined in the visible cells and raised NameError here.
result = solving(expr_str)
print("Result:", result)

In [None]:
# Recognise and evaluate the expression in math3.png.
expr_img = "test_images/math3.png"

expr_str = classify_expression(expr_img)
print("Recognized expression:", expr_str)

# The evaluator defined in this notebook is `solving`; `solve_expression` is not
# defined in the visible cells and raised NameError here.
result = solving(expr_str)
print("Result:", result)