In [2]:
pip install tensorflow

Collecting tensorflow
  Using cached tensorflow-2.19.0-cp312-cp312-win_amd64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting grpcio<2.0,>=1.24.3 (from tensorflow)
  Using cached grpcio-1.71.0-cp312-cp312-win_amd64.whl.metadata (4.0 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Using cached tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting keras>=3.5.0 (from tensorflow)
  Using cached keras-3.9.0-py3-none-any.whl.metadata (6.1 kB)
Collecting h5py>=3.11.0 (from tensorflow)
  Using cached h5py-3.13.0-cp312-cp312-win

In [5]:
pip install kagglehub

Collecting kagglehub
  Downloading kagglehub-0.3.10-py3-none-any.whl.metadata (31 kB)
Downloading kagglehub-0.3.10-py3-none-any.whl (63 kB)
Installing collected packages: kagglehub
Successfully installed kagglehub-0.3.10
Note: you may need to restart the kernel to use updated packages.


In [16]:
import os
import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Step 1: Load Images from Subfolders
# Root of the extracted dataset; each sub-folder is treated as one symbol class.
dataset_folder = "C:\\Users\\chdnv\\.cache\\kagglehub\\datasets\\sagyamthapa\\handwritten-math-symbols\\versions\\4\\dataset"

# Only sub-directories are classes. Filtering here (instead of skipping inside
# the loop) keeps stray files out of `symbols`, so label indices are contiguous
# and len(symbols) is the real number of classes used for the one-hot encoding
# and the softmax layer. Sorting keeps the label order consistent between runs.
symbols = sorted(
    entry
    for entry in os.listdir(dataset_folder)
    if os.path.isdir(os.path.join(dataset_folder, entry))
)
if not symbols:
    raise RuntimeError(f"No class folders found in {dataset_folder}")

data, labels = [], []

for label, symbol in enumerate(symbols):
    folder_path = os.path.join(dataset_folder, symbol)

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/test split) repeatable.
    for img_name in sorted(os.listdir(folder_path)):
        if not img_name.lower().endswith(('.png', '.jpg', '.jpeg')):  # Skip non-image files
            continue

        img_path = os.path.join(folder_path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:  # File could not be decoded as an image
            print(f"Skipping invalid image: {img_path}")
            continue

        img = cv2.resize(img, (28, 28))  # Resize to 28x28
        data.append(img)
        labels.append(label)

if not data:
    raise RuntimeError(f"No readable images found under {dataset_folder}")

# Convert lists to numpy arrays
X = np.array(data).reshape(len(data), 28, 28, 1) / 255.0  # Add channel axis, normalize to [0,1]
y = np.array(labels)

# Step 2: Split into Train & Test Sets
# 20% of the samples are held out; the fixed seed keeps the shuffle repeatable
# for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Convert both label vectors to one-hot rows, one column per entry in `symbols`
y_train, y_test = (
    to_categorical(split_labels, num_classes=len(symbols))
    for split_labels in (y_train, y_test)
)

# Step 3: Build CNN Model
# Two conv/pool stages followed by a dense classifier; the final softmax layer
# has one unit per entry in `symbols`.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.6))  # Increased dropout to reduce overfitting
model.add(Dense(len(symbols), activation='softmax'))

# Step 4: Compile Model
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Step 5: Train Model
# NOTE: the held-out test split is also what is passed as validation data.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=20,
)

# Step 6: Save Model
model.save("handwritten_math_solver.keras")
print("Model saved successfully!")

# Step 7: Test Prediction on Sample Image
def predict_image(img_path):
    """Classify a single image file with the trained model and print the result.

    Reads the notebook-level `model` and `symbols`. The image is loaded as
    grayscale, resized to 28x28 and scaled by 1/255, mirroring the
    preprocessing applied to the training images.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        None. The predicted class index and symbol are printed; if the file
        cannot be loaded, an error message is printed instead.
    """
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        print(f"Error: Could not load image from {img_path}")
        return

    # Shape the single image as a batch of one: (1, 28, 28, 1)
    batch = (cv2.resize(gray, (28, 28)) / 255.0).reshape(1, 28, 28, 1)

    scores = model.predict(batch)
    predicted_label = np.argmax(scores)  # Index of the highest-scoring class

    print(f"Predicted index: {predicted_label}, Symbol: {symbols[predicted_label]}")

# Example usage
sample_image = "image2.jpg"  # Change to your test image path
predict_image(sample_image)


Epoch 1/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.1544 - loss: 2.7290 - val_accuracy: 0.6154 - val_loss: 1.3658
Epoch 2/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5238 - loss: 1.5161 - val_accuracy: 0.7801 - val_loss: 0.8102
Epoch 3/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.6537 - loss: 1.0885 - val_accuracy: 0.8263 - val_loss: 0.6151
Epoch 4/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.7239 - loss: 0.8586 - val_accuracy: 0.8536 - val_loss: 0.5345
Epoch 5/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.7616 - loss: 0.7361 - val_accuracy: 0.8754 - val_loss: 0.4312
Epoch 6/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.7850 - loss: 0.6646 - val_accuracy: 0.8839 - val_loss: 0.3941
Epoch 7/20
[1m252/252[0



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step
Predicted index: 9, Symbol: 8


In [7]:
import kagglehub

# Download dataset
# The returned path is kept in `dataset_path` for use by later cells.
# The training cells hard-code this path plus a "dataset" sub-folder
# as `dataset_folder`.
dataset_path = kagglehub.dataset_download("sagyamthapa/handwritten-math-symbols")

# Print the dataset path
print("Dataset Path:", dataset_path)


Dataset Path: C:\Users\chdnv\.cache\kagglehub\datasets\sagyamthapa\handwritten-math-symbols\versions\4


In [8]:
# Show the top-level entries of the downloaded dataset directory.
# NOTE(review): relies on `os` and `dataset_path` already being defined by other cells.
print("Contents:", os.listdir(dataset_path))

Contents: ['dataset']


In [17]:
import os
import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

# Step 1: Load Images from Subfolders
# Root of the extracted dataset; each sub-folder is treated as one symbol class.
dataset_folder = "C:\\Users\\chdnv\\.cache\\kagglehub\\datasets\\sagyamthapa\\handwritten-math-symbols\\versions\\4\\dataset"

# Only sub-directories are classes. Filtering here (instead of skipping inside
# the loop) keeps stray files out of `symbols`, so label indices are contiguous
# and len(symbols) is the real number of classes used for the one-hot encoding
# and the softmax layer. Sorting keeps the label order consistent between runs.
symbols = sorted(
    entry
    for entry in os.listdir(dataset_folder)
    if os.path.isdir(os.path.join(dataset_folder, entry))
)
if not symbols:
    raise RuntimeError(f"No class folders found in {dataset_folder}")

data, labels = [], []

for label, symbol in enumerate(symbols):
    folder_path = os.path.join(dataset_folder, symbol)

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/test split) repeatable.
    for img_name in sorted(os.listdir(folder_path)):
        if not img_name.lower().endswith(('.png', '.jpg', '.jpeg')):  # Skip non-image files
            continue

        img_path = os.path.join(folder_path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:  # File could not be decoded as an image
            print(f"Skipping invalid image: {img_path}")
            continue

        img = cv2.resize(img, (28, 28))  # Resize to 28x28
        data.append(img)
        labels.append(label)

if not data:
    raise RuntimeError(f"No readable images found under {dataset_folder}")

# Convert lists to numpy arrays
X = np.array(data).reshape(len(data), 28, 28, 1) / 255.0  # Add channel axis, normalize to [0,1]
y = np.array(labels)

# Step 2: Split into Train & Test Sets
# 20% of the samples are held out; the fixed seed keeps the shuffle repeatable
# for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Convert both label vectors to one-hot rows, one column per entry in `symbols`
y_train, y_test = (
    to_categorical(split_labels, num_classes=len(symbols))
    for split_labels in (y_train, y_test)
)

# Step 3: Data Augmentation
# Random rotation / zoom / shift generator used to feed the training data.
data_gen = ImageDataGenerator(
    height_shift_range=0.1,
    width_shift_range=0.1,
    zoom_range=0.1,
    rotation_range=10,
)

# Step 4: Build CNN Model
# Three conv stages, each followed by batch normalization (the first two also
# by pooling), then a dense classifier with one softmax unit per entry in `symbols`.
model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(Flatten())

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))  # Prevent overfitting
model.add(Dense(len(symbols), activation='softmax'))

# Step 5: Compile Model
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Step 6: Training with Early Stopping
# Watches val_loss with a patience of 3 epochs and restores the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Augmented batches are used for training only; validation uses the
# unmodified held-out split.
model.fit(
    data_gen.flow(X_train, y_train, batch_size=32),
    epochs=20,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
)

# Step 7: Save Model
model.save("handwritten_math_solver.h5")
print("Model saved successfully!")

# Step 8: Test Prediction on Sample Image
def predict_image(img_path):
    """Classify a single image file with the trained model and print the result.

    Reads the notebook-level `model` and `symbols`. The image is loaded as
    grayscale, resized to 28x28 and scaled by 1/255, mirroring the
    preprocessing applied to the training images.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        None. The predicted class index and symbol are printed; if the file
        cannot be loaded, an error message is printed instead.
    """
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        print(f"Error: Could not load image from {img_path}")
        return

    # Shape the single image as a batch of one: (1, 28, 28, 1)
    batch = (cv2.resize(gray, (28, 28)) / 255.0).reshape(1, 28, 28, 1)

    scores = model.predict(batch)
    predicted_label = np.argmax(scores)  # Index of the highest-scoring class

    print(f"Predicted index: {predicted_label}, Symbol: {symbols[predicted_label]}")

# Example usage
sample_image = "image2.jpg"  # Change to your test image path
predict_image(sample_image)


Epoch 1/20


  self._warn_if_super_not_called()


[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step - accuracy: 0.3379 - loss: 2.4450 - val_accuracy: 0.0789 - val_loss: 3.3956
Epoch 2/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - accuracy: 0.6946 - loss: 1.0039 - val_accuracy: 0.6675 - val_loss: 1.0837
Epoch 3/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - accuracy: 0.7924 - loss: 0.6754 - val_accuracy: 0.8298 - val_loss: 0.5764
Epoch 4/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 26ms/step - accuracy: 0.8290 - loss: 0.5540 - val_accuracy: 0.5752 - val_loss: 3.6904
Epoch 5/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 26ms/step - accuracy: 0.8595 - loss: 0.4562 - val_accuracy: 0.8730 - val_loss: 0.4317
Epoch 6/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 26ms/step - accuracy: 0.8771 - loss: 0.4233 - val_accuracy: 0.7911 - val_loss: 0.8701
Epoch 7/20
[1m252/252[0m [32m



Model saved successfully!




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step
Predicted index: 9, Symbol: 8
