In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator


mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255


datagen = ImageDataGenerator(rotation_range=10, zoom_range=0.1, width_shift_range=0.1, height_shift_range=0.1)

model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2), 
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(datagen.flow(x_train, y_train, batch_size=32), epochs=5)
model.save('improved_digit_model.h5')
print("Model trained and saved!")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model trained and saved!


  saving_api.save_model(


In [6]:
import cv2
import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model('improved_digit_model.h5')
cap = cv2.VideoCapture(0)

print("Webcam started. Position the digit inside the green box.")

while True:
    ret, frame = cap.read()
    if not ret: 
        break

    height, width, _ = frame.shape
    x1, y1, x2, y2 = width//2-100, height//2-100, width//2+100, height//2+100
    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    roi = frame[y1:y2, x1:x2]

    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
                                   cv2.THRESH_BINARY_INV, 11, 5)

    kernel = np.ones((3,3), np.uint8)
    thresh = cv2.dilate(thresh, kernel, iterations=1) 

    img_for_model = cv2.resize(thresh, (28, 28))
    img_input = img_for_model.reshape(1, 28, 28, 1).astype('float32') / 255

    prediction = model.predict(img_input, verbose=0)
    digit = np.argmax(prediction)
    probability = np.max(prediction)

    if probability > 0.7: 
        text = f"Digit: {digit} ({int(probability*100)}%)"
        cv2.putText(frame, text, (x1, y1-10), 
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    
    cv2.imshow('Digit Recognizer', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

Webcam started. Position the digit inside the green box.
