In [1]:
import cv2
import numpy as np
import tensorflow as tf


# Load the trained digit classifier. The path is relative to the notebook's
# current working directory.
model = tf.keras.models.load_model("./mnist.keras")

# Start webcam (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this check the capture loop would see a failed read on its first
    # iteration and exit silently, leaving no hint about what went wrong.
    cap.release()
    raise RuntimeError(
        "Could not open webcam (device index 0); check that a camera is "
        "connected and that this process has permission to use it."
    )

# Text overlaid on the video feed; replaced after each prediction attempt.
prediction_text = "Show a digit & press SPACE"
print("Press SPACE to predict, Q to quit")

# Region of interest for the digit, in frame pixel coordinates. These never
# change, so they are defined once instead of on every iteration.
x1, y1, x2, y2 = 100, 50, 500, 450


def _to_mnist_canvas(mask, canvas_size=28, box_size=20):
    """Scale a binary digit crop into a centred, square classifier input.

    The crop is resized so its longer side equals ``box_size`` while keeping
    its aspect ratio, then pasted into the middle of a black
    ``canvas_size`` x ``canvas_size`` image. Stretching the crop straight to
    28x28 would distort narrow digits (a "1" becomes a filled block) and leave
    no margin around the strokes.

    NOTE(review): the 20-in-28 framing follows the published MNIST
    preprocessing; this assumes the loaded model was trained on images framed
    that way -- confirm against the training notebook.

    Args:
        mask: 2-D uint8 array, digit pixels bright on a dark background.
        canvas_size: side length of the returned square image.
        box_size: side length of the box the digit is fitted into.

    Returns:
        A ``canvas_size`` x ``canvas_size`` array with the same dtype as
        ``mask``.
    """
    height, width = mask.shape[:2]
    scale = box_size / max(height, width)
    # Clamp to at least one pixel so very elongated crops stay resizable.
    new_w = max(1, int(round(width * scale)))
    new_h = max(1, int(round(height * scale)))
    resized = cv2.resize(mask, (new_w, new_h), interpolation=cv2.INTER_AREA)

    canvas = np.zeros((canvas_size, canvas_size), dtype=mask.dtype)
    top = (canvas_size - new_h) // 2
    left = (canvas_size - new_w) // 2
    canvas[top:top + new_h, left:left + new_w] = resized
    return canvas


try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the image so on-screen movement matches the user's movement.
        frame = cv2.flip(frame, 1)

        # `roi` is a view into `frame`: anything drawn on the frame inside this
        # region also changes the ROI pixels (and vice versa).
        roi = frame[y1:y2, x1:x2]

        # Preprocessing: grayscale -> blur -> inverted Otsu threshold, so dark
        # ink on a light background becomes white-on-black.
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        _, thresh = cv2.threshold(
            blur, 0, 255,
            cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
        )

        # Draw the guide box only after the ROI pixels have been processed.
        # Drawing it first put the green border inside the thresholded image,
        # where it could be picked up as the largest contour.
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        digit_img = None
        if contours:
            # Largest contour = digit
            largest = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(largest)

            # Ignore small blobs (noise, specks) below the area threshold.
            if area > 1500:
                x, y, w, h = cv2.boundingRect(largest)
                digit = _to_mnist_canvas(thresh[y:y + h, x:x + w])

                # Scale pixel values to [0, 1].
                digit_img = digit.astype("float32") / 255.0

                # Drawn on the ROI view, so the box appears in `frame`.
                cv2.rectangle(roi, (x, y), (x + w, y + h), (255, 0, 0), 2)

        key = cv2.waitKey(1) & 0xFF

        # SPACE -> run prediction
        if key == 32 and digit_img is not None:
            # The model is fed one flattened 784-value row per image.
            img = digit_img.reshape(1, 784)

            pred = model.predict(img, verbose=0)
            prediction = np.argmax(pred)
            confidence = pred[0][prediction]
            print(pred)
            print(prediction)
            print(confidence)

            if confidence >= 0.95:
                prediction_text = f"Pred: {prediction}  ({confidence:.2f})"
            else:
                prediction_text = "Could not catch"

        # Quit. The on-screen prompt says "Q", so accept either case.
        if key in (ord('q'), ord('Q')):
            break

        # Draw prediction on screen
        cv2.putText(
            frame,
            prediction_text,
            (150, 80),
            cv2.FONT_HERSHEY_SIMPLEX,
            1.2,
            (0, 0, 255),
            2
        )

        # Show windows
        cv2.imshow("MNIST Digit Recognition", frame)
        cv2.imshow("Threshold", thresh)
finally:
    # Always free the camera and close the windows, even when the loop raises
    # or the kernel is interrupted; otherwise the device stays locked until
    # the kernel restarts.
    cap.release()
    cv2.destroyAllWindows()
    # Pump the GUI event loop once more; some HighGUI backends only act on the
    # close request during the next waitKey call.
    cv2.waitKey(1)


2025-12-20 10:24:02.140591: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-12-20 10:24:02.140611: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-12-20 10:24:02.140616: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.66 GB
2025-12-20 10:24:02.140631: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-12-20 10:24:02.140639: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Press SPACE to predict, Q to quit


2025-12-20 10:24:21.595550: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[[5.0301028e-30 1.1192684e-29 4.8465578e-25 3.2936401e-12 0.0000000e+00
  8.4352480e-16 2.1504645e-08 2.3198628e-28 1.0000000e+00 4.9086612e-08]]
8
1.0
[[0.0000000e+00 0.0000000e+00 0.0000000e+00 2.4336202e-18 0.0000000e+00
  0.0000000e+00 1.5099144e-36 0.0000000e+00 1.0000000e+00 0.0000000e+00]]
8
1.0
[[2.5082693e-18 1.3356009e-27 1.7543847e-17 3.1464538e-06 0.0000000e+00
  1.2701913e-29 5.7479987e-24 1.5839891e-15 6.5903483e-10 9.9999690e-01]]
9
0.9999969
[[0.000000e+00 0.000000e+00 3.227777e-10 6.005261e-17 0.000000e+00
  0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 8.051023e-32]]
8
1.0
[[7.9958375e-12 5.6470586e-20 1.3891845e-16 1.0456870e-17 2.1345755e-37
  1.3633579e-19 1.7772065e-02 2.9572305e-36 9.8222798e-01 2.2497114e-18]]
8
0.982228
[[4.7813344e-11 5.0880764e-21 5.4140708e-17 2.7621887e-18 1.2011650e-37
  9.6987735e-21 5.7984020e-05 7.8380534e-36 9.9994206e-01 1.6396358e-18]]
8
0.99994206
[[5.58426395e-11 9.27985414e-21 2.29252029e-15 1.03173154e-17
  1.66992051e-38 1