In [1]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from collections import deque
from statistics import mode

In [2]:
# Trained digit classifier; the path is relative to the notebook's working directory.
model = load_model('mnist_model.keras')

# Open camera 0. DirectShow is requested first, passed as an explicit
# apiPreference rather than the legacy "index + backend offset" sum.
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
if not cap.isOpened():
    # DirectShow is a Windows-only backend; let OpenCV pick the platform default.
    cap.release()
    cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail here with a clear message instead of letting the capture loop
    # exit silently on its first read.
    raise RuntimeError(
        "Could not open camera 0 (tried DirectShow and the default backend)."
    )

In [3]:
def to_model_shape(x28):
    """
    Convert a 28x28 grayscale image to the batch shape expected by the model.

    The pixel values are cast to float32 and divided by 255, then the array is
    reshaped into a batch of one sample whose layout follows the per-sample
    part of the global ``model.input_shape``:

        (None, 784)        -> (1, 784)         flat input
        (None, 28, 28)     -> (1, 28, 28)      image without channel axis
        (None, 28, 28, 1)  -> (1, 28, 28, 1)   channels-last image
        (None, 1, 28, 28)  -> (1, 1, 28, 28)   channels-first image

    Any other declared shape (unknown dimensions, multi-input models, ...)
    falls back to (1, 28, 28, 1).

    Parameters:
        x28: numpy array holding 28*28 pixel intensities in the 0-255 range.

    Returns:
        float32 numpy array with a leading batch axis of size 1.
    """
    x28 = x28.astype("float32") / 255.0
    in_shape = model.input_shape

    # Drop the batch dimension; what remains is the layout of one sample.
    per_sample = tuple(in_shape[1:])

    # Only layouts that are an unambiguous rearrangement of a 28x28 image are
    # honoured; tuple membership uses ==, so odd entries just do not match.
    known_layouts = ((784,), (28, 28), (28, 28, 1), (1, 28, 28))
    if per_sample in known_layouts:
        return x28.reshape((1,) + per_sample)

    # Historical default for every non-flat model.
    return x28.reshape(1, 28, 28, 1)


def preprocess_mnist(roi_bgr):
    """
    Turn a BGR webcam crop into an MNIST-style 28x28 uint8 image.

    Pipeline:
        1. Grayscale -> 5x5 Gaussian blur -> inverted adaptive threshold, so
           dark strokes become 255 on a 0 background.
        2. Pick the external contour with the largest area; reject the crop
           if there is no contour or its bounding box covers < 300 pixels.
        3. Cut out the bounding box, thicken it with one 3x3 dilation and
           scale it so the longer side is 20 px (aspect ratio kept).
        4. Paste it in the middle of a black 28x28 canvas, then translate by
           whole pixels so the mean position of the nonzero pixels moves
           towards (14, 14).

    Returns:
        (canvas, info). On success `canvas` is the 28x28 uint8 array and
        `info` is {"area": bounding-box area}. On rejection `canvas` is None
        and `info` is {"reason": "no contour"} or {"reason": "tiny area"}.
    """
    # --- binarise -----------------------------------------------------------
    gray = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    binary = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )

    # --- locate the digit candidate ----------------------------------------
    contours, _ = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        return None, {"reason": "no contour"}

    biggest = max(contours, key=cv2.contourArea)
    bx, by, bw, bh = cv2.boundingRect(biggest)
    box_area = bw * bh
    if box_area < 300:
        # Too small to be a digit; most likely noise.
        return None, {"reason": "tiny area"}

    # --- crop and thicken ---------------------------------------------------
    kernel = np.ones((3, 3), np.uint8)
    glyph = cv2.dilate(binary[by:by + bh, bx:bx + bw], kernel, iterations=1)

    # --- scale: longer side becomes 20 px, shorter side at least 1 px -------
    src_h, src_w = glyph.shape
    if src_h > src_w:
        dst_h = 20
        dst_w = max(1, int(src_w * (20.0 / src_h)))
    else:
        dst_w = 20
        dst_h = max(1, int(src_h * (20.0 / src_w)))
    glyph = cv2.resize(glyph, (dst_w, dst_h), interpolation=cv2.INTER_AREA)

    # --- centre geometrically on a 28x28 canvas -----------------------------
    canvas = np.zeros((28, 28), dtype=np.uint8)
    left = (28 - dst_w) // 2
    top = (28 - dst_h) // 2
    canvas[top:top + dst_h, left:left + dst_w] = glyph

    # --- re-centre on the mean position of the nonzero pixels ---------------
    rows, cols = np.nonzero(canvas)
    if len(cols):
        dx = int(round(14 - cols.mean()))
        dy = int(round(14 - rows.mean()))
        shift = np.float32([[1, 0, dx], [0, 1, dy]])
        canvas = cv2.warpAffine(canvas, shift, (28, 28),
                                flags=cv2.INTER_NEAREST,
                                borderValue=0)

    return canvas, {"area": box_area}

In [4]:
# Tuning knobs for the live loop.
SMOOTHING_WINDOW = 7   # number of most recent (class, score) pairs kept for voting
CONF = 0.55            # minimum top score a prediction needs to take part in the vote

# Rolling buffer of recent predictions; the oldest entry drops out automatically.
history = deque(maxlen=SMOOTHING_WINDOW)

In [5]:
# Live loop: grab a frame, classify the digit inside the centre box, overlay
# the smoothed prediction, and repeat until ESC is pressed or the camera
# stops delivering frames.
while True:
    ok, frame = cap.read()
    if not ok:
        break

    # Centred 160x160 region of interest, clamped to the frame borders.
    H, W = frame.shape[:2]
    bw = bh = 160
    x1 = max(0, W // 2 - bw // 2)
    y1 = max(0, H // 2 - bh // 2)
    x2 = min(W, W // 2 + bw // 2)
    y2 = min(H, H // 2 + bh // 2)
    roi = frame[y1:y2, x1:x2]

    # x28 is None when no usable digit candidate was found in the ROI.
    x28, info = preprocess_mnist(roi)

    # Draw on a copy so `frame` stays untouched; green box = candidate found,
    # red box = nothing found (colours are BGR).
    disp = frame.copy()
    if x28 is None:
        color = (0, 0, 255)
    else:
        color = (0, 255, 0)
    cv2.rectangle(disp, (x1, y1), (x2, y2), color, 2)

    if x28 is None:
        text = "No digit"
    else:
        inp = to_model_shape(x28)
        preds = model.predict(inp, verbose=0)[0]
        cls = int(np.argmax(preds))
        prob = float(np.max(preds))
        history.append((cls, prob))

        # Majority vote among the buffered predictions that reached CONF.
        votes = [label for label, score in history if score >= CONF]
        if not votes:
            # Nothing confident enough in the buffer: show the raw score only.
            text = f"Pred: ?  Prob: {prob:.2f}"
        else:
            cls_sm = mode(votes)
            # Best score that the winning class has in the whole buffer.
            prob_sm = max(score for label, score in history if label == cls_sm)
            text = f"Pred: {cls_sm}  Prob: {prob_sm:.2f}"

        # Debug window: the 28x28 model input, enlarged 5x without smoothing.
        view = cv2.resize(x28, (140, 140), interpolation=cv2.INTER_NEAREST)
        cv2.imshow("digit28", view)

    # Overlay the label and show the annotated frame.
    cv2.putText(disp, text, (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9,
                (255, 0, 255), 2)
    cv2.imshow("input", disp)

    # waitKey also services the GUI windows; key code 27 is ESC.
    key = cv2.waitKey(1) & 0xFF
    if key == 27:
        break

In [6]:
# Free the camera device so it can be opened again (by this notebook or by
# another program).
cap.release()

# Close the "input" and "digit28" windows.
cv2.destroyAllWindows()

# HighGUI processes window events only inside waitKey(); because the notebook
# kernel keeps running after this cell, pump the event loop once so the
# windows are really removed from the screen instead of lingering frozen.
cv2.waitKey(1)