In [7]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model

In [8]:
# Model configuration: the path below is relative, so it is resolved against the
# kernel's current working directory when this cell runs.
MODEL_PATH = "my_model.h5"  # saved Keras model file (presumably HDF5, judging by the extension)
# Loaded once at module level; webcam_prediction() reads this global for every frame.
model = load_model(MODEL_PATH)



In [9]:
def process_webcam_image(image):
    """Turn a webcam region of interest into an MNIST-style model input.

    The frame is binarised (dark ink becomes white on black), the bounding
    box of the largest external contour is cropped, padded to a square,
    scaled to 20x20 and centred on a 28x28 canvas. If no contour is found,
    the whole thresholded frame is resized to 28x28 instead.

    Args:
        image: BGR colour frame, or an already single-channel frame.
            Presumably uint8 as delivered by ``cv2.VideoCapture``; the
            adaptive threshold step requires 8-bit data.

    Returns:
        tuple: ``(final, canvas)`` where ``final`` is a float32 array of
        shape (1, 784) scaled to [0, 1] and ``canvas`` is the 28x28 uint8
        image it was built from (handy for a debug display).

    Raises:
        ValueError: if ``image`` is None or contains no pixels.
    """
    if image is None or image.size == 0:
        raise ValueError("process_webcam_image: received an empty image")

    digit_side = 20    # size of the digit inside the canvas
    canvas_side = 28   # final canvas size expected downstream
    border = (canvas_side - digit_side) // 2

    # Accept frames that are already grayscale instead of failing in cvtColor.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    # Inverted threshold: dark strokes on a light background become white (255).
    thresh = cv2.adaptiveThreshold(gray, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    thresh = cv2.medianBlur(thresh, 3)  # remove salt-and-pepper specks
    thresh = cv2.dilate(thresh, np.ones((3, 3), np.uint8), iterations=1)

    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; [-2] selects the contours on both.
    # The copy protects `thresh` on old releases that modify the source image.
    contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    if contours:
        largest = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest)

        # Pad the crop to a square so the digit keeps its aspect ratio.
        size = max(w, h)
        pad_w = (size - w) // 2
        pad_h = (size - h) // 2

        square = np.zeros((size, size), dtype=np.uint8)
        square[pad_h:pad_h + h, pad_w:pad_w + w] = thresh[y:y + h, x:x + w]

        # INTER_AREA averages source pixels when shrinking, so thin strokes
        # are not aliased away; keep the default bilinear filter when enlarging.
        interpolation = cv2.INTER_AREA if size > digit_side else cv2.INTER_LINEAR
        resized = cv2.resize(square, (digit_side, digit_side),
                             interpolation=interpolation)

        canvas = np.zeros((canvas_side, canvas_side), dtype=np.uint8)
        canvas[border:border + digit_side, border:border + digit_side] = resized
    else:
        # Nothing detected: fall back to the whole thresholded frame.
        canvas = cv2.resize(thresh, (canvas_side, canvas_side))

    final = canvas.astype("float32") / 255.0
    final = final.flatten()                # flatten to 784
    final = np.expand_dims(final, axis=0)  # (1,784)

    return final, canvas


In [10]:
def webcam_prediction():
    """Run a live digit-recognition loop on the default webcam.

    Each frame, a centred square region of interest (outlined in green) is
    passed through ``process_webcam_image`` and the module-level ``model``.
    The predicted class, its score and a thumbnail of the preprocessed
    image are drawn onto the displayed frame. The loop ends when 'q' is
    pressed or the camera stops delivering frames.

    The camera and the OpenCV window are always released, even when an
    exception or a kernel interrupt stops the loop; otherwise the device
    would stay locked until the kernel is restarted.

    Returns:
        None. If the webcam cannot be opened, an error is printed and the
        function returns immediately.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("ERROR: Could not open webcam")
        return

    print("Webcam ready! Draw numbers in the green box. Press 'q' to quit.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            h, w = frame.shape[:2]
            # Clamp the ROI so a small frame cannot produce negative offsets.
            roi_size = min(150, h, w)
            x1 = (w - roi_size) // 2
            y1 = (h - roi_size) // 2
            x2 = x1 + roi_size
            y2 = y1 + roi_size
            display_frame = frame.copy()
            cv2.rectangle(display_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Crop from the untouched frame so the green outline is not fed to the model.
            roi = frame[y1:y2, x1:x2]
            processed, debug_img = process_webcam_image(roi)
            prediction = model.predict(processed, verbose=0)
            predicted_class = np.argmax(prediction)
            confidence = np.max(prediction)

            cv2.putText(display_frame, f"Pred: {predicted_class}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(display_frame, f"Conf: {confidence:.2f}", (10, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            # Show what model sees (top-right thumbnail); skipped when the
            # frame is too small to hold the 100x100 overlay.
            if h >= 110 and w >= 110:
                debug_display = cv2.resize(debug_img, (100, 100))
                debug_display = cv2.cvtColor(debug_display, cv2.COLOR_GRAY2BGR)
                display_frame[10:110, w - 110:w - 10] = debug_display
                cv2.putText(display_frame, "CNN sees", (w - 105, 120),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)

            # Show webcam
            cv2.imshow("MNIST Webcam Prediction", display_frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    finally:
        # Always free the device and close the window, whatever ended the loop.
        cap.release()
        cv2.destroyAllWindows()

In [11]:
# Entry point: start the interactive webcam loop. The guard keeps the loop from
# starting if this code is imported as a module rather than run directly.
if __name__ == "__main__":
    webcam_prediction()

Webcam ready! Draw numbers in the green box. Press 'q' to quit.
