### Webcam Digit Detection

In [None]:
!pip install uv -q
!uv pip install tensorflow opencv-python numpy matplotlib --system -q

### Load necessary packages

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

import cv2
import os
import numpy as np
import matplotlib.pyplot as plt

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

%matplotlib inline

### GPU configuration

In [None]:
# Enumerate the GPUs visible to TensorFlow.
gpu_device = tf.config.list_physical_devices('GPU')
print(f"Number of GPU = {len(gpu_device)}")

# Let TensorFlow allocate GPU memory on demand instead of reserving it all up
# front. The setting has to be identical on every GPU, so apply it to each
# device rather than only the first one.
for gpu in gpu_device:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Raised when the GPUs were already initialised (e.g. the cell is run
        # late in a session); report it instead of aborting the notebook.
        print(f"Could not enable memory growth for {gpu.name}: {err}")

### Load MNIST dataset

In [None]:
# Download (or read from the local Keras cache) the MNIST train/test splits.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report the array shapes, one split per group with a blank line in between.
for split_label, split_array in (("x_train", x_train), ("y_train", y_train)):
    print(f"Shape of {split_label}: {split_array.shape}")
print()
for split_label, split_array in (("x_test", x_test), ("y_test", y_test)):
    print(f"Shape of {split_label}: {split_array.shape}")

### Display sample images

In [None]:
# Show the first ROWS * COLS training digits in a grid, labelled with their class.
ROWS = 3
COLS = 20

plt.figure(figsize=(20, 3))
for i in range(ROWS * COLS):
    plt.subplot(ROWS, COLS, i + 1)
    # reshape(28, 28) is a no-op on the raw images, and keeps this cell
    # re-runnable after the normalisation cell has flattened x_train to 784.
    plt.imshow(x_train[i].reshape(28, 28), cmap=plt.cm.gray_r)

    plt.xticks([])
    plt.yticks([])
    plt.xlabel(y_train[i])
plt.tight_layout()
plt.show()

### Normalise data

In [None]:
def flatten_and_scale(images):
    """Flatten images to rows of 28 * 28 float32 values scaled to [0, 1].

    Only raw uint8 pixel data (0-255) is divided by 255; input that is already
    floating point is assumed to have been scaled by a previous run of this
    cell and is only reshaped/cast. This keeps the cell idempotent, so
    re-running it does not divide the data by 255 a second time.
    """
    flat = images.reshape(-1, 28 * 28).astype("float32")
    if images.dtype == np.uint8:
        flat = flat / 255.0
    return flat


x_train = flatten_and_scale(x_train)
x_test = flatten_and_scale(x_test)

### Build the model

In [None]:
# Seed Python, NumPy and the backend RNGs so weight initialisation and the
# per-epoch shuffling are repeatable across "Restart & Run All".
SEED = 42
keras.utils.set_random_seed(SEED)

# Fully connected classifier: 784 flattened pixels -> 256 -> 128 -> 10 class
# probabilities (softmax).
model = keras.Sequential([
    keras.Input(shape=(28 * 28,)),
    layers.Dense(units=256, activation='relu'),
    layers.Dense(units=128, activation='relu'),
    layers.Dense(units=10, activation='softmax')
])

model.summary()

### Compile the model

In [None]:
# Training configuration: Adam optimiser, cross-entropy over integer class
# labels (y is not one-hot encoded), and accuracy tracked per epoch.
compile_options = {
    'optimizer': 'adam',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

### Train the model

In [None]:
# Training hyperparameters.
EPOCHS = 5
BATCH_SIZE = 64
VALIDATION_SPLIT = 0.3  # fraction of the training data held out for validation

# `h` is the Keras History object; the plotting cell reads h.history.
h = model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=VALIDATION_SPLIT,
)

### Plot the training results

In [None]:
# One panel per tracked quantity: loss on the left, accuracy on the right.
fig, axes = plt.subplots(1, 2, figsize=(16, 4))

# (key in h.history, human-readable name used for labels and titles)
panels = (('loss', 'Loss'), ('accuracy', 'Accuracy'))

for ax, (history_key, pretty_name) in zip(axes, panels):
    ax.plot(h.history[history_key], label=f'Training {pretty_name}')
    ax.plot(h.history[f'val_{history_key}'], label=f'Validation {pretty_name}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(pretty_name)
    ax.set_title(f'Training and Validation {pretty_name}')
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

### Evaluate on test data

In [None]:
# Score the trained model on the held-out test split; as the cell's last
# expression, the returned values are displayed as the cell output.
model.evaluate(x=x_test, y=y_test)

### Webcam Digit Detection

In [None]:
# Live digit recognition: grab webcam frames, isolate the largest dark blob
# inside a fixed square region, convert it to an MNIST-style 28x28 image and
# classify it with the trained model.
# Keys: +/= raise the threshold, -/_ lower it, q/Q quits.
cap = cv2.VideoCapture(0)
threshold_value = 100

# Region of interest (ROI): a fixed 200x200 px square. It never changes, so it
# is computed once here rather than on every frame.
roi_size = 200
x1, y1 = 220, 140
x2, y2 = x1 + roi_size, y1 + roi_size

if not cap.isOpened():
    print("Could not open webcam (device 0).")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing, unplugged or stream ended).
            break

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Preprocess ROI: grayscale -> blur -> inverted binary threshold, so
        # dark ink becomes white on black like the MNIST training images.
        roi = frame[y1:y2, x1:x2]
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        _, thresh = cv2.threshold(blurred, threshold_value, 255, cv2.THRESH_BINARY_INV)

        # Find contours and keep only the largest one as the digit candidate.
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if contours:
            cnt = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(cnt)

            # Ignore small blobs, which are most likely noise.
            if area > 500:
                # NOTE: named bx/by/bw/bh on purpose -- unpacking into `h`
                # would overwrite the training History object `h` that the
                # plotting cell reads.
                bx, by, bw, bh = cv2.boundingRect(cnt)
                digit = thresh[by:by + bh, bx:bx + bw]

                # Centre the crop on a square black canvas, then add a border
                # so the digit does not touch the image edges.
                side = max(bw, bh)
                square = np.zeros((side, side), dtype=np.uint8)
                x_off = (side - bw) // 2
                y_off = (side - bh) // 2
                square[y_off:y_off + bh, x_off:x_off + bw] = digit
                padded = cv2.copyMakeBorder(square, 20, 20, 20, 20, cv2.BORDER_CONSTANT, value=0)

                # Resize and normalize exactly like the training data:
                # 28x28, float32 in [0, 1], flattened to one row of 784 values.
                resized = cv2.resize(padded, (28, 28), interpolation=cv2.INTER_AREA)
                normalized = resized.astype("float32") / 255.0
                input_data = normalized.reshape(1, 28 * 28)

                # Predict: most likely class and its softmax probability in percent.
                prediction = model.predict(input_data, verbose=0)
                digit_class = np.argmax(prediction)
                confidence = np.max(prediction) * 100

                cv2.putText(frame, f"{digit_class}", (x1 + 70, y1 + 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 3)
                cv2.putText(frame, f"{confidence:.0f}%", (x1 + 70, y1 + 90),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                # Enlarged view of the exact image that was fed to the model.
                cv2.imshow('Digit', cv2.resize(resized, (100, 100)))

        cv2.putText(frame, "Place digit here", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        cv2.putText(frame, f"Threshold: {threshold_value} (+/- to adjust, Q to quit)",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        cv2.imshow('Webcam', frame)
        cv2.imshow('Threshold', thresh)

        # Poll the keyboard for 1 ms; keep only the low byte of the key code.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == ord('Q'):
            break
        elif key == ord('+') or key == ord('='):
            threshold_value = min(255, threshold_value + 5)
        elif key == ord('-') or key == ord('_'):
            threshold_value = max(0, threshold_value - 5)
finally:
    # Always free the camera and close the preview windows, even when the loop
    # ends through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Teardown: free the webcam first, then close every OpenCV preview window.
for teardown_step in (cap.release, cv2.destroyAllWindows):
    teardown_step()