In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.datasets import mnist
from tensorflow.keras.preprocessing.image import ImageDataGenerator

  if not hasattr(np, "object"):


In [None]:
class NeuralNetwork:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Parameters are updated with plain gradient descent on the batch-averaged
    gradients computed in :meth:`backPropagation`.

    Parameters
    ----------
    input_size : int
        Number of features per sample (columns of ``X``).
    hidden_size : int
        Number of units in the hidden layer.
    output_size : int
        Number of output classes.
    lr : float
        Gradient-descent step size.
    seed : int or None
        When given, weights are drawn from a private ``RandomState(seed)`` so
        two networks built with the same seed start identically. When ``None``
        the global ``np.random`` state is used.
    """

    def __init__(self, input_size=784, hidden_size=128, output_size=10, lr=0.01, seed=None):
        self.lr = lr

        # Both ``np.random`` and ``RandomState`` expose ``randn``, so the
        # initialisation code below is the same for the seeded and unseeded case.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Hidden layer: Gaussian weights scaled by sqrt(2 / fan_in), zero biases.
        self.W1 = rng.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
        self.b1 = np.zeros((1, hidden_size))

        # Output layer: Gaussian weights scaled by sqrt(1 / fan_in), zero biases.
        self.W2 = rng.randn(hidden_size, output_size) * np.sqrt(1. / hidden_size)
        self.b2 = np.zeros((1, output_size))

    def relu(self, x, der=False):
        """Return ``max(0, x)`` element-wise, or its derivative when ``der`` is true.

        The derivative is 1.0 where ``x > 0`` and 0.0 elsewhere (including at 0).
        """
        if der:
            return (x > 0).astype(float)
        return np.maximum(0, x)

    def softmax(self, x):
        """Row-wise softmax of a 2-D array ``x``.

        The row maximum is subtracted before exponentiating so large logits do
        not overflow; this does not change the result.
        """
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def feedForward(self, X):
        """Run a forward pass and return the class probabilities for each row of ``X``.

        The intermediate values (``z1``, ``a1``, ``z2``, ``probs``) are stored on
        the instance because :meth:`backPropagation` reads them.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.relu(self.z1)

        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.probs = self.softmax(self.z2)
        return self.probs

    def backPropagation(self, X, Y_oh):
        """Apply one gradient-descent step using the cached forward pass.

        Must be called right after ``feedForward(X)`` on the same ``X``: it uses
        the ``z1``, ``a1`` and ``probs`` values that call stored.

        Parameters
        ----------
        X : array of shape (m, input_size)
            The batch that was just fed forward.
        Y_oh : array of shape (m, output_size)
            One-hot targets for the batch.
        """
        m = X.shape[0]
        if m == 0:
            # An empty batch carries no gradient; skipping also avoids 1/0 below.
            return

        # Output layer: for softmax + cross-entropy the gradient w.r.t. the
        # logits is simply (probabilities - one-hot targets).
        dz2 = self.probs - Y_oh
        dW2 = (1/m) * np.dot(self.a1.T, dz2)
        db2 = (1/m) * np.sum(dz2, axis=0, keepdims=True)

        # Hidden layer: propagate through W2 (pre-update values), then gate by
        # the ReLU derivative.
        da1 = np.dot(dz2, self.W2.T)
        dz1 = da1 * self.relu(self.z1, der=True)
        dW1 = (1/m) * np.dot(X.T, dz1)
        db1 = (1/m) * np.sum(dz1, axis=0, keepdims=True)

        # Parameter updates (in place)
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2

    def train(self, X, Y):
        """Run one forward pass and one gradient step on the batch ``(X, Y)``."""
        self.feedForward(X)
        self.backPropagation(X, Y)

In [None]:
# Load the MNIST train/test images and labels through Keras
(train_img, train_label), (test_img, test_label) = mnist.load_data()

# Flatten every image into a 784-long row and divide by 255 to rescale the pixels
n_train, n_test = train_img.shape[0], test_img.shape[0]
train_img = train_img.reshape(n_train, 784) / 255.0
test_img = test_img.reshape(n_test, 784) / 255.0

# One-Hot Encode Labels
def one_hot(y, num_classes=10):
    """One-hot encode integer class labels.

    Parameters
    ----------
    y : array-like of int
        Class labels; any shape, flattened before encoding.
    num_classes : int
        Width of the encoding (number of columns). Defaults to 10.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number of labels, num_classes)`` with a single
        1 per row at the label's column.

    Raises
    ------
    IndexError
        If any label lies outside ``[0, num_classes)``.
    """
    labels = np.asarray(y).ravel()
    oh = np.zeros((labels.size, num_classes))
    if labels.size == 0:
        return oh

    # Negative labels would otherwise wrap around via NumPy's negative indexing
    # and silently mark the wrong class, so reject them explicitly.
    if labels.min() < 0 or labels.max() >= num_classes:
        raise IndexError(f"labels must be in [0, {num_classes})")

    oh[np.arange(labels.size), labels] = 1
    return oh

Y_train_oh = one_hot(train_label)

# Training
# Seed NumPy's global RNG before the network draws its initial weights so that
# a fresh "Restart & Run All" reproduces the same accuracies.
SEED = 42
np.random.seed(SEED)

nn = NeuralNetwork()
epochs = 10
batch_size = 32

for epoch in range(epochs):
    # One pass over the training set in consecutive mini-batches (the last
    # batch may be shorter than batch_size).
    for i in range(0, len(train_img), batch_size):
        X_batch = train_img[i:i+batch_size]
        Y_batch = Y_train_oh[i:i+batch_size]
        nn.train(X_batch, Y_batch)

    # Check accuracy on the test set after each epoch: the predicted class is
    # the column with the highest probability.
    predictions = nn.feedForward(test_img)
    accuracy = np.mean(np.argmax(predictions, axis=1) == test_label)
    print(f"Epoch {epoch+1} Accuracy: {accuracy * 100:.2f}%")

Epoch 1 Accuracy: 89.76%
Epoch 2 Accuracy: 91.26%
Epoch 3 Accuracy: 92.28%
Epoch 4 Accuracy: 92.94%
Epoch 5 Accuracy: 93.48%
Epoch 6 Accuracy: 93.96%
Epoch 7 Accuracy: 94.25%
Epoch 8 Accuracy: 94.52%
Epoch 9 Accuracy: 94.87%
Epoch 10 Accuracy: 95.10%


In [None]:
cap = cv2.VideoCapture(0)

# Region of interest (top-left corner and size, in pixels). It never changes,
# so it is defined once outside the loop.
x, y, w, h = 200, 150, 200, 200

try:
    if not cap.isOpened():
        print("Could not open camera 0; nothing to display.")

    while cap.isOpened():
        check, frame = cap.read()
        if not check:
            break

        # Extract the ROI BEFORE drawing anything on the frame: the slice is a
        # view into `frame`, so drawing the rectangle first would put its
        # border pixels into the image handed to the network.
        roi = frame[y:y+h, x:x+w]

        # Skip prediction when the frame is too small to contain any of the ROI.
        if roi.size:
            gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

            # Invert intensities (dark strokes on a light background become
            # light strokes on a dark background).
            gray_roi = cv2.bitwise_not(gray_roi)

            # Resize to 28x28
            resized_roi = cv2.resize(gray_roi, (28, 28), interpolation=cv2.INTER_AREA)

            # Normalize and flatten into a single-row batch
            normalized_roi = resized_roi.reshape(1, 784) / 255.0

            # Predict
            pred_probs = nn.feedForward(normalized_roi)
            prediction = np.argmax(pred_probs)
            confidence = np.max(pred_probs)

            # Display result
            label = f"Digit: {prediction} ({confidence*100:.1f}%)"
            cv2.putText(frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            cv2.imshow("What the NN sees (ROI)", resized_roi)

        # Draw the ROI outline only now that the network input has been taken.
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv2.imshow("MNIST Live Feed", frame)

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if the loop raised or
    # the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()