In [1]:
!pip install opencv-python tensorflow numpy matplotlib scikit-learn




[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os

class HandDigitRecognizer:
    """CNN-based recognizer for single digits (0-9) shown to a webcam.

    Bundles a small Keras convolutional network trained on MNIST with the
    OpenCV code needed to classify a fixed region of a live camera feed.

    Attributes:
        model: The compiled Keras model, or ``None`` until ``create_model``
            or a successful ``load_model`` has run.
        is_trained: ``True`` once the model has been trained or loaded.
    """

    def __init__(self):
        self.model = None
        self.is_trained = False

    def create_model(self):
        """Build and compile the CNN, store it on ``self.model`` and return it.

        The network takes 28x28 single-channel images and outputs a softmax
        over the 10 digit classes.
        """
        model = keras.Sequential([
            # An explicit Input object replaces ``input_shape=`` on the first
            # layer, which makes Keras emit a UserWarning.
            keras.Input(shape=(28, 28, 1)),
            layers.Conv2D(32, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dropout(0.2),
            layers.Dense(10, activation='softmax')
        ])

        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        self.model = model
        return model

    def train_model(self, epochs=10):
        """Train the model on MNIST and report accuracy on the MNIST test set.

        Args:
            epochs: Number of passes over the training set.

        Returns:
            The Keras ``History`` object returned by ``fit``.
        """
        # Calling train_model() before create_model() used to fail with an
        # AttributeError on ``None``; build the network on demand instead.
        if self.model is None:
            self.create_model()

        print("Loading MNIST dataset...")
        (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

        # Scale pixels to [0, 1] and add the trailing channel axis.
        x_train = (x_train.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
        x_test = (x_test.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

        print("Training model...")
        history = self.model.fit(x_train, y_train,
                                 epochs=epochs,
                                 batch_size=128,
                                 validation_data=(x_test, y_test),
                                 verbose=1)

        test_loss, test_acc = self.model.evaluate(x_test, y_test, verbose=0)
        print(f"Test accuracy: {test_acc:.4f}")

        self.is_trained = True
        return history

    def save_model(self, filepath='digit_recognition_model.h5'):
        """Save the model to ``filepath`` if one has been trained or loaded."""
        if self.model is not None and self.is_trained:
            self.model.save(filepath)
            print(f"Model saved to {filepath}")
        else:
            print("No trained model to save")

    def load_model(self, filepath='digit_recognition_model.h5'):
        """Load a saved model from ``filepath``; only report it if missing."""
        if os.path.exists(filepath):
            self.model = keras.models.load_model(filepath)
            self.is_trained = True
            print(f"Model loaded from {filepath}")
        else:
            print(f"Model file {filepath} not found")

    def preprocess_image(self, img):
        """Convert an image into a ``(1, 28, 28, 1)`` float32 batch in [0, 1].

        Args:
            img: Array shaped ``(H, W)``, ``(H, W, 1)``, ``(H, W, 3)`` (treated
                as BGR) or ``(H, W, 4)`` (treated as BGRA), with values on a
                0-255 scale.

        Returns:
            The grayscale image resized to 28x28, divided by 255, with batch
            and channel axes added.
        """
        if img.ndim == 3:
            channels = img.shape[2]
            if channels == 1:
                # Already grayscale; cvtColor(BGR2GRAY) rejects 1-channel input.
                img = img[:, :, 0]
            elif channels == 4:
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
            else:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        if img.shape[:2] != (28, 28):
            # INTER_AREA is the interpolation OpenCV recommends for shrinking;
            # the default bilinear mode can drop thin strokes at ~10x reduction.
            img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)

        img = img.astype('float32') / 255.0
        return img.reshape(1, 28, 28, 1)

    def predict_digit(self, img):
        """Classify ``img`` and return ``(digit, confidence)``.

        Returns ``(None, 0)`` when no trained model is available. Otherwise
        ``digit`` is the argmax of the model output and ``confidence`` is the
        corresponding maximum value.
        """
        if not self.is_trained or self.model is None:
            return None, 0

        processed_img = self.preprocess_image(img)
        prediction = self.model.predict(processed_img, verbose=0)
        digit = np.argmax(prediction)
        confidence = np.max(prediction)

        return digit, confidence

    def run_webcam_recognition(self):
        """Classify a fixed region of the webcam feed until 'q' is pressed.

        Each frame is mirrored for display, the region inside the green box
        is thresholded and classified, and the prediction is overlaid when
        its confidence exceeds 0.5. Returns ``None``.
        """
        if not self.is_trained:
            print("Model not trained! Please train the model first.")
            return

        cap = cv2.VideoCapture(0)

        if not cap.isOpened():
            print("Error: Could not open webcam")
            cap.release()
            return

        print("Starting webcam digit recognition...")
        print("Instructions:")
        print("- Draw digits in the green rectangle")
        print("- Press 'c' to clear the drawing area")
        print("- Press 'q' to quit")

        # Region of the displayed frame that gets classified.
        box_x, box_y, box_w, box_h = 50, 50, 300, 300

        # try/finally guarantees the camera and window are released even if
        # an OpenCV or model call raises inside the loop.
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Mirror the frame so the preview behaves like a mirror.
                frame = cv2.flip(frame, 1)

                # Take the ROI *before* drawing any overlay so the green
                # border never reaches the classifier, and flip it back so
                # the digit is not mirrored when the model sees it.
                roi = cv2.flip(
                    frame[box_y:box_y + box_h, box_x:box_x + box_w], 1)

                cv2.rectangle(frame, (box_x, box_y),
                              (box_x + box_w, box_y + box_h),
                              (0, 255, 0), 2)

                # Binarize and invert: white digit on black, like MNIST.
                roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                _, roi_thresh = cv2.threshold(roi_gray, 127, 255,
                                              cv2.THRESH_BINARY)
                roi_thresh = cv2.bitwise_not(roi_thresh)

                digit, confidence = self.predict_digit(roi_thresh)

                if digit is not None and confidence > 0.5:
                    cv2.putText(frame, f"Digit: {digit}", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                    cv2.putText(frame, f"Confidence: {confidence:.2f}", (10, 70),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

                cv2.putText(frame, "Draw digit in green box",
                            (10, frame.shape[0] - 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                cv2.putText(frame, "Press 'c' to clear, 'q' to quit",
                            (10, frame.shape[0] - 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                # Thumbnail of what the classifier sees, in the top-right corner.
                roi_display = cv2.resize(roi_thresh, (100, 100))
                frame[10:110, frame.shape[1] - 110:frame.shape[1] - 10] = \
                    cv2.cvtColor(roi_display, cv2.COLOR_GRAY2BGR)

                cv2.imshow('Hand Digit Recognition', frame)

                # 'c' is advertised in the help text, but this mode has no
                # drawing canvas to clear, so only 'q' needs handling.
                key = cv2.waitKey(1) & 0xFF
                if key == ord('q'):
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()

def main():
    """Run the webcam digit-recognition demo from start to finish.

    A saved model is loaded when one exists on disk; otherwise a new network
    is built, trained for a few epochs and saved. Control then passes to the
    live webcam loop.
    """
    app = HandDigitRecognizer()

    for banner_line in ("Hand Digit Recognition System",
                        "============================"):
        print(banner_line)

    # A successful load flips ``is_trained``; if it is still unset afterwards
    # there was nothing to load and a model has to be trained now.
    app.load_model()
    needs_training = not app.is_trained
    if needs_training:
        print("No pre-trained model found. Training new model...")
        app.create_model()
        app.train_model(epochs=5)  # few epochs so the first start stays quick
        app.save_model()

    app.run_webcam_recognition()

# Start the demo only when this code runs as the top-level module, not when
# it is imported from elsewhere.
if __name__ == "__main__":
    main()

Hand Digit Recognition System
Model file digit_recognition_model.h5 not found
No pre-trained model found. Training new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Loading MNIST dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Training model...
Epoch 1/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 38ms/step - accuracy: 0.7864 - loss: 0.6746 - val_accuracy: 0.9828 - val_loss: 0.0528
Epoch 2/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 37ms/step - accuracy: 0.9759 - loss: 0.0803 - val_accuracy: 0.9867 - val_loss: 0.0385
Epoch 3/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 36ms/step - accuracy: 0.9842 - loss: 0.0517 - val_accuracy: 0.9902 - val_loss: 0.0315
Epoch 4/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 35ms/step - accuracy: 0.9880 - loss: 0.0383 - val_accuracy: 0.9886 - val_loss: 0.0347
Epoch 5/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 39ms/step - accuracy: 0.9896 - loss: 0.0331 - val_a



Test accuracy: 0.9898
Model saved to digit_recognition_model.h5
Starting webcam digit recognition...
Instructions:
- Draw digits in the green rectangle
- Press 'c' to clear the drawing area
- Press 'q' to quit


In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from scipy import ndimage

class ImprovedHandDigitRecognizer:
    """Digit recognizer with a deeper CNN, augmentation and two input modes.

    Digits can be drawn with the mouse on a canvas (``run_drawing_recognition``)
    or held up to a webcam (``run_webcam_recognition``). Inputs are cropped to
    the largest contour, squared and scaled to 28x28 before classification.

    Attributes:
        model: The compiled Keras model, or ``None`` until created or loaded.
        is_trained: ``True`` once the model has been trained or loaded.
        drawing: ``True`` while the left mouse button is held down.
        canvas: 400x400 uint8 image holding the mouse drawing (white on black).
        last_prediction: ``(digit, confidence)`` last shown in drawing mode,
            or ``None``.
        prediction_history: Up to five most recent ``(digit, confidence)``
            pairs, used for stability filtering.
    """

    def __init__(self):
        self.model = None
        self.is_trained = False
        self.drawing = False
        self.canvas = np.zeros((400, 400), dtype=np.uint8)
        self.last_prediction = None
        self.prediction_history = []

    def create_model(self):
        """Build and compile the CNN, store it on ``self.model`` and return it."""
        model = keras.Sequential([
            # An explicit Input object replaces ``input_shape=`` on the first
            # layer, which makes Keras emit a UserWarning.
            keras.Input(shape=(28, 28, 1)),

            # First conv block
            layers.Conv2D(32, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.Conv2D(32, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),

            # Second conv block
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),

            # Third conv block
            layers.Conv2D(128, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.25),

            # Classifier head
            layers.Flatten(),
            layers.Dense(512, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            layers.Dense(10, activation='softmax')
        ])

        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        self.model = model
        return model

    def augment_data(self, x_train, y_train, batch_size=32):
        """Return a generator yielding randomly augmented training batches.

        Args:
            x_train: Training images, shaped ``(N, 28, 28, 1)`` by the caller
                in ``train_model``.
            y_train: Integer labels aligned with ``x_train``.
            batch_size: Number of samples per generated batch.

        Returns:
            The iterator produced by ``ImageDataGenerator.flow``.
        """
        datagen = keras.preprocessing.image.ImageDataGenerator(
            rotation_range=10,
            width_shift_range=0.1,
            height_shift_range=0.1,
            zoom_range=0.1,
            shear_range=0.1
        )

        return datagen.flow(x_train, y_train, batch_size=batch_size)

    def train_model(self, epochs=15, batch_size=32):
        """Train on augmented MNIST and report accuracy on the MNIST test set.

        Args:
            epochs: Number of training epochs.
            batch_size: Batch size used both for the augmentation generator
                and for computing ``steps_per_epoch``, so the two always agree.

        Returns:
            The Keras ``History`` object returned by ``fit``.
        """
        # Calling train_model() before create_model() used to fail with an
        # AttributeError on ``None``; build the network on demand instead.
        if self.model is None:
            self.create_model()

        print("Loading MNIST dataset...")
        (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

        # Scale pixels to [0, 1] and add the trailing channel axis.
        x_train = (x_train.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
        x_test = (x_test.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

        train_generator = self.augment_data(x_train, y_train,
                                            batch_size=batch_size)

        print("Training improved model...")
        history = self.model.fit(
            train_generator,
            steps_per_epoch=len(x_train) // batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            verbose=1
        )

        test_loss, test_acc = self.model.evaluate(x_test, y_test, verbose=0)
        print(f"Test accuracy: {test_acc:.4f}")

        self.is_trained = True
        return history

    def save_model(self, filepath='improved_digit_model.h5'):
        """Save the model to ``filepath`` if one has been trained or loaded."""
        if self.model is not None and self.is_trained:
            self.model.save(filepath)
            print(f"Model saved to {filepath}")
        else:
            print("No trained model to save")

    def load_model(self, filepath='improved_digit_model.h5'):
        """Load a saved model from ``filepath``; only report it if missing."""
        if os.path.exists(filepath):
            self.model = keras.models.load_model(filepath)
            self.is_trained = True
            print(f"Model loaded from {filepath}")
        else:
            print(f"Model file {filepath} not found")

    def preprocess_digit_image(self, img, binarize=True):
        """Isolate the digit in ``img`` and return a ``(1, 28, 28, 1)`` batch.

        Args:
            img: uint8 image shaped ``(H, W)``, ``(H, W, 1)``, ``(H, W, 3)``
                (treated as BGR) or ``(H, W, 4)`` (treated as BGRA).
            binarize: When ``True`` (default) the image is blurred and passed
                through an inverse adaptive threshold, which suits dark ink on
                a lighter background. Pass ``False`` for input that is already
                a white-on-black mask (such as the drawing canvas): inverse
                adaptive thresholding of thick solid strokes leaves only
                their outline.

        Returns:
            float32 array in [0, 1] holding the largest contour's bounding
            box (padded, squared and resized to 28x28). If no contour is
            found the whole image is resized instead.
        """
        if img.ndim == 3:
            channels = img.shape[2]
            if channels == 1:
                # Already grayscale; cvtColor(BGR2GRAY) rejects 1-channel input.
                img = img[:, :, 0]
            elif channels == 4:
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
            else:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        if binarize:
            # Smooth sensor noise, then binarize relative to local brightness
            # so uneven lighting does not swamp the digit.
            img = cv2.GaussianBlur(img, (5, 5), 0)
            img = cv2.adaptiveThreshold(img, 255,
                                        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                        cv2.THRESH_BINARY_INV, 11, 2)

        # findContours returns (contours, hierarchy) in OpenCV 4 and
        # (image, contours, hierarchy) in OpenCV 3; [-2] works for both.
        contours = cv2.findContours(img, cv2.RETR_EXTERNAL,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]

        if len(contours) > 0:
            # The largest contour is assumed to be the digit.
            largest_contour = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(largest_contour)

            # Grow the box by ``padding`` on every side, clamping each edge
            # independently so a box touching the left/top border does not
            # gain extra width/height on the opposite side.
            padding = 10
            left = max(0, x - padding)
            top = max(0, y - padding)
            right = min(img.shape[1], x + w + padding)
            bottom = min(img.shape[0], y + h + padding)

            digit_img = img[top:bottom, left:right]
            h, w = digit_img.shape[:2]

            # Pad the shorter side with black so the crop is square and the
            # digit keeps its aspect ratio when resized.
            if w > h:
                pad_top = (w - h) // 2
                pad_bottom = w - h - pad_top
                digit_img = np.pad(digit_img,
                                   ((pad_top, pad_bottom), (0, 0)), 'constant')
            elif h > w:
                pad_left = (h - w) // 2
                pad_right = h - w - pad_left
                digit_img = np.pad(digit_img,
                                   ((0, 0), (pad_left, pad_right)), 'constant')
        else:
            digit_img = img

        # INTER_AREA is the interpolation OpenCV recommends for shrinking;
        # the default bilinear mode can drop thin strokes.
        digit_img = cv2.resize(digit_img, (28, 28),
                               interpolation=cv2.INTER_AREA)

        # Close small gaps left by thresholding and resizing.
        kernel = np.ones((2, 2), np.uint8)
        digit_img = cv2.morphologyEx(digit_img, cv2.MORPH_CLOSE, kernel)

        digit_img = digit_img.astype('float32') / 255.0
        return digit_img.reshape(1, 28, 28, 1)

    def _predict_processed(self, processed_img):
        """Classify an already-preprocessed batch and apply stability filtering.

        The raw ``(digit, confidence)`` is appended to ``prediction_history``
        (capped at five entries). If the last three entries all name the same
        digit with confidence above 0.6, that digit is returned with the mean
        of those confidences; otherwise the raw result is returned.
        """
        prediction = self.model.predict(processed_img, verbose=0)
        digit = np.argmax(prediction)
        confidence = np.max(prediction)

        self.prediction_history.append((digit, confidence))
        # Keep only the five most recent predictions.
        del self.prediction_history[:-5]

        if len(self.prediction_history) >= 3:
            recent = self.prediction_history[-3:]
            recent_digits = [pred[0] for pred in recent]
            recent_confidences = [pred[1] for pred in recent]

            # Unanimous and confident: report the smoothed confidence.
            if len(set(recent_digits)) == 1 and min(recent_confidences) > 0.6:
                return recent_digits[0], np.mean(recent_confidences)

        return digit, confidence

    def predict_digit_stable(self, img, binarize=True):
        """Preprocess ``img``, classify it and return ``(digit, confidence)``.

        Args:
            img: Image accepted by ``preprocess_digit_image``.
            binarize: Forwarded to ``preprocess_digit_image``.

        Returns:
            ``(None, 0)`` when no trained model is available, otherwise the
            stability-filtered prediction.
        """
        if not self.is_trained or self.model is None:
            return None, 0

        processed_img = self.preprocess_digit_image(img, binarize=binarize)
        return self._predict_processed(processed_img)

    def mouse_callback(self, event, x, y, flags, param):
        """OpenCV mouse handler: paint on the canvas while the button is held."""
        if event == cv2.EVENT_LBUTTONDOWN:
            self.drawing = True
        elif event == cv2.EVENT_MOUSEMOVE and self.drawing:
            cv2.circle(self.canvas, (x, y), 8, 255, -1)
        elif event == cv2.EVENT_LBUTTONUP:
            self.drawing = False

    def run_drawing_recognition(self):
        """Open the drawing window and classify the canvas on request.

        Keys: 'p' predicts the current drawing, 'c' clears the canvas and the
        prediction state, 'q' quits. Returns ``None``.
        """
        if not self.is_trained:
            print("Model not trained! Please train the model first.")
            return

        print("Starting drawing-based digit recognition...")
        print("Instructions:")
        print("- Draw digits with your mouse in the window")
        print("- Press 'c' to clear the canvas")
        print("- Press 'p' to predict the current drawing")
        print("- Press 'q' to quit")

        cv2.namedWindow('Draw Digit Here')
        cv2.setMouseCallback('Draw Digit Here', self.mouse_callback)

        # try/finally guarantees the window is closed even if a call raises.
        try:
            while True:
                # Overlays are drawn on a colour copy so the text never
                # becomes part of the canvas that is classified.
                display_img = cv2.cvtColor(self.canvas, cv2.COLOR_GRAY2BGR)

                cv2.putText(display_img, "Draw a digit here", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                cv2.putText(display_img, "Press 'p' to predict", (10, 360),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                cv2.putText(display_img, "Press 'c' to clear", (10, 380),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                if self.last_prediction is not None:
                    digit, confidence = self.last_prediction
                    cv2.putText(display_img,
                                f"Prediction: {digit} ({confidence:.2f})",
                                (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                (0, 255, 255), 2)

                cv2.imshow('Draw Digit Here', display_img)

                key = cv2.waitKey(1) & 0xFF
                if key == ord('q'):
                    break
                elif key == ord('c'):
                    self.canvas = np.zeros((400, 400), dtype=np.uint8)
                    self.last_prediction = None
                    self.prediction_history = []
                elif key == ord('p'):
                    if self.canvas.any():  # skip an empty canvas
                        # The canvas is already a white-on-black mask, so the
                        # camera-oriented binarization step is skipped.
                        digit, confidence = self.predict_digit_stable(
                            self.canvas, binarize=False)
                        self.last_prediction = (digit, confidence)
                        print(f"Predicted: {digit} (confidence: {confidence:.3f})")
        finally:
            cv2.destroyAllWindows()

    def run_webcam_recognition(self):
        """Classify the centre of the webcam feed until 'q' is pressed.

        Each frame is mirrored for display, the 200x200 region in the middle
        is preprocessed and classified, and the prediction is overlaid when
        its confidence exceeds 0.7. Returns ``None``.
        """
        if not self.is_trained:
            print("Model not trained! Please train the model first.")
            return

        cap = cv2.VideoCapture(0)

        if not cap.isOpened():
            print("Error: Could not open webcam")
            cap.release()
            return

        print("Starting improved webcam digit recognition...")
        print("Instructions:")
        print("- Hold up handwritten digits to the camera")
        print("- Keep digits steady for better recognition")
        print("- Press 'q' to quit")

        # try/finally guarantees the camera and window are released even if
        # an OpenCV or model call raises inside the loop.
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Mirror the frame so the preview behaves like a mirror.
                frame = cv2.flip(frame, 1)

                # Square region of interest in the centre of the frame.
                h, w = frame.shape[:2]
                roi_size = 200
                x1 = (w - roi_size) // 2
                y1 = (h - roi_size) // 2
                x2 = x1 + roi_size
                y2 = y1 + roi_size

                # Take the ROI *before* drawing the rectangle (its border
                # would otherwise be detected as the largest contour) and
                # flip it back so the digit is not mirrored for the model.
                roi = cv2.flip(frame[y1:y2, x1:x2], 1)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

                # Preprocess once and reuse the result for both the
                # prediction and the on-screen thumbnail.
                processed_roi = self.preprocess_digit_image(roi_gray)
                digit, confidence = self._predict_processed(processed_roi)

                if digit is not None and confidence > 0.7:
                    cv2.putText(frame, f"Digit: {digit}", (10, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 3)
                    cv2.putText(frame, f"Confidence: {confidence:.2f}", (10, 100),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Thumbnail of the network input, in the top-right corner.
                processed_display = (processed_roi.reshape(28, 28) * 255).astype(np.uint8)
                processed_display = cv2.resize(processed_display, (100, 100))
                frame[10:110, w - 110:w - 10] = cv2.cvtColor(
                    processed_display, cv2.COLOR_GRAY2BGR)

                cv2.putText(frame, "Hold digit in green box", (10, h - 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                cv2.putText(frame, "Press 'q' to quit", (10, h - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

                cv2.imshow('Improved Digit Recognition', frame)

                key = cv2.waitKey(1) & 0xFF
                if key == ord('q'):
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()

def main():
    """Run the improved demo: obtain a trained model, then start a mode.

    A saved model is loaded when one exists on disk; otherwise a new network
    is built, trained and saved. The user is then asked to pick drawing mode
    (``1``) or webcam mode (``2``); any other answer falls back to drawing
    mode after a notice.
    """
    recognizer = ImprovedHandDigitRecognizer()

    print("Improved Hand Digit Recognition System")
    print("=====================================")

    # A successful load sets ``is_trained``; if it is still unset there was
    # no saved model and one has to be trained now.
    recognizer.load_model()

    if not recognizer.is_trained:
        print("No pre-trained model found. Training improved model...")
        recognizer.create_model()
        recognizer.train_model(epochs=10)
        recognizer.save_model()

    print("\nSelect mode:")
    print("1. Drawing mode (recommended for testing)")
    print("2. Webcam mode")

    # Strip surrounding whitespace so answers such as "2 " are still
    # recognised instead of being reported as invalid.
    choice = input("Enter your choice (1 or 2): ").strip()

    if choice == '2':
        recognizer.run_webcam_recognition()
    else:
        if choice != '1':
            print("Invalid choice. Starting drawing mode...")
        recognizer.run_drawing_recognition()

# Start the demo only when this code runs as the top-level module, not when
# it is imported from elsewhere.
if __name__ == "__main__":
    main()

Improved Hand Digit Recognition System




Model loaded from improved_digit_model.h5

Select mode:
1. Drawing mode (recommended for testing)
2. Webcam mode


Enter your choice (1 or 2):  2


Starting improved webcam digit recognition...
Instructions:
- Hold up handwritten digits to the camera
- Keep digits steady for better recognition
- Press 'q' to quit
