In [2]:
from tkinter import *
import numpy as np
from PIL import ImageGrab
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import cv2

In [3]:
def draw_canvas():
    """Let the user draw a digit with the mouse and show the model's prediction.

    Opens an OpenCV window containing a black 300x300 canvas. Dragging with the
    left mouse button paints a thick white stroke; releasing the button resizes
    the canvas to 28x28, feeds it to the model loaded from 'ocr.h5' and shows
    the index of the highest-scoring class in a second window.

    Keys (read from the OpenCV window):
        q -- close every OpenCV window and return.
        c -- clear the canvas and dismiss the current prediction.

    Returns:
        None.
    """
    canvas_window = "Handwritten Digit Recognition"
    prediction_window = "Prediction"

    model = load_model('ocr.h5')
    # Black single-channel canvas the strokes are painted on.
    canvas = np.zeros((300, 300), dtype=np.uint8)
    drawing = False
    last_x, last_y = None, None
    predicted_digit = None
    # Tracks whether the prediction window is currently on screen so that it
    # can be dismissed when the canvas is cleared.
    prediction_visible = False

    def draw(event, x, y, flags, param):
        """Mouse callback: paint while the left button is held, predict on release."""
        nonlocal drawing, last_x, last_y, predicted_digit
        if event == cv2.EVENT_LBUTTONDOWN:
            drawing = True
            last_x, last_y = x, y
        elif event == cv2.EVENT_MOUSEMOVE:
            if drawing:
                # Connect the previous mouse position to the current one.
                cv2.line(canvas, (last_x, last_y), (x, y), 255, 20)
                last_x, last_y = x, y
        elif event == cv2.EVENT_LBUTTONUP:
            drawing = False
            # Work on a copy so preprocessing never alters what is displayed.
            digit_image = cv2.resize(canvas.copy(), (28, 28))  # model input is 28x28
            digit_image = cv2.dilate(digit_image, None, iterations=1)  # thicken strokes
            digit_image = cv2.erode(digit_image, None, iterations=1)  # trim them back
            digit_image = digit_image.reshape(1, 28, 28, 1)  # batch of one, one channel
            digit_image = digit_image.astype('float32') / 255  # scale to [0, 1]

            scores = model.predict(digit_image)
            # Kept as a length-1 array; the display loop reads element 0.
            predicted_digit = np.argmax(scores, axis=1)

    try:
        cv2.namedWindow(canvas_window)
        cv2.setMouseCallback(canvas_window, draw)

        while True:
            cv2.imshow(canvas_window, canvas)

            # Show the latest prediction, if any, in its own window.
            if predicted_digit is not None:
                output = np.zeros((400, 400), dtype=np.uint8)
                cv2.putText(output, f"Predicted: {predicted_digit[0]}", (50, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                cv2.imshow(prediction_window, output)
                prediction_visible = True

            key = cv2.waitKey(1) & 0xFF

            if key == ord('q'):  # quit
                break
            elif key == ord('c'):  # clear
                # Cleared in place so the mouse callback keeps drawing on the
                # same array.
                canvas.fill(0)
                predicted_digit = None
                if prediction_visible:
                    # Without this the old digit stays on screen next to an
                    # empty canvas.
                    cv2.destroyWindow(prediction_window)
                    prediction_visible = False
    finally:
        # Close the windows even if prediction or drawing raised.
        cv2.destroyAllWindows()

In [4]:
def camera_capture():
    """Run live digit detection on webcam device 0 until 'q' is pressed.

    For every frame:
      * faces found by the frontal-face Haar cascade are outlined and labelled
        with a greeting;
      * the frame is blurred and Otsu-thresholded (inverted), external contours
        are extracted, and every contour whose bounding box is at least 20x20
        pixels is resized to 28x28 and classified by the model loaded from
        'ocr.h5';
      * classifications whose highest score exceeds 0.8 are drawn on the frame.

    The loop also ends if the camera stops delivering frames. The capture
    device and all OpenCV windows are released on exit, including when an
    exception is raised.

    Raises:
        RuntimeError: if webcam device 0 cannot be opened.
    """
    model = load_model('ocr.h5')
    # Loop-invariant: build the cascade once instead of re-reading the XML
    # file from disk on every frame.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    font = cv2.FONT_HERSHEY_SIMPLEX

    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            raise RuntimeError("Could not open webcam (device 0)")

        while True:
            # Capture frame-by-frame
            ok, frame = cap.read()
            if not ok or frame is None:
                # No frame available (camera unplugged or stream ended);
                # passing None on to cvtColor would raise.
                break

            # Convert to grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

            # Draw rectangles around the faces
            for (x, y, w, h) in faces:
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

                # Greeting above the face, text smiley below its lower-right corner
                cv2.putText(frame, "Hello!!!", (x, y - 10), font, 1, (0, 255, 0), 2, cv2.LINE_AA)
                smiley = ":)"
                cv2.putText(frame, smiley, (x + w - 30, y + h + 30), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Apply Gaussian Blur
            blur = cv2.GaussianBlur(gray, (5, 5), 0)

            # Inverted Otsu threshold: the computed threshold value is unused.
            _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

            # Find contours
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)

                # Skip regions smaller than 20x20 pixels
                if w >= 20 and h >= 20:
                    roi = thresh[y:y+h, x:x+w]
                    roi = cv2.resize(roi, (28, 28))  # model input is 28x28
                    roi = roi.reshape(1, 28, 28, 1) / 255.0  # batch of one, scaled to [0, 1]

                    # verbose=0: this runs for every candidate in every frame,
                    # so the default progress bar would flood the output.
                    prediction = model.predict(roi, verbose=0)
                    digit = np.argmax(prediction)

                    # Only report confident classifications
                    confidence = np.max(prediction)
                    if confidence > 0.8:  # Adjust confidence threshold as needed
                        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
                        cv2.putText(frame, str(digit), (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Display the resulting frame
            cv2.imshow('Handwritten Digit Recognition', frame)

            # Press 'q' to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the windows, even on error.
        cap.release()
        cv2.destroyAllWindows()

In [12]:
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
from tensorflow.keras.models import load_model
import cv2
import numpy as np
from PIL import Image, ImageTk

# Load the pre-trained OCR model once at module level; preprocess_image() reads this
# global. The path is relative, so 'ocr.h5' must be in the current working directory.
model = load_model('ocr.h5')

# Function to process image and predict the handwritten digit
def preprocess_image(image_path):
    """Classify the image stored at ``image_path`` with the module-level ``model``.

    The file is read with OpenCV, converted to grayscale, resized to 28x28,
    scaled to [0, 1] and passed to the model as a batch of one single-channel
    image.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        A ``(predicted_digit, org_img)`` tuple: the index of the highest-scoring
        class, and the image as loaded by ``cv2.imread`` (BGR channel order).

    Raises:
        ValueError: if OpenCV cannot read ``image_path`` as an image.
    """
    org_img = cv2.imread(image_path)
    if org_img is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with a clear message instead of inside cvtColor.
        raise ValueError(f"Could not read image file: {image_path!r}")

    img = cv2.cvtColor(org_img, cv2.COLOR_BGR2GRAY)

    # Resize the image to match the input size expected by the model (28x28)
    img_resized = cv2.resize(img, (28, 28))

    # Scale pixel values to [0, 1]
    img_normalized = img_resized / 255.0

    # The model expects a batch of single-channel images
    img_reshaped = img_normalized.reshape(1, 28, 28, 1)

    prediction = model.predict(img_reshaped)

    # The predicted digit is the class with the highest score
    predicted_digit = np.argmax(prediction)

    return predicted_digit, org_img

# Function to open the file explorer and choose an image
def choose_image():
    """Ask the user for an image file, classify it and show the result.

    Does nothing if the dialog is cancelled. Any exception raised while
    processing the image or building the result window is reported in an
    error message box instead of propagating.
    """
    # Tk expects the patterns of one file type as a list/tuple (or a
    # space-separated string). A single ';'-joined string is treated as one
    # pattern and does not filter correctly on every platform.
    image_patterns = ("*.png", "*.jpg", "*.jpeg", "*.bmp", "*.tiff")
    file_path = filedialog.askopenfilename(filetypes=[("Image files", image_patterns)])
    if not file_path:
        return  # dialog cancelled

    try:
        # Process and predict the handwritten digit from the image
        predicted_digit, original_img = preprocess_image(file_path)

        # Display the result in a new window
        result_window(predicted_digit, original_img)
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred while processing the image: {e}")

# Function to create a result window to display the predicted digit and original image
def result_window(predicted_digit, original_img):
    """Open a child window showing the prediction above a 200x200 view of the image.

    Args:
        predicted_digit: Value interpolated into the heading text.
        original_img: Image array in OpenCV's BGR channel order.
    """
    popup = tk.Toplevel(window)
    popup.title("Prediction Result")

    # Heading with the predicted digit
    tk.Label(
        popup,
        text=f"Predicted Handwritten Digit: {predicted_digit}",
        font=("Arial", 20),
    ).pack(pady=10)

    # OpenCV delivers BGR; Pillow expects RGB. Convert, wrap, then scale for display.
    rgb_pixels = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
    preview = Image.fromarray(rgb_pixels).resize((200, 200))
    photo = ImageTk.PhotoImage(preview)

    picture = tk.Label(popup, image=photo)
    # Hold a reference on the widget so the image outlives this function call.
    picture.image = photo
    picture.pack(pady=20)




In [22]:
# Build the main menu window and start the Tk event loop.
window = Tk()
window.title("Handwritten digit recognition")
# NOTE(review): l1, lastx and lasty are not used anywhere in the visible code;
# confirm no other cell depends on them before removing.
l1 = Label()
lastx, lasty = None, None


# Headings
L1 = Label(window, text="Optical Character Recognition (OCR) of Digits", font=('Algerian', 20), fg="dark blue")
L1.place(x=35, y=10)
L2 = Label(window, text="Handwritten Digit Recognition", font=('Algerian', 18), fg="black")
L2.place(x=145, y=70)

# Button: pick an image file from disk and classify it
b2 = Button(window, text="1. Choose Image", font=("Arial", 15), bg="white", fg="red", command=choose_image)
b2.place(x=290, y=180)

# Button: open the OpenCV canvas to draw a digit with the mouse
b2 = Button(window, text="2. Write on canvas", font=('Arial', 15), bg="white", fg="red", command=draw_canvas)
b2.place(x=280, y=280)

# Button: start live digit detection from the webcam
b2 = Button(window, text="3. Predict", font=('Arial', 15), bg="white", fg="red", command=camera_capture)
b2.place(x=320, y=380)

window.geometry("700x500")
# Blocks until the main window is closed.
window.mainloop()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
