In [17]:
import cv2
import mediapipe as mp
import numpy as np
import pytesseract
from scipy.interpolate import splprep, splev
import re  # For regular expression matching

# Set up pytesseract: point it at the Tesseract executable.
# The TESSERACT_CMD environment variable, when set and non-empty, overrides the
# default Windows install location so the notebook can run on other machines
# without editing this cell.
import os
pytesseract.pytesseract.tesseract_cmd = (
    os.environ.get('TESSERACT_CMD') or r'C:\Program Files\Tesseract-OCR\tesseract.exe'
)

class Sprite:
    """A tracked point that remembers the trail it has travelled.

    ``path_points`` stores ``(x, y)`` integer tuples, oldest first, and never
    grows beyond ``max_points`` entries.
    """

    def __init__(self, name):
        """Create a hidden sprite at (0, 0) with an empty trail."""
        self.name = name
        self.x = 0
        self.y = 0
        self.size = 1
        self.visible = False
        self.path_points = []
        self.max_points = 200  # Reduced number of points for simpler drawing

    def setx(self, x):
        """Set the x coordinate, converting it to an int."""
        self.x = int(x)

    def sety(self, y):
        """Set the y coordinate, converting it to an int."""
        self.y = int(y)

    def show(self):
        """Mark the sprite as visible."""
        self.visible = True

    def hide(self):
        """Mark the sprite as hidden."""
        self.visible = False

    def draw_path(self, frame, thickness=5):
        """Draw the smoothed trail onto ``frame`` as connected white line segments.

        Nothing is drawn when the trail has fewer than two points.
        """
        if len(self.path_points) > 1:
            smoothed_path = self.smooth_path(self.path_points)
            for start, end in zip(smoothed_path, smoothed_path[1:]):
                cv2.line(frame, start, end, (255, 255, 255), thickness)

    def add_point(self):
        """Append the current position to the trail if it moved far enough.

        A point is recorded when the trail is empty or the current position is
        more than 10 pixels from the last recorded point. When the trail exceeds
        ``max_points`` the oldest point is discarded.
        """
        if not self.path_points or np.linalg.norm(np.array([self.x, self.y]) - np.array(self.path_points[-1])) > 10:  # Increased threshold
            self.path_points.append((self.x, self.y))
            if len(self.path_points) > self.max_points:
                self.path_points.pop(0)

    def clear_path(self):
        """Forget the whole trail."""
        self.path_points = []

    def smooth_path(self, path_points):
        """Return ``path_points`` as a polyline of ``(int, int)`` tuples.

        With fewer than 8 points the input is returned unsmoothed (straight
        segments between the recorded points). With 8 or more, a cubic
        interpolating spline is fitted and sampled at 10x the input density.

        Every branch returns the same shape, a list of ``(int, int)`` tuples,
        so callers such as ``draw_path`` always hand the same point type to
        ``cv2.line``.
        """
        # Too few points for a cubic spline: keep the raw polyline, but
        # normalise it to plain int tuples rather than nested lists.
        if len(path_points) < 8:
            return [(int(px), int(py)) for px, py in path_points]

        pts = np.array(path_points)

        # Fit an interpolating (s=0) cubic spline through the points
        tck, u = splprep([pts[:, 0], pts[:, 1]], s=0, k=3)

        # Sample the spline densely for a smooth-looking curve
        u_fine = np.linspace(0, 1, num=len(pts) * 10)
        xs, ys = splev(u_fine, tck)
        return [(int(px), int(py)) for px, py in zip(xs, ys)]

def preprocess_image(image):
    """Turn a BGR image into a binary image for OCR.

    Steps: grayscale conversion, 5x5 Gaussian blur, then an inverted Gaussian
    adaptive threshold. Returns the thresholded single-channel image.
    """
    blurred = cv2.GaussianBlur(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
        (5, 5),
        0,
    )  # Increased blur for smoother edges
    return cv2.adaptiveThreshold(
        blurred,
        255,                             # value assigned to pixels that pass the test
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,                              # neighbourhood size
        2,                               # constant subtracted from the local mean
    )

def get_next_task():
    """Return the letter the player has to draw next.

    Placeholder implementation: the task is always the same fixed letter.
    More elaborate task selection can replace this later.
    """
    fixed_task = 'O'
    return fixed_task

def filter_alphabetical(text):
    """Strip everything except ASCII letters from ``text`` and upper-case the rest."""
    letters = re.findall(r'[A-Za-z]', text)
    joined = ''.join(letters)
    return joined.upper()

# Create Sprite object for drawing
index_finger = Sprite('IndexFinger')

# Initialize MediaPipe for hand tracking.
# Only one hand is tracked: the sprite keeps a single trail, so fingertip
# positions from a second hand would be interleaved into the same path.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils

# Start video capture
cap = cv2.VideoCapture(0)

# Initialize game state
current_task = get_next_task()
task_complete = False

# Keyboard controls while the window has focus:
#   q - quit, c - clear the drawing, r - run OCR on the drawing,
#   n - advance to the next task (only after the current one is completed)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Resize frame for faster processing, then mirror it so that on-screen
        # motion matches the user's hand motion
        small_frame = cv2.resize(frame, (640, 480))
        frame = cv2.flip(small_frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)

        if result.multi_hand_landmarks:
            # Follow the index fingertip of the (single) detected hand
            hand_landmarks = result.multi_hand_landmarks[0]
            index_pos = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            # Landmark coordinates are scaled by the frame width/height to get pixels
            index_finger.setx(index_pos.x * frame.shape[1])
            index_finger.sety(index_pos.y * frame.shape[0])
            index_finger.show()
            index_finger.add_point()
            index_finger.draw_path(frame)

        else:
            index_finger.hide()

        cv2.imshow('Hand Tracking with Drawing', frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('c'):
            index_finger.clear_path()
        elif key == ord('r'):
            if len(index_finger.path_points) > 1:
                # Render only the trail on a black canvas so the camera image
                # does not interfere with OCR
                blank_image = np.zeros((frame.shape[0], frame.shape[1], 3), np.uint8)
                index_finger.draw_path(blank_image, thickness=10)
                processed_image = preprocess_image(blank_image)
                text = pytesseract.image_to_string(processed_image, config='--psm 10')
                recognized_text = filter_alphabetical(text)  # Filter to alphabetical characters
                print(f'Recognized letter: {recognized_text}')

                if recognized_text == current_task:
                    print("Correct! Task completed.")
                    task_complete = True
                else:
                    print("Try again!")

        elif key == ord('n') or key == ord('N'):
            if task_complete:
                current_task = get_next_task()
                print(f'New task: {current_task}')
                index_finger.clear_path()
                task_complete = False
            else:
                print("Complete the current task before moving to the next.")

finally:
    # Always give back the camera, the MediaPipe graph and the GUI window,
    # even if the loop exits through an exception
    cap.release()
    hands.close()
    cv2.destroyAllWindows()


Recognized letter: I
Try again!
Recognized letter: A
Try again!
Recognized letter: O
Correct! Task completed.
Recognized letter: I
Try again!
Recognized letter: N
Try again!
Recognized letter: T
Try again!
Recognized letter: L
Try again!
Recognized letter: 
Try again!


In [30]:
import cv2
import mediapipe as mp
import numpy as np
import pytesseract
from scipy.interpolate import splprep, splev
import re
from collections import Counter

# Set up pytesseract: point it at the Tesseract executable.
# The TESSERACT_CMD environment variable, when set and non-empty, overrides the
# default Windows install location so the notebook can run on other machines
# without editing this cell.
import os
pytesseract.pytesseract.tesseract_cmd = (
    os.environ.get('TESSERACT_CMD') or r'C:\Program Files\Tesseract-OCR\tesseract.exe'
)

class Sprite:
    """A tracked point that remembers the trail it has travelled.

    ``path_points`` stores ``(x, y)`` integer tuples, oldest first, and never
    grows beyond ``max_points`` entries.
    """

    def __init__(self, name):
        """Create a hidden sprite at (0, 0) with an empty trail."""
        self.name = name
        self.x = 0
        self.y = 0
        self.size = 1
        self.visible = False
        self.path_points = []
        self.max_points = 200  # Reduced number of points for simpler drawing

    def setx(self, x):
        """Set the x coordinate, converting it to an int."""
        self.x = int(x)

    def sety(self, y):
        """Set the y coordinate, converting it to an int."""
        self.y = int(y)

    def show(self):
        """Mark the sprite as visible."""
        self.visible = True

    def hide(self):
        """Mark the sprite as hidden."""
        self.visible = False

    def draw_path(self, frame, thickness=5):
        """Draw the smoothed trail onto ``frame`` as connected white line segments.

        Nothing is drawn when the trail has fewer than two points.
        """
        if len(self.path_points) > 1:
            smoothed_path = self.smooth_path(self.path_points)
            for start, end in zip(smoothed_path, smoothed_path[1:]):
                cv2.line(frame, start, end, (255, 255, 255), thickness)

    def add_point(self):
        """Append the current position to the trail if it moved far enough.

        A point is recorded when the trail is empty or the current position is
        more than 10 pixels from the last recorded point. When the trail exceeds
        ``max_points`` the oldest point is discarded.
        """
        if not self.path_points or np.linalg.norm(np.array([self.x, self.y]) - np.array(self.path_points[-1])) > 10:  # Increased threshold
            self.path_points.append((self.x, self.y))
            if len(self.path_points) > self.max_points:
                self.path_points.pop(0)

    def clear_path(self):
        """Forget the whole trail."""
        self.path_points = []

    def smooth_path(self, path_points):
        """Return ``path_points`` as a polyline of ``(int, int)`` tuples.

        With fewer than 8 points the input is returned unsmoothed (straight
        segments between the recorded points). With 8 or more, a cubic
        interpolating spline is fitted and sampled at 10x the input density.

        Every branch returns the same shape, a list of ``(int, int)`` tuples,
        so callers such as ``draw_path`` always hand the same point type to
        ``cv2.line``.
        """
        # Too few points for a cubic spline: keep the raw polyline, but
        # normalise it to plain int tuples rather than nested lists.
        if len(path_points) < 8:
            return [(int(px), int(py)) for px, py in path_points]

        pts = np.array(path_points)

        # Fit an interpolating (s=0) cubic spline through the points
        tck, u = splprep([pts[:, 0], pts[:, 1]], s=0, k=3)

        # Sample the spline densely for a smooth-looking curve
        u_fine = np.linspace(0, 1, num=len(pts) * 10)
        xs, ys = splev(u_fine, tck)
        return [(int(px), int(py)) for px, py in zip(xs, ys)]

def preprocess_image(image):
    """Turn a BGR image into a cleaned-up binary image for OCR.

    Pipeline: grayscale -> 5x5 Gaussian blur -> inverted Gaussian adaptive
    threshold -> morphological open -> morphological close -> dilate.
    All morphological steps use the same 3x3 kernel for a single iteration.
    Returns the final single-channel image.
    """
    kernel = np.ones((3, 3), np.uint8)

    # Grayscale, then blur to suppress noise before thresholding
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(gray, (5, 5), 0)

    binary = cv2.adaptiveThreshold(
        smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )

    # Open removes small specks, close fills small gaps
    cleaned = binary
    for operation in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        cleaned = cv2.morphologyEx(cleaned, operation, kernel, iterations=1)

    # Final dilation to connect neighbouring components
    return cv2.dilate(cleaned, kernel, iterations=1)

def recognize_text(image):
    """Run OCR on ``image`` several ways and return the consensus result.

    The image is preprocessed, and both the preprocessed image and its bitwise
    inverse are passed to Tesseract under three page-segmentation modes
    (10, 8 and 6) with an upper-case A-Z whitelist. Each raw result is reduced
    to its letters with ``filter_alphabetical``; results that end up empty are
    discarded so they cannot win the vote.

    Returns the most frequent non-empty result (the earliest one on a tie), or
    ``''`` when no attempt produced any letters.
    """
    # Prepare multiple versions of the image
    processed_image = preprocess_image(image)
    inverted_image = cv2.bitwise_not(processed_image)

    # Recognize text with different PSM modes and image versions
    configs = [
        '--psm 10 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ',
        '--psm 8 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ',
        '--psm 6 --oem 3 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    ]

    votes = Counter()
    for config in configs:
        for candidate in (processed_image, inverted_image):
            raw_text = pytesseract.image_to_string(candidate, config=config)
            letters = filter_alphabetical(raw_text)
            # Check for emptiness AFTER cleaning: raw output made only of
            # non-letters would otherwise be counted as a vote for ''.
            if letters:
                votes[letters] += 1

    # Choose the most common result, or the first one if there's no consensus
    if votes:
        return votes.most_common(1)[0][0]
    return ''

def filter_alphabetical(text):
    """Strip everything except ASCII letters from ``text`` and upper-case the rest."""
    letters = re.findall(r'[A-Za-z]', text)
    joined = ''.join(letters)
    return joined.upper()

def get_next_task():
    """Return the letter the player has to draw next.

    Placeholder implementation: the task is always the same fixed letter.
    More elaborate task selection can replace this later.
    """
    fixed_task = 'O'
    return fixed_task

def overlay_text(frame, text, position=(50, 50), font_scale=2, color=(0, 255, 0), thickness=2):
    """Draw ``text`` onto ``frame`` in place.

    The text is rendered at ``position`` with the Hershey Simplex font and
    anti-aliased lines, using the given scale, colour and stroke thickness.
    """
    cv2.putText(
        frame,
        text,
        position,
        cv2.FONT_HERSHEY_SIMPLEX,
        font_scale,
        color,
        thickness,
        cv2.LINE_AA,
    )

# Create Sprite object for drawing
index_finger = Sprite('IndexFinger')

# Initialize MediaPipe for hand tracking.
# Only the first detected hand is ever used below, so detection is limited to
# a single hand.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils

# Start video capture
cap = cv2.VideoCapture(0)

# Initialize game state
current_task = get_next_task()
task_complete = False
recognized_text = ''

WINDOW_NAME = 'Hand Tracking with Drawing'

# Keyboard controls while the window has focus:
#   q - quit, c - clear the drawing, r - run OCR on the drawing,
#   n - advance to the next task (only after the current one is completed)
try:
    # Create full screen window. This happens inside the try block so the
    # camera is still released if window creation fails.
    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
    cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Resize frame for faster processing, then mirror it so that on-screen
        # motion matches the user's hand motion
        small_frame = cv2.resize(frame, (640, 480))
        frame = cv2.flip(small_frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)

        if result.multi_hand_landmarks:
            # Process the first detected hand (assumes only one hand is tracked)
            hand_landmarks = result.multi_hand_landmarks[0]
            index_pos = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            # Landmark coordinates are scaled by the frame width/height to get pixels
            index_finger.setx(index_pos.x * frame.shape[1])
            index_finger.sety(index_pos.y * frame.shape[0])
            index_finger.show()
            index_finger.add_point()
            # The camera frame is never displayed, so the trail is drawn only
            # on the dark canvas below.

        else:
            index_finger.hide()

        # Display the trail on a dark background instead of the camera image
        dark_frame = np.zeros_like(frame)
        index_finger.draw_path(dark_frame, thickness=10)

        # Display recognized text and current task
        if recognized_text:
            overlay_text(dark_frame, f"Recognized: {recognized_text}", position=(50, 50))
        overlay_text(dark_frame, f"Current Task: {current_task}", position=(50, 100))

        cv2.imshow(WINDOW_NAME, dark_frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('c'):
            index_finger.clear_path()
            recognized_text = ''  # Clear recognized text when path is cleared
        elif key == ord('r'):
            if len(index_finger.path_points) > 1:
                # Render only the trail (no overlay text) on a fresh canvas for OCR
                blank_image = np.zeros((frame.shape[0], frame.shape[1], 3), np.uint8)
                index_finger.draw_path(blank_image, thickness=10)
                recognized_text = recognize_text(blank_image)

                print(f'Recognized letter: {recognized_text}')

                if recognized_text == current_task:
                    print("Correct! Task completed.")
                    task_complete = True
                else:
                    print("Try again!")

        elif key == ord('n') or key == ord('N'):
            if task_complete:
                current_task = get_next_task()
                print(f'New task: {current_task}')
                index_finger.clear_path()
                task_complete = False
                recognized_text = ''  # Clear recognized text when moving to the next task
            else:
                print("Complete the current task before moving to the next.")

finally:
    # Always give back the camera, the MediaPipe graph and the GUI window,
    # even if the loop exits through an exception
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

Recognized letter: O
Correct! Task completed.
Recognized letter: O
Correct! Task completed.
Recognized letter: N
Try again!
Recognized letter: AN
Try again!
Recognized letter: A
Try again!
Recognized letter: T
Try again!
