In [1]:
import copy
import math
import time

import autopy
import cv2
import import_ipynb  # must be imported before handtracker so the .ipynb import hook is installed
import mediapipe as mp
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from pynput.keyboard import Key, Controller

import handtracker as htm

importing Jupyter notebook from handtracker.ipynb


In [2]:
class Cnn(nn.Module):
    """Small convolutional classifier producing 26 class scores.

    Three 3x3 convolutions (1 -> 16 -> 32 -> 64 channels), each followed by
    ReLU and a 2x2 max-pool, feed a 576 -> 256 -> 26 fully connected head.
    Dropout (p=0.25) is applied before each linear layer.

    The flatten step assumes the feature map leaving the last pool is
    64 x 3 x 3, which is what a 1 x 28 x 28 input (the shape built by
    ``character_prediction``) produces.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: padding=1 keeps the spatial size through each conv.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # One pooling module shared by all three stages (it has no parameters).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head.
        self.fc1 = nn.Linear(64 * 3 * 3, 256)
        self.fc2 = nn.Linear(256, 26)
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        """Return raw (un-normalised) class scores of shape (batch, 26)."""
        # conv -> ReLU -> pool, three times in a row.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))

        # Flatten to (batch, 576) for the fully connected layers.
        flat = x.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(self.dropout(flat)))
        return self.fc2(self.dropout(hidden))

def get_histogram(frame):
    """Build a 2-D hue/saturation histogram from five sample patches of ``frame``.

    The patches are located by the module-level names ``rect1_tl`` ..
    ``rect5_tl`` (each indexed as ``[0]`` for the column and ``[1]`` for the
    row) and sized by the module-level ``width`` and ``height``. None of these
    names is defined in the visible part of the notebook, so they must exist
    before this function is called.

    NOTE(review): the row extent uses ``width`` and the column extent uses
    ``height``; that is harmless for square patches but looks swapped -
    confirm against where the rectangles are defined.

    Returns the ``cv2.calcHist`` result over channels 0 and 1 of the HSV
    patches, with 180 x 256 bins.
    """
    corners = (rect1_tl, rect2_tl, rect3_tl, rect4_tl, rect5_tl)
    patches = [
        frame[corner[1]:corner[1] + width, corner[0]:corner[0] + height]
        for corner in corners
    ]
    # Stack the patches vertically so a single histogram covers all of them.
    stacked = np.concatenate(patches, axis=0)
    stacked_hsv = cv2.cvtColor(stacked, cv2.COLOR_BGR2HSV)

    return cv2.calcHist([stacked_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])

def get_ROI(canvas):
    """Crop the inverted canvas to the bounding box of its second-largest contour.

    The canvas is inverted, thresholded (inverse binary at 90) and its contours
    are ranked by bounding-box area, largest first. The box of the contour in
    second place is outlined on ``canvas`` (in place) and the matching region
    of the inverted image is returned.

    Raises ``IndexError`` when fewer than two contours are found.
    """
    inverted = cv2.bitwise_not(canvas)
    _, binary = cv2.threshold(inverted, 90, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    def box_area(index):
        # Area of the axis-aligned bounding box, not of the contour itself.
        _, _, box_w, box_h = cv2.boundingRect(contours[index])
        return box_w * box_h

    # Stable descending sort: ties keep their original contour order.
    ranked = sorted(range(len(contours)), key=box_area, reverse=True)

    x, y, w, h = cv2.boundingRect(contours[ranked[1]])
    cv2.rectangle(canvas, (x, y), (x + w, y + h), (255, 255, 0), 1)
    return inverted[y:y + h, x:x + w]

def character_prediction(roi, model):
    """Predict the class index of the character drawn in ``roi``.

    Parameters
    ----------
    roi : numpy.ndarray
        Single-channel image of the drawn character. It is resized to 28 x 28
        and lightly blurred before being fed to the network.
    model : torch.nn.Module
        Classifier taking a (1, 1, 28, 28) float tensor and returning one score
        per class. It is switched to evaluation mode as a side effect.

    Returns
    -------
    numpy.ndarray
        0-d integer array holding the index of the highest-scoring class.
    """
    img = cv2.resize(roi, (28, 28))
    img = cv2.GaussianBlur(img, (3, 3), 0)
    img = Image.fromarray(img)

    # Scale pixels to [0, 1] and then normalise to [-1, 1] (single channel).
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
    p_img = preprocess(img).reshape([1, 1, 28, 28]).float()

    model.eval()
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # Height and width are swapped before the forward pass.
        # NOTE(review): presumably to match the orientation of the training
        # images - confirm against the training notebook.
        output = model(torch.transpose(p_img, 2, 3))
    _, preds_tensor = torch.max(output, 1)
    return np.squeeze(preds_tensor.numpy())

In [None]:
##########################
wCam, hCam = 640, 480  # requested camera frame size, in pixels
frameR = 100  # Frame Reduction: margin of the camera frame excluded when mapping to the screen
smoothening = 7  # divisor applied to each cursor step; larger values move the cursor more gradually
#########################

pTime = 0  # timestamp of the previous frame, used for the FPS read-out
plocX, plocY = 0, 0  # previous (smoothed) cursor location
clocX, clocY = 0, 0  # current (smoothed) cursor location

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly here instead of letting the capture loop spin on failed reads.
    raise RuntimeError("Could not open camera 0")
cap.set(cv2.CAP_PROP_FRAME_WIDTH, wCam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, hCam)
detector = htm.handDetector(maxHands=1)
wScr, hScr = autopy.screen.size()
# print(wScr, hScr)

# White-on-black drawing surface is produced later; start with an empty buffer.
canvas = np.zeros((720, 1280), np.uint8)
far_points = []  # recent index-fingertip positions forming the current stroke
model = Cnn()
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
model.load_state_dict(torch.load('model_emnist.pt', map_location='cpu'))
font = cv2.FONT_HERSHEY_SIMPLEX
prediction = 0  # class index of the last recognised character (0 is shown as 'A')
# Main capture loop: read a frame, detect the hand, act on the gesture, show the frame.
#
# Gestures (decided from detector.fingersUp()):
#   * index finger only            -> move the mouse cursor
#   * index + middle               -> click when the two fingertips are close together
#   * index + middle + ring        -> draw; bringing landmarks 4 and 20 together
#                                     recognises the drawn character and types it
#   * all five fingers down        -> discard the current drawing
#
# Press Esc in the preview window, or interrupt the kernel, to stop. The camera
# and the window are released in either case.
keyboard = Controller()  # created once and reused for every typed character
ESC_KEY = 27
last_error = None  # last per-frame error that was reported, to avoid flooding the output

try:
    while True:
        # 1. Find hand Landmarks
        success, img = cap.read()
        if not success or img is None:
            # Nothing to process or display; still poll the keyboard so Esc can quit.
            if cv2.waitKey(1) & 0xFF == ESC_KEY:
                break
            continue

        try:
            img = detector.findHands(img)
            lmList, bbox = detector.findPosition(img)
            # The canvas is rebuilt from far_points on every frame.
            canvas[:, :] = 255

            # Gestures are only evaluated when a hand was actually detected;
            # otherwise x1/y1/far would be missing or stale.
            if len(lmList) != 0:
                # 2. Get the tip of the index finger (landmark 8)
                x1, y1 = lmList[8][1], lmList[8][2]
                far = (x1, y1)

                # 3. Check which fingers are up
                fingers = detector.fingersUp()

                # 4. Only Index Finger : Moving Mode
                if fingers[1] == 1 and fingers[2] == 0 and fingers[3] == 0:
                    cv2.putText(img, "Moving Mode", (100, 45), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    # 5. Convert Coordinates
                    x3 = np.interp(x1, (frameR, wCam - frameR), (0, wScr))
                    y3 = np.interp(y1, (frameR, hCam - frameR), (0, hScr))
                    # 6. Smoothen Values
                    clocX = plocX + (x3 - plocX) / smoothening
                    clocY = plocY + (y3 - plocY) / smoothening

                    # 7. Move Mouse (x is mirrored relative to the camera image)
                    autopy.mouse.move(wScr - clocX, clocY)
                    cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
                    plocX, plocY = clocX, clocY

                # 8. Both Index and middle fingers are up : Clicking Mode
                elif fingers[1] == 1 and fingers[2] == 1 and fingers[3] == 0:
                    cv2.putText(img, "Clicking Mode", (100, 45), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    # 9. Find distance between fingers
                    length, img, lineInfo = detector.findDistance(8, 12, img)
                    # 10. Click mouse if distance short
                    if length < 30:
                        cv2.circle(img, (lineInfo[4], lineInfo[5]), 15, (0, 255, 0), cv2.FILLED)
                        autopy.mouse.click()

                # Index, middle and ring fingers are up : Drawing Mode
                elif fingers[1] == 1 and fingers[2] == 1 and fingers[3] == 1:
                    cv2.putText(img, "Character Written: " + chr(prediction + 65), (8, 80), font, 1,
                                (255, 255, 255), 2, cv2.LINE_AA)
                    cv2.putText(img, "Drawing Mode", (100, 45), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    # Keep only the most recent points of the stroke.
                    if len(far_points) > 100:
                        far_points.pop(0)
                    far_points.append(far)
                    for i in range(len(far_points) - 1):
                        cv2.line(img, far_points[i], far_points[i + 1], (255, 5, 255), 20)
                        cv2.line(canvas, far_points[i], far_points[i + 1], (0, 0, 0), 20)

                    length, img, lineInfo = detector.findDistance(4, 20, img)
                    # A single point draws no line, so there is nothing to
                    # recognise yet and get_ROI would fail on the blank canvas.
                    if length < 30 and len(far_points) > 1:
                        # Prediction
                        roi = get_ROI(canvas)
                        prediction = int(character_prediction(roi, model))
                        # Save the cropped drawing under the predicted class index.
                        name = str(prediction) + '.jpg'
                        cv2.imwrite(name, roi)

                        canvas[:, :] = 255
                        far_points.clear()
                        cv2.waitKey(500)  # short pause so one gesture is not handled twice
                        key = chr(prediction + 65)
                        keyboard.press(key)
                        keyboard.release(key)

                # All fingers down : discard the current drawing
                elif all(fingers[i] == 0 for i in range(5)):
                    canvas[:, :] = 255
                    far_points.clear()
                    cv2.waitKey(500)
        except Exception as exc:
            # Best effort: one bad frame must not stop the session, but the
            # failure is reported (once per distinct message) instead of being
            # hidden. KeyboardInterrupt is deliberately not caught here.
            message = f"{type(exc).__name__}: {exc}"
            if message != last_error:
                print("Frame skipped:", message)
                last_error = message

        # 11. Frame Rate
        cTime = time.time()
        elapsed = cTime - pTime
        fps = 1 / elapsed if elapsed > 0 else 0
        pTime = cTime
        cv2.putText(img, str(int(fps)), (30, 45), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3)
        # 12. Display
        cv2.imshow("Image", img)
        if cv2.waitKey(1) & 0xFF == ESC_KEY:
            break
finally:
    # Always give the camera back and close the preview window.
    cap.release()
    cv2.destroyAllWindows()
        
        