In [1]:
import cv2 as cv
import imutils
import numpy as np
import sys
import os
import torch
import torchvision
from torch import nn, optim
import torch.nn.functional as F
import torch.utils.data
import torchvision.datasets as dset
import torchvision.utils as vutils
from torch.utils.data import DataLoader
from torch.autograd.variable import Variable
from torchvision import transforms, datasets
import matplotlib.pyplot as plt
import random
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.metrics.pairwise import euclidean_distances
from PIL import Image

In [2]:
# Report the interpreter / CUDA setup and choose the device used for inference.
print(sys.version)
# Checking for GPU
print(torch.version.cuda)
print(torch.cuda.device_count())
print(torch.cuda.is_available())
# Use the GPU when one is usable, otherwise fall back to the CPU so that the
# later `.to(device)` calls do not fail on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    # Release cached GPU memory left over from earlier runs in this kernel.
    torch.cuda.empty_cache()

3.7.6 (default, Jan  8 2020, 20:23:39) [MSC v.1916 64 bit (AMD64)]
10.2
1
True


In [3]:
# Build the label tables from the entries of ./mydata/:
#   lookup:        entry name  -> class index
#   reverselookup: class index -> entry name
# Entries whose name starts with 'ges' are skipped.
# os.listdir() returns names in arbitrary order, so the names are sorted to
# make the index assigned to each label deterministic across runs/platforms.
# NOTE(review): presumably this must match the class order used at training
# time (torchvision's ImageFolder also sorts class names) - confirm.
class_names = sorted(
    name for name in os.listdir('./mydata/') if not name.startswith('ges')
)

lookup = {name: index for index, name in enumerate(class_names)}
reverselookup = {index: name for index, name in enumerate(class_names)}

# Number of classes found (kept under its original name).
count = len(class_names)

print(lookup)
print(reverselookup)

{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9}
{0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9'}


In [4]:
# Load the fully pickled model (architecture + weights) onto the selected device.
# NOTE(review): torch.load unpickles arbitrary objects - only load checkpoints
# from a trusted source.
model = torch.load('./models/gest_full_googlenet.pth', map_location=device)
# The notebook only runs inference: put the network in evaluation mode so that
# layers which behave differently during training (e.g. dropout, batch norm,
# if the model has them) act deterministically.
model = model.to(device).eval()

In [5]:
image_size = 256  # not referenced by the code visible in this notebook

def recognize_gest(img):
    """Classify a single gesture image and return its label as a string.

    Parameters
    ----------
    img : PIL.Image.Image
        Image to classify. It is converted to RGB and resized to 224x224
        regardless of its original size or aspect ratio.

    Returns
    -------
    str
        The entry of the module-level ``reverselookup`` table for the
        predicted class index. The label is also printed (without a newline).

    Notes
    -----
    Uses the module-level ``model``, ``device`` and ``reverselookup``.
    """
    transform = transforms.Compose([
        # Resize to an exact square: an int size would only scale the shorter
        # edge and leave non-square inputs with the wrong shape for the model.
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]),  # ImageNet statistics
    ])

    with torch.no_grad():
        # Guarantee three channels so Normalize and the model get (3, 224, 224).
        batch = transform(img.convert('RGB')).unsqueeze(0)  # add batch axis
        batch = batch.to(device)
        scores = model(batch.float()).cpu()
        # Plain int so the dictionary lookup does not depend on numpy scalars.
        predicted = int(torch.argmax(scores, dim=1)[0])

    label = str(reverselookup[predicted])
    print(label, end='')
    return label

In [6]:
def run_avg(image, aWeight):
    """Seed or update the module-level background model ``bg``.

    On the first call (``bg`` is None) the frame is stored as a float copy.
    On later calls the frame is blended into ``bg`` in place with
    ``cv.accumulateWeighted`` using ``aWeight`` as the weight of the new frame.

    Returns None in both cases.
    """
    global bg
    if bg is not None:
        cv.accumulateWeighted(image, bg, aWeight)
        return
    # No model yet: the first frame becomes the initial background.
    bg = image.astype('float')

In [7]:
def segment(image, threshold=18):
    """Separate the foreground from the module-level background model ``bg``.

    Parameters
    ----------
    image : numpy.ndarray
        Current frame; it is differenced against ``np.uint8(bg)``, so it is
        presumably a single-channel uint8 image of the same size - confirm
        against the caller.
    threshold : int, optional
        Absolute-difference value above which a pixel counts as foreground.

    Returns
    -------
    tuple or None
        ``(thresholded, segmented)`` where ``thresholded`` is the binary mask
        after dilation and closing, and ``segmented`` is the external contour
        with the largest area. ``None`` when there is no background model yet
        or no contour was found.
    """
    if bg is None:
        # No background has been accumulated yet, so nothing can be segmented.
        return None

    diff = cv.absdiff(np.uint8(bg), image)

    # NOTE(review): the original computed Gaussian/median blurs of `diff` but
    # never used them; thresholding has always run on the raw difference, and
    # that behaviour is kept here (the dead blur passes were removed).
    thresholded = cv.threshold(diff, threshold, 255, cv.THRESH_BINARY)[1]

    # Grow the mask slightly, then close small holes inside it.
    disc = cv.getStructuringElement(cv.MORPH_ELLIPSE, (3, 3))
    thresholded = cv.dilate(thresholded, disc, iterations=1)
    thresholded = cv.morphologyEx(thresholded, cv.MORPH_CLOSE, disc, iterations=5)

    # findContours returns (contours, hierarchy) or (image, contours, hierarchy)
    # depending on the OpenCV version; the contours are always second to last.
    cnts = cv.findContours(thresholded.copy(), cv.RETR_EXTERNAL,
                           cv.CHAIN_APPROX_SIMPLE)[-2]

    if len(cnts) == 0:
        return None

    segmented = max(cnts, key=cv.contourArea)
    return (thresholded, segmented)


In [8]:
def hist_skin(hist, hsv):
    """Build a binary mask of the pixels in ``hsv`` that match ``hist``.

    Parameters
    ----------
    hist : numpy.ndarray
        2-D histogram over channels 0 and 1, using the ranges
        ``[0, 200, 0, 256]`` (the same ranges are passed to back-projection).
    hsv : numpy.ndarray
        Image to back-project onto; presumably an HSV image, so channels 0/1
        would be hue/saturation - confirm against the caller.

    Returns
    -------
    numpy.ndarray
        Mask in which every pixel with a non-zero smoothed back-projection
        value is set to 255 and all others to 0.
    """
    dst = cv.calcBackProject([hsv], [0, 1], hist, [0, 200, 0, 256], 1)

    # Spread the back-projection with a small elliptical kernel (in place).
    disc = cv.getStructuringElement(cv.MORPH_ELLIPSE, (3, 3))
    cv.filter2D(dst, -1, disc, dst)

    # Smooth before thresholding.
    blur = cv.GaussianBlur(dst, (11, 11), 0)
    blur = cv.medianBlur(blur, 15)

    # Threshold at 0: any remaining non-zero response counts as a match.
    thresh = cv.threshold(blur, 0, 255, cv.THRESH_BINARY)[1]
    return thresh

In [9]:
def gest_avg(cap, gest, aWeight):
    """Return the running average of gesture frames after adding ``cap``.

    When ``gest`` is None the average is started from a float copy of ``cap``.
    Otherwise ``cap`` is blended into ``gest`` via ``cv.accumulateWeighted``
    with weight ``aWeight`` and that call's result is returned.
    """
    if gest is not None:
        return cv.accumulateWeighted(cap, gest, aWeight)
    # First frame of a capture: it becomes the initial average.
    return cap.astype('float')

In [10]:

def display_output(c, num_chars, output, output_back):
    """Append a recognised character to the text and draw it on the canvas.

    Parameters
    ----------
    c : object or None
        Character to add (rendered with ``str(c)``). ``None`` means there is
        nothing to add.
    num_chars : int
        Number of characters already drawn; selects the grid cell for ``c``.
    output : str or None
        Text accumulated so far (``None`` before the first character).
    output_back : numpy.ndarray
        Image the character is drawn onto with ``cv.putText``.

    Returns
    -------
    tuple
        ``(output_back, output)`` after the update. When ``c`` is None both
        are returned unchanged, so callers can always unpack the result.
    """
    if c is None:
        # Previously this returned a bare None, which broke tuple unpacking
        # at the call site.
        return output_back, output

    # Text grid: 20 characters per row, cells of 20x20 px, origin (100, 110).
    w = 20
    h = 20
    row, col = divmod(num_chars, 20)
    x = 100 + col * w
    y = 110 + row * h

    text = str(c)
    output = text if output is None else output + text

    output_back = cv.putText(output_back, text, (int(x), int(y)),
                             fontFace=cv.FONT_HERSHEY_DUPLEX,
                             fontScale=0.6, thickness=2, color=(255, 255, 255))
    return output_back, output



In [None]:
# Running-average background model shared with run_avg() and segment().
bg = None
if __name__ == '__main__':
    # Interactive capture loop. Keys handled below:
    #   's'   sample a skin histogram from the inner rectangle
    #   'g'   start capturing a gesture (20 frames are averaged, then classified)
    #   Enter (13) discard the background model and re-learn it
    #   'c'   quit

    aWeight = 0.5      # weight of the newest frame in the running averages

    flag = False       # True once a skin histogram has been captured
    capture = False    # True while a gesture is being accumulated

    # Outer region of interest (note: `right` is the smaller x, `left` the larger).
    top, right, bottom, left = 10, 80, 266, 336

    # Inner rectangle used to sample the skin histogram.
    in_top, in_right, in_bottom, in_left = top+50, right+100, bottom-50, left-100

    num_frames = 0     # frames seen since the background model was (re)started
    gest_frames = 0    # frames accumulated into the current gesture
    gest = None        # running average of the thresholded gesture frames

    dim = (50, 50)

    num_chars = 0      # characters already drawn on the output canvas
    output = None      # recognised text so far

    output_back = cv.imread('./output_back1.jpg')
    if output_back is None:
        # cv.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError("could not read './output_back1.jpg'")

    camera = cv.VideoCapture(0)

    try:
        while True:
            (ret, frame) = camera.read()
            if not ret or frame is None:
                # Camera missing, busy or disconnected: stop instead of crashing
                # inside cv.resize on a None frame.
                print('could not read a frame from the camera; stopping.')
                break

            frame = cv.resize(frame, (640, 480))
            frame = cv.flip(frame, 1)

            clone = frame.copy()

            (height, width) = frame.shape[:2]
            roi = frame[top:bottom, right:left]
            hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV)

            in_roi = frame[in_top:in_bottom, in_right:in_left]

            gray = cv.cvtColor(roi, cv.COLOR_BGR2GRAY)
            gray = cv.GaussianBlur(gray, (3, 3), 0)
            gray = cv.medianBlur(gray, 3)

            keypress = cv.waitKey(1) & 0xFF

            if keypress == ord('c'):
                break

            elif keypress == ord('s'):
                flag = True
                print('skin histogram captured!')
                hsvCrop = cv.cvtColor(in_roi, cv.COLOR_BGR2HSV)
                cv.imshow("i", hsvCrop)
                hist = cv.calcHist([hsvCrop], [0, 1], None, [200, 256], [0, 200, 0, 256])
                cv.normalize(hist, hist, 0, 255, cv.NORM_MINMAX)

            elif keypress == ord('g'):
                capture = True
                gest_frames = 0
                # Start the new gesture from scratch instead of blending it
                # into the average left over from the previous one.
                gest = None

            if keypress == 13:
                bg = None
                num_frames = 0

            if num_frames < 100:
                # The first 100 frames only feed the background model.
                run_avg(gray, aWeight)
            else:
                if num_frames == 100:
                    print('background captured!')
                hand = segment(gray)

                if hand is not None:
                    (thresholded, segmented) = hand

                    # Contours are in ROI coordinates; shift them into the frame.
                    cv.drawContours(clone, [segmented + (right, top)], -1, (0, 165, 255))

                    cv.imshow('Output', output_back)
                    if flag:
                        # Merge the skin-colour mask into the motion mask.
                        skin = hist_skin(hist, hsv_roi)
                        thresholded = cv.bitwise_or(thresholded, skin)
                        # [-2] selects the contour list on both the 2-value and
                        # 3-value return signatures of cv.findContours.
                        cnts = cv.findContours(thresholded.copy(), cv.RETR_EXTERNAL,
                                               cv.CHAIN_APPROX_SIMPLE)[-2]
                        segmented2 = max(cnts, key=cv.contourArea)
                        cv.drawContours(clone, [segmented2 + (right, top)], -1, (250, 0, 205), 2)
                    if capture:
                        if gest_frames < 20:
                            now = thresholded
                            gest = gest_avg(now, gest, aWeight)
                            gest_frames += 1

                        else:
                            # Round-trip through disk: the averaged mask is saved
                            # and read back (cv.imread's default mode yields a
                            # 3-channel image) before being classified.
                            cv.imwrite('./mydata/gesty.jpg', gest)
                            gesty = cv.imread('./mydata/gesty.jpg')
                            recog = Image.fromarray(gesty)
                            digit = recognize_gest(recog)
                            output_back, output = display_output(digit, num_chars, output, output_back)
                            num_chars += 1
                            capture = False

                    cv.imshow('Thresholded', thresholded)

            cv.rectangle(clone, (left, top), (right, bottom), (0, 255, 0), 2)
            cv.rectangle(clone, (in_left, in_top), (in_right, in_bottom), (255, 0, 0), 1)

            num_frames += 1
            cv.imshow('Video Feed', clone)
    finally:
        # Always close the windows and free the camera, even when the loop
        # exits through an exception.
        cv.destroyAllWindows()
        camera.release()

background captured!
skin histogram captured!
background captured!
background captured!
0background captured!
123456789background captured!
background captured!
background captured!
6background captured!
8

In [12]:
# Re-run the classifier on the gesture image saved by the capture loop.
gesty = cv.imread('./mydata/gesty.jpg')
if gesty is None:
    # cv.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("could not read './mydata/gesty.jpg'")
print(gesty.shape)
recog = Image.fromarray(gesty)
recognize_gest(recog)

(256, 256, 3)
0

'0'