In [1]:
import cv2
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms


In [2]:
#The tracked object is a circular disc: one face is coloured blue (the face to be detected),
#while the other face is black (not to be detected).

In [3]:
#Lower/upper HSV bounds that select the colour of the tracked object (here: its blue face)
#NOTE- OpenCV 8-bit HSV stores H in 0-179 and S,V in 0-255; an upper H of 180 just means "no upper hue limit"
color_min = np.array((100, 150, 80))
color_max = np.array((180, 255, 200))

#draw_area: image in which the tracked object is isolated.
#This zero image is only a placeholder; the capture loop overwrites it every frame.
draw_area = np.zeros(shape=(480, 640, 3), dtype=np.uint8)

#draw_pad: black canvas the digit is painted on; it is OR-ed onto the webcam frame for display
draw_pad = np.zeros_like(draw_area)

#5x5 structuring element shared by the erode/dilate clean-up steps
filter = np.ones(shape=(5, 5), dtype=np.uint8)

#prev_center: centre of the tracked object in the previous frame (None until it is seen)
#predict: most recent predicted digit (None until a prediction has been made)
prev_center = predict = None

In [4]:
#CNN model structure (must match the architecture stored in the saved model file)
class Network(nn.Module):
    """Small convolutional classifier: two conv/pool stages followed by three linear layers.

    Sized for single-channel 28x28 inputs: each 5x5 convolution (no padding)
    shrinks the map by 4 pixels and each 2x2 pooling halves it, giving
    28 -> 24 -> 12 -> 8 -> 4, i.e. a 64 x 4 x 4 feature map feeding ``fc1``.
    The output has 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 32 -> 64 channels, 5x5 kernels, stride 1, no padding.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)

        # Classifier head: 1024 -> 120 -> 60 -> 10.
        self.fc1 = nn.Linear(64 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.fc3 = nn.Linear(60, 10)

    def forward(self, x):
        """Return the raw (un-normalised) 10-way scores for a batch ``x``.

        No softmax is applied; callers take ``argmax`` of the result directly.
        """
        # conv -> ReLU -> 2x2 max-pool, applied twice
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), kernel_size=2, stride=2)

        # flatten every sample to a 1024-vector for the linear layers
        x = x.view(-1, 64 * 4 * 4)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
    

In [5]:
#Loading the trained model
#SECURITY: the file holds a fully pickled model, and unpickling can execute arbitrary code,
#so only load a 'CNN_MNIST.pth' that you created yourself or otherwise trust.
#map_location='cpu' : the tensors fed to the model in this notebook live on the CPU,
#                     so a model saved on a GPU must be remapped to the CPU here.
#weights_only=False : PyTorch >= 2.6 defaults to weights_only=True, which refuses to
#                     unpickle a whole nn.Module such as this one.
try:
    net = torch.load('CNN_MNIST.pth', map_location='cpu', weights_only=False)
except TypeError:
    #older PyTorch releases have no weights_only argument
    net = torch.load('CNN_MNIST.pth', map_location='cpu')

#switch to inference mode; as the last expression this also displays the model structure
net.eval()


Network(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=1024, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (fc3): Linear(in_features=60, out_features=10, bias=True)
)

In [6]:
#crops the image, removing the empty drawpad area around the drawing
def crop_image(img):
    """Crop ``img`` to the tight bounding box of its non-zero pixels.

    Parameters
    ----------
    img : numpy.ndarray
        Image with at least two dimensions (rows, columns, ...). Any non-zero
        value counts as part of the drawing.

    Returns
    -------
    crop : numpy.ndarray
        Slice (view) of ``img`` covering the first to the last non-empty
        row and column, both inclusive.
    height, width : int
        Number of rows and columns of ``crop`` (always >= 1).

    Raises
    ------
    ValueError
        If ``img`` contains no non-zero pixel, so there is nothing to crop.
    """
    occupied = np.asarray(img) != 0
    if occupied.ndim > 2:
        # collapse channel axes so a pixel counts if any channel is non-zero
        occupied = occupied.any(axis=tuple(range(2, occupied.ndim)))

    rows = np.flatnonzero(occupied.any(axis=1))
    cols = np.flatnonzero(occupied.any(axis=0))
    if rows.size == 0:
        raise ValueError("crop_image: image contains no non-zero pixels")

    up, down = int(rows[0]), int(rows[-1])
    left, right = int(cols[0]), int(cols[-1])

    # down/right are the indices of the last non-empty row/column, so the slice
    # end and the size both need +1 to keep that row/column in the crop.
    crop = img[up:down + 1, left:right + 1]
    height = down - up + 1
    width = right - left + 1
    return crop, height, width

#resizes an image to the MNIST layout: the drawing fits a 20x20 box centred on a 28x28 canvas
def resize_image(img,height,width):
    """Scale ``img`` into a 20x20 box (keeping its aspect ratio) and pad it to 28x28.

    Parameters
    ----------
    img : numpy.ndarray
        Cropped drawing to be resized.
    height, width : int
        Size of ``img`` in rows and columns; must be non-zero.

    Returns
    -------
    numpy.ndarray
        The resized drawing surrounded by a constant zero border so that the
        result is 28 rows by 28 columns.
    """
    aspect_ratio = float(height/width)

    # The longer side becomes 20 pixels; the shorter side is scaled by the
    # aspect ratio, rounded to the nearest integer and never allowed to be 0.
    if height > width:
        target_h = 20
        target_w = int(round(20 / aspect_ratio, 0)) or 1
    elif width > height:
        target_w = 20
        target_h = int(round(20 * aspect_ratio, 0)) or 1
    else:  # square crop
        target_w = target_h = 20

    # cv2.resize takes the target size as (width, height)
    scaled = cv2.resize(img, (target_w, target_h))

    # Split the missing rows/columns evenly around the drawing; when the amount
    # is odd the extra line goes to the bottom/right side.
    pad_rows = 28 - target_h
    pad_cols = 28 - target_w
    top = pad_rows // 2
    bottom = pad_rows - top
    left = pad_cols // 2
    right = pad_cols - left

    newImage = cv2.copyMakeBorder(scaled, top, bottom, left, right, borderType=cv2.BORDER_CONSTANT, value=0)
    return newImage


#Complete MNIST-style preprocessing: crop the drawing, then resize it
def preprocess(img):
    """Run ``crop_image`` on ``img`` and pass its result to ``resize_image``.

    ``crop_image`` returns ``(cropped_image, height, width)``, which is exactly
    the argument list of ``resize_image``; the resized image is returned.
    """
    return resize_image(*crop_image(img))

In [7]:
#Predicts the number using the CNN model
def prediction(img):
    """Classify a preprocessed digit image with the loaded model ``net``.

    Parameters
    ----------
    img : numpy.ndarray
        Array with exactly 28*28 elements (the preprocessed drawing).

    Returns
    -------
    torch.Tensor
        One-element tensor holding the index of the largest model output.

    Notes
    -----
    Pixel values are passed to the model as-is (only converted to float).
    NOTE(review): confirm this matches the scaling used when the model was trained.
    """
    # reshape (not view) so that non-contiguous arrays are accepted as well
    img_data = torch.from_numpy(img).reshape(1, 1, 28, 28).float()

    # inference only: skip building the autograd graph
    with torch.no_grad():
        pred = net(img_data)

    return pred.argmax(dim=1)

In [8]:
#Declaring the webcam input
camera = cv2.VideoCapture(0)

#Controls: draw by moving the coloured object, press Enter to classify the drawing, press 'q' to quit.
try:
    while True:

        (grabbed, frame) = camera.read()               #frame contains the captured webcam frame
        if not grabbed:
            #no frame delivered (camera missing/busy/unplugged): frame is unusable, so stop cleanly
            print("Could not read a frame from the webcam, stopping.")
            break

        frame = cv2.flip(frame, 1)                     #flips the frame horizontally so it behaves like a mirror
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)   #converts bgr frame to hsv frame

        #the canvas must have the same shape as the frame for bitwise_or;
        #re-create it if the camera does not deliver the size the canvas was created with
        if draw_pad.shape != frame.shape:
            draw_pad = np.zeros_like(frame)

        #Finds the pixels having color in the specified range
        draw_area = cv2.inRange(hsv, color_min, color_max)

        #getting rid of small unnecessary pixel regions satisfying the color
        draw_area = cv2.erode(draw_area, filter, iterations=5)
        draw_area = cv2.dilate(draw_area, filter, iterations=5)

        #finds contours of the remaining regions satisfying the color
        #[-2] picks the contour list on both OpenCV 3.x (3 return values) and 4.x (2 return values)
        contour = cv2.findContours(draw_area.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        #Merges the drawn number with the webcam feed
        frame_drawing = cv2.bitwise_or(frame, draw_pad)

        if len(contour) > 0:

            #ASSUMING the max contour is of the object to be detected
            max_contour = max(contour, key=cv2.contourArea)

            #finds the center of the object to be detected and draws its approx contour
            ((x, y), radius) = cv2.minEnclosingCircle(max_contour)
            center = int(x), int(y)
            cv2.circle(frame_drawing, center, int(radius), (0, 0, 0), 10)

            #for an irregularly shaped object, the centroid from cv2.moments(max_contour)
            #(m10/m00, m01/m00) can be used instead of the enclosing-circle center

            if prev_center is not None:
                #draws line joining prev_center to the present center
                cv2.line(draw_pad, prev_center, center, (255, 255, 255), 15)

            #NOTE(review): prev_center is kept while the object is not detected, so the stroke
            #resumes with a straight line from the last seen position - confirm this is intended
            prev_center = center

        display = "Prediction : " + str(predict)

        cv2.putText(frame_drawing, display, (5, 420), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('Real Time Digit Recognition', frame_drawing)

        #poll the keyboard ONCE per frame: with two separate waitKey calls a key press
        #consumed by one call is never seen by the other check
        key = cv2.waitKey(1) & 0xff

        if key == ord('q'): # quits/stops the webcam feed, Press 'q' key
            break

        if key in (13, 10): # assess your written digit, Press Enter (carriage return 13 or line feed 10)

            #converting the 3-channel drawn image to pure black/white (0 or 255)
            draw_pad_gray = cv2.cvtColor(draw_pad, cv2.COLOR_BGR2GRAY)
            (th, digit_mask) = cv2.threshold(draw_pad_gray, 127, 255, cv2.THRESH_BINARY)

            #an empty canvas cannot be cropped; keep the previous prediction in that case
            if np.any(digit_mask):
                #converting the drawn image to MNIST image
                image = preprocess(digit_mask)

                #Predicting the number drawn using the loaded CNN model
                predict = int(prediction(image))

            #Clearing draw_pad to get another drawn image
            draw_pad = np.zeros_like(frame)
            prev_center = None

finally:
    #runs on normal exit and on errors/interrupts alike, so the webcam is never left locked

    #Releases the webcam
    camera.release()

    #Closes all windows
    cv2.destroyAllWindows()


                
            
            
                                                                                      