# OpenCV High-GUI

## User-assisted Segmentation

The user specifies a rectangle containing the object

GrabCut (graph-cut based) segmentation is then used to extract the object inside the rectangle.

In [3]:
import cv2
import numpy as np

# State shared between the mouse callback and the main script.
# Tuples and bools are immutable, so chained assignment cannot alias anything.
tl = br = (0, 0)       # rectangle corners: press point / current-or-release point
bDraw = done = False   # "drag in progress" flag / "selection finished" flag

# Mouse callback function
def mouseCallBackFunc(event,x,y,flags,userdata):
    """Track a click-and-drag rectangle selection on the image window.

    event    -- OpenCV mouse event code (cv2.EVENT_*)
    x, y     -- mouse position reported with the event
    flags    -- unused
    userdata -- unused

    Updates the module-level ``tl``/``br`` corners and the ``bDraw``/``done``
    flags, and redraws the rectangle on a copy of the global ``img``.
    """
    # grab references to the global variables
    global tl, br, bDraw, done

    # The selection is final once ``done`` is set. The nested waitKey() calls
    # below keep dispatching mouse events, so without this guard a late move
    # or click could overwrite the corners that were just committed.
    if done:
        return

    # if left button is pressed
    if event == cv2.EVENT_LBUTTONDOWN:
        # top-left corner
        tl = (x, y)
        # set the draw flag
        bDraw = True

    # mouse movement on the window
    elif event == cv2.EVENT_MOUSEMOVE:
        if bDraw:
            # current bottom-right corner
            br = (x, y)
            # Display current rect on a fresh copy so old previews do not pile up
            preview = img.copy()
            cv2.rectangle(preview, tl, br, (0, 255, 0), 2)
            cv2.imshow(windowName, preview)
            cv2.waitKey(10)

    # if left button is released
    elif event == cv2.EVENT_LBUTTONUP:
        # A release without a preceding press in the window is not a selection
        if not bDraw:
            return
        # final bottom-right corner
        br = (x, y)
        # Commit the state BEFORE the nested waitKey so that events delivered
        # during the one-second display cannot modify the selection
        bDraw = False
        done = True
        # Display the final rect for a moment before the window is closed
        preview = img.copy()
        cv2.rectangle(preview, tl, br, (255, 0, 0), 2)
        cv2.imshow(windowName, preview)
        cv2.waitKey(1000)


# Load the image
SAMPLES_DATA_DIR = 'C:/opencv/sources/samples/data/'
img = cv2.imread(SAMPLES_DATA_DIR+'left.jpg')
#img = cv2.imread('cheeky_penguin.png')
# imread signals failure by returning None instead of raising
if img is None:
    raise IOError("Could not read the input image")
img_disp = img.copy()

# Create a window
windowName = "Draw a rectange over the object"
cv2.namedWindow(windowName)

# Display the image
cv2.imshow(windowName, img)

# Set the callback function for any mouse event
cv2.setMouseCallback(windowName, mouseCallBackFunc, None)

# Prompt the user to draw a rectangle over the object.
# waitKey() is what lets HighGUI process window events (and so run the mouse
# callback); pressing 'escape' cancels the selection.
while not done:
    if (cv2.waitKey(1) & 0xff) == 27:
        break

# Close all windows
cv2.destroyAllWindows()

# Build the (x,y,w,h) rectangle from the two corners.
# The user may drag in any direction, so order the coordinates first; a raw
# br-tl difference would give a negative width/height for up/left drags.
rect = None
if done:
    x0, x1 = sorted((tl[0], br[0]))
    y0, y1 = sorted((tl[1], br[1]))
    # Keep the rectangle inside the image
    height, width = img.shape[:2]
    x0, y0 = max(x0, 0), max(y0, 0)
    x1, y1 = min(x1, width), min(y1, height)
    # A plain click (no drag) yields an empty rectangle, which cannot be segmented
    if x1 > x0 and y1 > y0:
        rect = (x0, y0, x1 - x0, y1 - y0)

if rect is None:
    print("No valid rectangle was selected; segmentation skipped.")
else:
    # Segmentation using graph cuts
    # cv2.grabCut(img,mask,rect,bgdModel,fgdModel,iterCount,mode)
    # img - Input image
    # mask - It is a mask image where we specify which areas are background, foreground or probable background/foreground
    #      - It is done by the following flags, cv2.GC_BGD, cv2.GC_FGD, cv2.GC_PR_BGD, cv2.GC_PR_FGD
    # rect - It is the coordinates of a rectangle which includes the foreground object in the format (x,y,w,h)
    # bgdModel,fgdModel - These are arrays used by the algorithm internally. Use two np.float64 type zero arrays of size (1,65)
    # iterCount - Number of iterations the algorithm should run
    # mode - It should be cv2.GC_INIT_WITH_RECT for initializing with a rect, or cv2.GC_INIT_WITH_MASK for initializing with a mask
    bgdModel = np.zeros((1,65),np.float64)
    fgdModel = np.zeros((1,65),np.float64)
    mask, bgdModel, fgdModel = cv2.grabCut(img,None,rect,bgdModel,fgdModel,5,cv2.GC_INIT_WITH_RECT)

    # If a pixel is BGD or probably BGD, set it to 0, otherwise, set it to 1
    mask = np.where((mask==cv2.GC_BGD)|(mask==cv2.GC_PR_BGD),0,1).astype('uint8')
    # newaxis adds a trailing channel axis so the 2-D mask broadcasts over the colour channels
    img_disp = img * mask[:,:,np.newaxis]

    # Display result
    cv2.imshow("Result", img_disp)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Optical Flow

Draw a flow arrow at every pixel whose normalized flow magnitude is at least the threshold (adjustable with the trackbar).

In [7]:
import cv2
import numpy as np

# Video reader
SAMPLES_DATA_DIR = 'C:/opencv/sources/samples/data/'
videoreader = cv2.VideoCapture(SAMPLES_DATA_DIR+"vtest.avi")
# VideoCapture does not raise when the file cannot be opened, so check
# explicitly and fail here with a clear message
if not videoreader.isOpened():
    raise IOError("Could not open video: " + SAMPLES_DATA_DIR + "vtest.avi")

# Flow magnitude threshold, compared against the normalized (0..1) magnitude
MAG_THRESH = 0.99

# trackbar callback function
def trackbarCallback(x):
    """Update the global flow-magnitude threshold from the trackbar.

    x -- the new trackbar position, passed in by OpenCV; the trackbar is
         labelled in percent, so it is scaled down to a 0..1 fraction.
    """
    # grab references to the global variables
    global MAG_THRESH
    # x already is the current position, so there is no need to query the
    # GUI again (which also required the window name to be defined first)
    MAG_THRESH = x / 100
    
# Create a window
windowName = "Optical Flow"
cv2.namedWindow(windowName)

# Add a trackbar to the window
# The callback function is called when the trackbar changes
cv2.createTrackbar('MagThresh(%)',windowName,1,100,trackbarCallback)

# Set trackbar initial position
# round() rather than int(): the float product can land just below the
# intended integer and would then be truncated one step too low
cv2.setTrackbarPos('MagThresh(%)',windowName,int(round(MAG_THRESH*100)))

# try/finally guarantees the capture and the windows are released even if
# reading or processing a frame fails
try:
    # Read a single frame (the first frame)
    # read() also returns a bool (True/False). If frame is read correctly, it will be True
    ret, currentframeRGB = videoreader.read()
    if not ret:
        raise IOError("Could not read the first frame from the video")
    # convert to grayscale
    currentframe = cv2.cvtColor(currentframeRGB,cv2.COLOR_BGR2GRAY)

    while True:
        # Update the previous frame to the current frame
        # (the colour frame is copied because the arrows are drawn onto it)
        previousFrameRGB = currentframeRGB.copy()
        previousFrame = currentframe.copy()

        # Read the next frame
        ret, currentframeRGB = videoreader.read()
        # terminates the loop if the last frame is reached
        if not ret:
            break
        # convert to grayscale
        currentframe = cv2.cvtColor(currentframeRGB,cv2.COLOR_BGR2GRAY)

        # Compute optical flow using Gunnar Farneback's algorithm
        # calcOpticalFlowFarneback(prev,next,flow,pyr_scale,levels,winsize,iterations,poly_n,poly_sigma,flags)
        # pyr_scale: parameter specifying the image scale (<1) to build pyramids
        # levels: number of pyramid layers including the initial image
        # winsize: averaging window size
        # iterations: number of iterations the algorithm does at each pyramid level
        # poly_n: size of the pixel neighborhood used to find polynomial expansion in each pixel
        # poly_sigma: standard deviation of the Gaussian that is used to smooth derivatives
        # flags: OPTFLOW_USE_INITIAL_FLOW uses the input flow as an initial flow approximation
        # flow has the same size as prev with two channels: horizontal (0) and vertical (1) flow components
        flow = cv2.calcOpticalFlowFarneback(previousFrame,currentframe, None, 0.5, 3, 15, 3, 5, 1.2, 0)

        # Compute the flow magnitude from the horizontal/vertical flow
        mag = np.sqrt( flow[:,:,0]*flow[:,:,0] + flow[:,:,1]*flow[:,:,1] )

        # Normalize the mag flow to 0..1
        magMax = np.amax(mag)
        # avoid dividing by 0
        mag = mag / (magMax+np.finfo(float).eps)

        # indices of all points at or above the threshold
        # np.where returns indices in array-axis order:
        # loc[0] is a numpy array containing the row indices (y coordinates)
        # loc[1] is a numpy array containing the column indices (x coordinates)
        loc = np.where( mag >= MAG_THRESH)

        # loop through the points
        # tolist() yields plain Python ints, which are always accepted by
        # OpenCV as point coordinates (NumPy integer scalars may not be)
        for y, x in zip(loc[0].tolist(), loc[1].tolist()):
            # draw an arrow from the point to the point displaced by its flow
            flowX = flow[y,x,0]
            flowY = flow[y,x,1]
            pt2 = ( int(x+flowX) , int(y+flowY) )
            cv2.arrowedLine(previousFrameRGB,(x,y),pt2,(255,0,0),1,cv2.LINE_AA,0,0.7)

        # Display
        cv2.imshow(windowName,previousFrameRGB)
        # Wait
        k = cv2.waitKey(500) & 0xff
        # Terminate on pressing the 'escape' key
        if k == 27:
            break
finally:
    # Destroy the video reader
    videoreader.release()
    # Destroy all windows
    cv2.destroyAllWindows()
