In [1]:
import numpy as np
import cv2 
from matplotlib import pyplot as plt
from IPython.display import clear_output

# Wrap the playback loop in a try statement so that a keyboard interrupt
# (e.g. the Jupyter stop button) still releases the camera/video device and
# lets the rest of the code continue.
def play_video(video_path):
    """Play a video inline in the notebook, one matplotlib figure per frame.

    Frames are read from ``cv2.VideoCapture(video_path)`` until a read fails
    or the user interrupts the kernel. Each frame is converted from BGR to
    RGB, shown with matplotlib, and the cell output is then cleared (with
    ``wait=True``) so the next frame replaces it.

    Args:
        video_path: Value passed straight to ``cv2.VideoCapture``.

    Returns:
        None. A ``KeyboardInterrupt`` is swallowed so the notebook can carry
        on; any other exception propagates after the capture is released.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            # Capture frame; a failed read marks the end of the stream.
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            # Display the frame (matplotlib expects RGB channel order).
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            plt.axis('off')
            plt.title("Input Stream")
            plt.imshow(frame)
            plt.show()

            # Clear cell output when new frame is available
            clear_output(wait=True)
    except KeyboardInterrupt:
        # Stop button pressed in the Jupyter GUI: end playback quietly.
        pass
    finally:
        # Release on every exit path (end of stream, interrupt, or error)
        # so the device/file handle is never leaked.
        cap.release()
        print("Released Video Resource")


# Open the input video and create a writer with the same frame size and rate.
video = cv2.VideoCapture('res/Multiple View.avi')
w = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = video.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('output.avi', fourcc, fps, (w,  h))

# Reference frame, augmented layer and their masks (masks loaded as grayscale).
img_test = cv2.imread('res/ReferenceFrame.png')
img_over = cv2.imread('res/AugmentedLayer.PNG')
over_mask = cv2.imread('res/AugmentedLayerMask.PNG',0)
reference_mask = cv2.imread('res/ObjectMask.PNG',0)

# cv2.imread returns None instead of raising when a file cannot be read;
# fail here with a clear message rather than with a TypeError further down.
for _path, _img in (('res/ReferenceFrame.png', img_test),
                    ('res/AugmentedLayer.PNG', img_over),
                    ('res/AugmentedLayerMask.PNG', over_mask)):
    if _img is None:
        raise FileNotFoundError("Could not read image: {}".format(_path))

# Keep only the first 640 columns of the augmented layer and its mask.
# NOTE(review): presumably 640 is the reference frame width - confirm.
img_over = img_over[:,:640]
over_mask = over_mask[:,:640]

o_height, o_width = img_over.shape[:2]
t_height, t_width = img_test.shape[:2]

print(img_test.shape)
print(img_over.shape)

# apply the augmented image over the original frame
inv_over_mask = cv2.bitwise_not(over_mask)
img_over = cv2.bitwise_and(img_over,img_over,mask=over_mask)
# Build the composite in a separate image: img_test is used later for SIFT
# feature extraction and must stay the clean reference frame, otherwise the
# reference features would be computed on the overlay instead of the object.
preview = cv2.bitwise_and(img_test,img_test,mask=inv_over_mask)
preview = cv2.add(preview,img_over)

plt.imshow(cv2.cvtColor(preview, cv2.COLOR_BGR2RGB))
plt.show()


# now work on every frame of the video
# SIFT is exposed as cv2.SIFT_create in recent OpenCV builds; older contrib
# builds only provide it under cv2.xfeatures2d.
if hasattr(cv2, 'SIFT_create'):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()

###############
# Defining index for approximate kdtree algorithm
FLANN_INDEX_KDTREE = 1

# Defining parameters for algorithm
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)

# Defining search params.
# checks=50 specifies the number of times the trees in the index should be recursively traversed.
# Higher values gives better precision, but also takes more time
search_params = dict(checks = 50)

# Initializing matcher
flann = cv2.FlannBasedMatcher(index_params, search_params)
###############

# find keypoints in the reference frame (restricted to the object mask)
kp_reference = sift.detect(img_test,reference_mask)

# compute the descriptors of the reference frame
kp_reference, des_reference = sift.compute(img_test, kp_reference)

# A homography is only estimated when more than this many good matches exist.
MIN_MATCH_COUNT = 10

# Last successfully estimated homography. It is reused for frames where no
# new one can be estimated; while it is still None frames are written as-is.
M = None

try:
    while True:
        # Capture frame; a failed read marks the end of the stream.
        ret, frame = video.read()
        if not ret or frame is None:
            break

        # find keypoints and compute their descriptors
        kp_new = sift.detect(frame)
        kp_new, des_new = sift.compute(frame, kp_new)

        # feature matching with the reference frame, keeping only matches
        # that pass the ratio test against their second-best neighbour.
        # Descriptors are None when no keypoints were found, and k=2 needs
        # at least two candidates in the frame.
        good = []
        if des_reference is not None and des_new is not None and len(des_new) >= 2:
            matches = flann.knnMatch(des_reference,des_new,k=2)
            good = [
                pair[0] for pair in matches
                if len(pair) == 2 and pair[0].distance < 0.7*pair[1].distance
            ]

        ### calculate the homography
        if len(good)>MIN_MATCH_COUNT:
            # building the correspondence arrays of good matches
            src_pts = np.float32([ kp_reference[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
            dst_pts = np.float32([ kp_new[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
            # Using RANSAC to estimate a robust homography.
            # findHomography may fail and return None; keep the previous
            # estimate in that case.
            new_M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
            if new_M is not None:
                M = new_M
        else:
            print( "Not enough matches are found - {}/{}".format(len(good), MIN_MATCH_COUNT) )

        if M is not None:
            ### make the transformation of the augmented image
            # Warp both the layer and its mask to the frame's own size so
            # they can be combined with the frame below.
            frame_size = (frame.shape[1], frame.shape[0])
            augmented_new = cv2.warpPerspective(img_over, M, frame_size)
            mask_new = cv2.warpPerspective(over_mask, M, frame_size)

            # apply the augmented image over the original frame
            inv_over_mask = cv2.bitwise_not(mask_new)
            frame = cv2.bitwise_and(frame,frame,mask=inv_over_mask)
            augmented_new = cv2.bitwise_and(augmented_new,augmented_new,mask=mask_new)
            frame = cv2.add(frame,augmented_new)

        # write the video (unaugmented if no homography is available yet)
        out.write(frame)

        # Clear cell output when new frame is available
        clear_output(wait=True)
except KeyboardInterrupt:
    # Stop button pressed in the Jupyter GUI: stop processing quietly.
    pass
finally:
    # Release reader and writer on every exit path so the output file is
    # finalized even if processing fails midway.
    video.release()
    out.release()
    print("Released Video Resource")

Released Video Resource
