In [1]:
import cv2
import numpy as np

# Webcam and assets
cap = cv2.VideoCapture(0)
imgTarget = cv2.imread('markerimg.jpg')
video = cv2.VideoCapture('displayvideo1.mp4')

if imgTarget is None:
    raise FileNotFoundError("Target image not found.")
if not video.isOpened():
    raise FileNotFoundError("Video file not found or can't be opened.")
if not cap.isOpened():
    raise RuntimeError("Webcam could not be accessed.")

# Get first frame
success, dispVideo = video.read()
if not success:
    raise RuntimeError("Couldn't read from the video.")

# Resize video frame to match target
hT, wT, cT = imgTarget.shape
dispVideoResized = cv2.resize(dispVideo, (wT, hT))

# ORB feature detector
orb = cv2.ORB_create(nfeatures=1000)
kp1, des1 = orb.detectAndCompute(imgTarget, None)

detection = False
frameCounter = 0

while True:
    success, WebcamFeed = cap.read()
    if not success:
        print("Failed to grab frame from webcam.")
        break

    AugmentedImage = WebcamFeed.copy()
    kp2, des2 = orb.detectAndCompute(WebcamFeed, None)

    # If descriptors are not found, skip this frame
    if des2 is None or des1 is None:
        cv2.imshow('AugmentedImage', AugmentedImage)
        cv2.waitKey(1)
        continue

    # Matcher and filtering
    bruteForce = cv2.BFMatcher()
    matches = bruteForce.knnMatch(des1, des2, k=2)

    good = []
    for m, n in matches:
        if m.distance < 0.75 * n.distance:
            good.append(m)

    print(f"Good matches: {len(good)}")
    imgFeatures = cv2.drawMatches(imgTarget, kp1, WebcamFeed, kp2, good, None, flags=2)

    if len(good) > 15:
        detection = True
        srcpt = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        despt = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

        matrix, mask = cv2.findHomography(srcpt, despt, cv2.RANSAC, 5)
        if matrix is not None:
            pts = np.float32([[0, 0], [0, hT], [wT, hT], [wT, 0]]).reshape(-1, 1, 2)
            dst = cv2.perspectiveTransform(pts, matrix)
            img2 = cv2.polylines(WebcamFeed, [np.int32(dst)], True, (255, 0, 255), 3)

            if frameCounter >= video.get(cv2.CAP_PROP_FRAME_COUNT):
                video.set(cv2.CAP_PROP_POS_FRAMES, 0)
                frameCounter = 0

            success, dispVideo = video.read()
            if success:
                dispVideoResized = cv2.resize(dispVideo, (wT, hT))
                imgWarp = cv2.warpPerspective(dispVideoResized, matrix, (WebcamFeed.shape[1], WebcamFeed.shape[0]))

                # Masking
                maskNew = np.zeros((WebcamFeed.shape[0], WebcamFeed.shape[1]), np.uint8)
                cv2.fillPoly(maskNew, [np.int32(dst)], (255, 255, 255))
                mskInv = cv2.bitwise_not(maskNew)

                AugmentedImage = cv2.bitwise_and(AugmentedImage, AugmentedImage, mask=mskInv)
                AugmentedImage = cv2.bitwise_or(imgWarp, AugmentedImage)

            frameCounter += 1
        else:
            print("Homography matrix could not be computed.")
            detection = False
    else:
        detection = False
        video.set(cv2.CAP_PROP_POS_FRAMES, 0)
        frameCounter = 0

    # Display
    # cv2.imshow('border',img2)
    # cv2.imshow('imgWarp',imgWarp)
    # cv2.imshow('maskNew',maskNew)
    cv2.imshow('imgFeatures',imgFeatures)
    cv2.imshow('AugmentedImage',AugmentedImage)  
    cv2.imshow('imgTarget',imgTarget)
    # cv2.imshow('imgTargetWithKeyPoints',imgTargetWithKeyPoints)
    cv2.imshow('dispVideo',dispVideoResized)
    # cv2.imshow ('WebcamFeed',WebcamFeed)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
video.release()
cv2.destroyAllWindows()


qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/rakib/miniconda3/envs/dev/lib/python3.12/site-packages/cv2/qt/plugins"
QFont::fromString: Invalid description 'Noto Sans,11,-1,5,400,0,0,0,0,0,0,0,0,0,0,1'
QFont::fromString: Invalid description 'Hack,11,-1,5,400,0,0,0,0,0,0,0,0,0,0,1'
QFont::fromString: Invalid description 'Noto Sans,11,-1,5,400,0,0,0,0,0,0,0,0,0,0,1'
QFont::fromString: Invalid description 'Noto Sans,11,-1,5,400,0,0,0,0,0,0,0,0,0,0,1'


Good matches: 0
Good matches: 3
Good matches: 0
Good matches: 0
Good matches: 0
Good matches: 0
Good matches: 0
Good matches: 1
Good matches: 2
Good matches: 3
Good matches: 3
Good matches: 4
Good matches: 2
Good matches: 4
Good matches: 3
Good matches: 1
Good matches: 4
Good matches: 3
Good matches: 2
Good matches: 4
Good matches: 3
Good matches: 4
Good matches: 6
Good matches: 3
Good matches: 4
Good matches: 4
Good matches: 6
Good matches: 1
Good matches: 5
Good matches: 2
Good matches: 3
Good matches: 3
Good matches: 4
Good matches: 2
Good matches: 3
Good matches: 6
Good matches: 3
Good matches: 1
Good matches: 2
Good matches: 1
Good matches: 4
Good matches: 2
Good matches: 3
Good matches: 5
Good matches: 6
Good matches: 2
Good matches: 1
Good matches: 3
Good matches: 3
Good matches: 2
Good matches: 6
Good matches: 6
Good matches: 2
Good matches: 2
Good matches: 1
Good matches: 2
Good matches: 0
Good matches: 4
Good matches: 4
Good matches: 2
Good matches: 3
Good matches: 0
Good mat

ValueError: not enough values to unpack (expected 2, got 1)

In [None]:
import cv2
import numpy as np

#getting the webcam feed and saving the image and video in variables
cap = cv2.VideoCapture(0)
imgTarget= cv2.imread('markerimg.jpg')
video = cv2.VideoCapture('displayvideo1.mp4')

detection =False
frameCounter =0

#gets the first frame of the video
success,dispVideo = video.read()

#resizing the video frame to match the image target
hT,wT,cT = imgTarget.shape
dispVideoResized= cv2.resize(dispVideo,(wT,hT))

#implementing oriented FAST algorithm
orb = cv2.ORB_create(nfeatures = 1000)
# creating keypoints and descriptors
kp1,des1 = orb.detectAndCompute(imgTarget,None)
imgTargetWithKeyPoints = cv2.drawKeypoints(imgTarget,kp1,None)

while(True):
    #reading the webcam feed for everyframe
    sucess,WebcamFeed = cap.read()
    AugmentedImage = WebcamFeed.copy()
    
    kp2,des2 = orb.detectAndCompute(WebcamFeed,None)
    WebcamFeed = cv2.drawKeypoints(WebcamFeed,kp2,None)
    
    # if there is no image target detected reset the video i.e start the video from the begining and set the frame counter to 0
    if detection == False:
        video.set(cv2.CAP_PROP_POS_FRAMES,0)
        frameCounter = 0
    else:
        # if the video has completed loop back again
        if frameCounter == video.get(cv2.CAP_PROP_FRAME_COUNT):
            video.set(cv2.CAP_PROP_POS_FRAMES,0)
            frameCounter = 0
        success,dispVideo = video.read()
        dispVideoResized= cv2.resize(dispVideo,(wT,hT))
    #matching the target image with the webcam feed by comparing the descriptor values using K nearest neighbours
    bruteForce = cv2.BFMatcher()
    # setting 2 output values  becaues two images are compared
    matches = bruteForce.knnMatch(des1, des2, k=2)
    good =[]
    # since K=2, the two vlaues in matches can be unpacked
    for m,n in matches:
        #comparing the distance between the descriptor values of the respective images and classifying them as good 
        if m.distance<0.75*n.distance:
            good.append(m)
    print(len(good))
    imgFeatures = cv2.drawMatches(imgTarget,kp1,WebcamFeed,kp2,good,None,flags= 2)
    
    if len(good)>15:
        detection = True
        #creating source points and destination points matrix to measure homography (to perform necessary transformations)
        srcpt= np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1,1,2)
        despt= np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1,1,2)
        
        #using ransac algorithm to eliminate any outliers
        matrix,mask = cv2.findHomography(srcpt,despt,cv2.RANSAC,5)
        print(matrix)
        
        #drawing the bounding box around the target image using perspective transform
        pts = np.float32([[0,0],[0,hT],[wT,hT],[wT,0]]).reshape(-1,1,2)
        dst = cv2.perspectiveTransform(pts,matrix)
        #configuring the shape and colour of the bounding box
        img2 = cv2.polylines(WebcamFeed,[np.int32(dst)],True,(255,0,255),3)
        #warping the webcam feed to the transformed imagetarget in the scene
        imgWarp = cv2.warpPerspective(dispVideoResized,matrix,(WebcamFeed.shape[1],WebcamFeed.shape[0]))
        
        #creating a mask to overlay the video on the target image        
        maskNew = np.zeros((WebcamFeed.shape[0],WebcamFeed.shape[1]),np.uint8)
        cv2.fillPoly(maskNew,[np.int32(dst)],(255,255,255))
        mskInv = cv2.bitwise_not(maskNew)
        AugmentedImage = cv2.bitwise_and(AugmentedImage,AugmentedImage,mask=mskInv)
        AugmentedImage = cv2.bitwise_or(imgWarp,AugmentedImage)
        

           
        
        
    #cv2.imshow('border',img2)
    #cv2.imshow('imgWarp',imgWarp)
    #cv2.imshow('maskNew',maskNew)
    cv2.imshow('imgFeatures',imgFeatures)
    cv2.imshow('AugmentedImage',AugmentedImage)  
    #cv2.imshow('imgTarget',imgTarget)
    #cv2.imshow('imgTargetWithKeyPoints',imgTargetWithKeyPoints)
    #cv2.imshow('dispVideo',dispVideoResized)
    #cv2.imshow ('WebcamFeed',WebcamFeed)
    cv2.waitKey(1)
    frameCounter +=1

