In [2]:
from l2cs import Pipeline, render
import cv2
import os
import torch
import numpy as np
from screeninfo import get_monitors

# Report the installed PyTorch build and whether a CUDA device is visible.
print("Torch version:", torch.__version__)
print("Using CUDA" if torch.cuda.is_available() else "Not using CUDA")
    
# Input videos: filename1 is read as the foreground, filename2 as the background.
filename1 = 'foreground.mp4'
filename2 = 'background2.mp4'

# All per-frame output images are written below this folder.
folder_path = filename1 + '_' + filename2

# Create the output folder together with its sub-folders in one pass.
# Previously the sub-folders were only created when the parent folder already
# existed, so the very first run had nowhere to write its frames.
for subfolder in ('result', 'mask', 'camera'):
    os.makedirs(os.path.join(folder_path, subfolder), exist_ok=True)

# Physical constants used by the main loop to turn gaze angles into pixel offsets.
DISTANCE_TO_OBJECT = 500  # mm
HEIGHT_OF_HUMAN_FACE = 250  # mm

# Run the gaze model on the GPU when one is available and fall back to the CPU
# otherwise, instead of failing on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

gaze_pipeline = Pipeline(weights='models/L2CSNet_gaze360.pkl', arch='ResNet50', device=device)

# Webcam used for gaze tracking (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # An unopened capture reports a 0x0 frame size, which would otherwise only
    # surface as a ZeroDivisionError in the aspect-ratio computation below.
    raise RuntimeError("Could not open camera 0")

image_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
image_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

image_min = min(image_height, image_width)

# Display width that fills the first monitor's height while keeping the
# camera's aspect ratio.
monitor_height = get_monitors()[0].height
screen_width = int(monitor_height * image_width / image_height)

# Create a resizable window with that initial size
cv2.namedWindow('Window', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Window', screen_width, monitor_height)

# Foreground video reader
cap1 = cv2.VideoCapture(filename1)

# Background video reader
cap2 = cv2.VideoCapture(filename2)

# Frame rate; in the visible code it is only referenced by the disabled
# VideoWriter setup below.
fps = 10

# Disabled video-file output (frames are saved as individual images in the main loop):
# fourcc = cv2.VideoWriter_fourcc(*'H264')
# out_result = cv2.VideoWriter(f'{filename1}_result.mp4', fourcc, fps, (screen_width, get_monitors()[0].height))
# out_mask = cv2.VideoWriter(f'{filename1}_mask.mp4', fourcc, fps, (screen_width, get_monitors()[0].height))
# out_camera = cv2.VideoWriter(f'{filename1}_camera.mp4', fourcc, fps, (screen_width, get_monitors()[0].height))

# Crop rectangle applied to both videos: top-left corner (x, y) and size
# (width, height). Adjust these values according to your requirements.
x = y = 0
width = screen_width
height = get_monitors()[0].height

# MOG2 background subtractor, applied to the foreground video in the main loop.
bg_subtractor = cv2.createBackgroundSubtractorMOG2()

# Index of the next frame to be written to disk.
count = 0

# Rolling histories maintained by the main loop: raw gaze points, their
# moving averages, and the displacement between successive averages.
gaze_points, avg_gaze_points, displacements = [], [], []

# Latest displacement and its rolling mean.
displacement = avg_displacement = 0

# Length of the display diagonal in pixels: the largest displacement possible on screen.
displacement_max = np.sqrt(screen_width ** 2 + get_monitors()[0].height ** 2)

# Main loop: estimate the viewer's gaze from the webcam, then blend the
# background video into the foreground video around the gaze point and
# display/save the result, the blend mask and the annotated camera frame.

# Monitor height is constant for the run, so look it up once instead of
# enumerating monitors several times per frame.
screen_height = get_monitors()[0].height

# Stand-in for a non-finite gaze offset; it places the gaze point far off-screen.
OUT_OF_BOUNDS = 100000000

while True:

    # ---- Webcam frame and gaze estimation --------------------------------
    ret, frame = cap.read()
    if not ret or frame is None:
        # Camera stream ended or failed; flipping a missing frame would raise.
        break

    frame = cv2.flip(frame, 1)  # mirror horizontally

    try:
        results = gaze_pipeline.step(frame)
    except ValueError:
        # NOTE(review): l2cs is reported to raise ValueError from step() when
        # no face passes its detector threshold - confirm against the
        # installed version. Treated here as "no face in this frame".
        results = None

    face_height = 0
    if results is not None and len(results.bboxes) > 0:
        frame = render(frame, results)
        face_height = int(results.bboxes[0][3] - results.bboxes[0][1])

    if face_height <= 0:
        # No usable face: indexing bboxes[0] or dividing by the face height
        # would raise. Show the camera view, stay responsive to 'q', and try
        # again with the next frame.
        cv2.imshow('Camera', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        continue

    # Millimetres represented by one camera pixel, derived from the face box height.
    length_per_pixel = HEIGHT_OF_HUMAN_FACE / (1.5 * face_height)

    # Horizontal / vertical gaze offsets in camera pixels.
    dx = -2 * DISTANCE_TO_OBJECT * np.tan(results.pitch[0]) * image_width / (length_per_pixel * image_height)
    dx = dx if np.isfinite(dx) else OUT_OF_BOUNDS
    # NOTE(review): np.arccos returns NaN for |pitch| > 1, which the sentinel
    # below absorbs; np.cos may have been intended here - confirm.
    dy = -2 * DISTANCE_TO_OBJECT * np.arccos(results.pitch[0]) * np.tan(results.yaw[0]) / length_per_pixel
    dy = dy if np.isfinite(dy) else OUT_OF_BOUNDS

    # Centre of the first face bounding box (camera pixels).
    x_gaze = int((results.bboxes[0][0] + results.bboxes[0][2]) // 2)
    y_gaze = int((results.bboxes[0][1] + results.bboxes[0][3]) // 2)

    # Gaze point scaled from camera coordinates to display coordinates.
    # NOTE(review): the +200 vertical offset looks like a manual calibration - confirm.
    gaze_point = (
        int((image_width / 2 + dx) * screen_width / image_width),
        int((image_height / 2 + dy + 200) * screen_height / image_height),
    )

    # Smooth the gaze point over the last 10 samples.
    gaze_points.append(gaze_point)
    gaze_points = gaze_points[-10:]
    avg_gaze_point = np.mean(gaze_points, axis=0).astype(int)

    # Keep the two most recent smoothed points (the "tail").
    avg_gaze_points.append(avg_gaze_point)
    avg_gaze_points = avg_gaze_points[-2:]

    # Distance between the oldest and newest kept smoothed points, and its
    # rolling mean over the last 10 frames.
    displacement = np.sqrt((avg_gaze_points[0][0] - avg_gaze_points[-1][0])**2 + (avg_gaze_points[0][1] - avg_gaze_points[-1][1])**2)
    displacements.append(displacement)
    displacements = displacements[-10:]
    avg_displacement = np.mean(displacements, axis=0).astype(int)

    # ---- Foreground video and blend mask ---------------------------------
    ret1, frame1 = cap1.read()
    if not ret1:
        # End of the foreground video; check before cropping, since the
        # frame is None here.
        break
    frame1 = frame1[y:y + height, x:x + width]

    # Black mask with the same size as the cropped frame.
    circle_mask = np.zeros_like(frame1)

    # The circle radius shrinks as the gaze moves faster and never goes negative.
    radius = max(image_height - 3 * int(avg_displacement), 0)

    opacity_incr = 0
    for avg_gaze_point, past_displacement in zip(avg_gaze_points, displacements):
        opacity = 15 * int((displacement_max - 4 * avg_displacement) / (3 * past_displacement + 1))

        # Plain Python ints keep OpenCV's argument parsing happy on every platform.
        centre = tuple(int(v) for v in avg_gaze_point)
        cv2.circle(circle_mask, centre, radius, (opacity_incr, opacity_incr, opacity_incr), -1)

        opacity_incr += opacity / len(avg_gaze_points)

    # Moving foreground pixels, restricted to the gaze circle.
    fg_mask = bg_subtractor.apply(frame1)
    _, binary_mask = cv2.threshold(fg_mask, 100, 255, cv2.THRESH_BINARY)
    binary_mask = cv2.cvtColor(binary_mask, cv2.COLOR_GRAY2BGR)
    r_mask = cv2.bitwise_and(binary_mask, circle_mask)

    # Contrast (alpha) and brightness (beta) applied to the mask; brightness
    # grows while the gaze is moving slowly.
    alpha = 15
    d = displacement_max - 20 * avg_displacement
    beta = 20 + (d / 250)**3 if d > 0 else 20

    r_mask = cv2.GaussianBlur(r_mask, (25, 25), 200)
    r_mask = cv2.convertScaleAbs(r_mask, alpha=alpha, beta=beta)
    r_mask = cv2.GaussianBlur(r_mask, (25, 25), 200)
    r_mask = cv2.GaussianBlur(r_mask, (25, 25), 200)

    # ---- Background video and compositing --------------------------------
    ret2, frame2 = cap2.read()
    if not ret2:
        # End of the background video; check before cropping, since the
        # frame is None here.
        break
    frame2 = frame2[y:y + height, x:x + width]

    # Blend in floating point: mask weight 1 shows the background video,
    # weight 0 shows the foreground video.
    r_mask = (r_mask / 255).astype(float)
    frame2_n = (frame2 / 255).astype(float)
    frame1_n = (frame1 / 255).astype(float)

    result_frame = cv2.multiply(r_mask, frame2_n) + cv2.multiply(1 - r_mask, frame1_n)

    # Debug overlays are drawn on the mask after blending, so they do not
    # affect the composited result.
    cv2.circle(r_mask, gaze_point, 25, (0, 0, 255), -1)
    cv2.putText(r_mask, f"Displacement {int(displacement)}", (500, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, face_height, 0), 3)
    cv2.putText(r_mask, f"Radius {radius}", (1000, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, face_height, 0), 3)
    cv2.putText(r_mask, f"Face height {face_height}", (50, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, face_height, 0), 3)
    if 200 < face_height < 220:  # 190-205
        cv2.putText(r_mask, "Optimal distance", (50, 120), cv2.FONT_HERSHEY_PLAIN, 3, (255, face_height, 0), 3)

    cv2.circle(r_mask, (image_width // 2 - 225, image_height // 2 - 50), 25, (0, 65, 255), -1)
    cv2.circle(r_mask, (x_gaze - 225, y_gaze - 50), 25, (0, 255, 0), -1)

    # ---- Display ---------------------------------------------------------
    cv2.imshow('Window', result_frame)
    cv2.imshow('Mask', r_mask)
    cv2.imshow('Camera', frame)

    # ---- Save frames -----------------------------------------------------
    result_frame_8bit = cv2.normalize(result_frame, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
    # The overlays push mask values above 1.0, so clip before converting to
    # the 8-bit depth that image writers expect.
    r_mask_8bit = np.clip(r_mask * 255, 0, 255).astype(np.uint8)

    cv2.imwrite(f'{folder_path}/result/result_frame{count:05d}.jpg', result_frame_8bit)
    cv2.imwrite(f'{folder_path}/mask/mask_frame{count:05d}.jpg', r_mask_8bit)
    cv2.imwrite(f'{folder_path}/camera/camera_frame{count:05d}.jpg', frame)

    count += 1

    # Quit on 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# After the loop, release the webcam and both video readers so their
# underlying devices/files are closed.
for capture in (cap, cap1, cap2):
    capture.release()
# Destroy all the windows
cv2.destroyAllWindows()

Torch version: 2.1.1+cu121
Using CUDA
