In [84]:
# Verified to run in the "airwriting" venv (Python 3.9.16)
# All imports
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import os
import numpy as np
import torch
import pandas as pd
from transformers import DPTForDepthEstimation, DPTFeatureExtractor
import mediapipe as mp
from torchvision.transforms.functional import resize

In [85]:
def recordVideo(output) :
    """Record the default webcam to ``output`` until 'q' is pressed.

    Frames are written to the file as captured; only the on-screen preview is
    mirrored horizontally.

    Args:
        output: Path of the video file to create (encoded with the 'mp4v' codec).

    Raises:
        IOError: If the default camera (index 0) cannot be opened.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        raise IOError("Cannot open the default camera (index 0)")

    out = None
    try:
        # VideoWriter does not create missing directories; it just fails silently.
        out_dir = os.path.dirname(output)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # The writer silently drops frames whose size differs from the one it
        # was opened with, so use the camera's real frame size (640x480 is the
        # fallback when the backend does not report it).
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480

        # Define the codec and create a VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Use appropriate codec for your system
        out = cv2.VideoWriter(output, fourcc, 30.0, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Write the (un-mirrored) frame to the output file
            out.write(frame)

            # Display a mirrored preview
            cv2.imshow('frame', cv2.flip(frame, 1))

            # Exit recording if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even if capture or display raised
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

In [90]:
def _write_depth_batch(batch, model, frame_size, writer):
    """Run the depth model on one batch and append the depth frames to ``writer``.

    Args:
        batch: Non-empty list of ``pixel_values`` tensors, one per frame.
        model: Model whose output exposes ``predicted_depth``.
        frame_size: ``(height, width)`` the depth maps are resized to.
        writer: Open ``cv2.VideoWriter`` receiving the frames.
    """
    with torch.no_grad():
        outputs = model(torch.cat(batch, dim=0))

    predictions = torch.nn.functional.interpolate(
        outputs.predicted_depth.unsqueeze(1),
        size=frame_size,
        mode="bicubic",
        align_corners=False,
    )

    for pred in predictions:
        depth_map = pred.squeeze().cpu().numpy()
        peak = float(np.max(depth_map))
        if peak > 0:
            # Clip before the cast: bicubic resizing can overshoot below zero,
            # which would otherwise wrap around in uint8.
            gray = np.clip(depth_map * (255.0 / peak), 0, 255).astype("uint8")
        else:
            # All-zero (or invalid) depth map: avoid dividing by zero.
            gray = np.zeros(depth_map.shape, dtype="uint8")
        # The writer expects 3-channel frames; convert in memory instead of
        # round-tripping through a lossy JPEG file on disk.
        writer.write(cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR))


def video_to_depthVideo(video_path, model, feature_extractor, batch_size=4):
    """Create a depth-map video from ``video_path`` and return its path.

    Each frame is run through ``model`` (in batches of ``batch_size``), the
    predicted depth is resized to the frame size, normalised to 0-255 by its
    own maximum, and written to ``depth_video.mp4`` in the same directory as
    the input video.

    Args:
        video_path: Path of the input video.
        model: Depth-estimation model whose output exposes ``predicted_depth``.
        feature_extractor: Callable turning an image into ``pixel_values``
            tensors (called with ``images=...`` and ``return_tensors="pt"``).
        batch_size: Number of frames sent to the model at once.

    Returns:
        Path of the generated depth video.

    Raises:
        IOError: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Cannot open video: {video_path}")

    depth_path = os.path.join(os.path.dirname(video_path), "depth_video.mp4")

    # Match the source geometry and frame rate; fall back to the former
    # hard-coded values when the backend does not report them.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:  # also catches NaN
        fps = 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(depth_path, fourcc, fps, (width, height))
    frames_per_batch = max(1, int(batch_size))

    try:
        batch = []
        # Read until the stream ends; the reported frame count is not relied
        # upon because it can be inaccurate for some containers.
        while True:
            is_read, frame = cap.read()
            if not is_read:
                break

            # OpenCV decodes frames as BGR, while PIL and the model expect RGB.
            rgb_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            pixel_values = feature_extractor(images=rgb_image, return_tensors="pt").pixel_values
            batch.append(pixel_values)

            if len(batch) >= frames_per_batch:
                _write_depth_batch(batch, model, (height, width), video)
                batch = []

        # Process the last, possibly incomplete, batch
        if batch:
            _write_depth_batch(batch, model, (height, width), video)
    finally:
        cap.release()
        video.release()

    # Only reported when processing actually completed.
    print("La vidéo a été créée avec succès !")

    return depth_path


In [89]:
def _register_miss(finger_positions, missed_frames, max_missed_frames):
    """Count one frame without a usable fingertip; end the stroke after too many.

    Returns the updated miss counter (reset to 0 once a "stop" marker is added).
    """
    missed_frames += 1
    if missed_frames >= max_missed_frames:
        finger_positions.append("stop")
        missed_frames = 0
    return missed_frames


def _draw_strokes(image, finger_positions):
    """Draw every run of points between "stop" markers as one open polyline."""
    stroke = []
    for position in finger_positions:
        if position != "stop":
            stroke.append(position)
            continue
        if stroke:
            # cv2.polylines requires int32 points; the default integer dtype
            # of np.array is platform dependent.
            cv2.polylines(image, [np.array(stroke, dtype=np.int32)], False, (255, 0, 0), 3)
            stroke = []
    if stroke:
        # Stroke still in progress (no trailing "stop" marker yet)
        cv2.polylines(image, [np.array(stroke, dtype=np.int32)], False, (255, 0, 0), 3)


def video_tracking(original_video_path, depth_video_path,
                   depth_threshold=220, min_near_pixels=115000, max_missed_frames=5):
    """Track the index fingertip over a video and draw the path it writes.

    The original and depth videos are read in lockstep and mirrored. For each
    frame, MediaPipe Hands locates the index fingertip (landmark 8). The point
    is appended to the current stroke only when the number of depth-frame
    values above ``depth_threshold`` exceeds ``min_near_pixels``. After
    ``max_missed_frames`` frames without an accepted point the stroke is ended.
    Annotated frames are shown on screen and written to ``tracking.mp4`` in the
    directory of ``depth_video_path``. Press Esc to stop early.

    Args:
        original_video_path: Path of the recorded colour video.
        depth_video_path: Path of the matching depth video.
        depth_threshold: Depth-frame value above which a pixel is counted.
        min_near_pixels: Count of such values (summed over all channels) that
            must be exceeded for the fingertip to be recorded. The default was
            presumably tuned for 640x480 frames - TODO confirm.
        max_missed_frames: Frames without an accepted point before the stroke
            is ended.

    Raises:
        IOError: If either video cannot be opened.
    """
    INDEX_FINGER_TIP = 8  # landmark id the original code tested for

    cap = cv2.VideoCapture(original_video_path)
    depth = cv2.VideoCapture(depth_video_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f"Cannot open video: {original_video_path}")
        if not depth.isOpened():
            raise IOError(f"Cannot open video: {depth_video_path}")

        # Output matches the source geometry / frame rate; the former
        # hard-coded values remain the fallback.
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # also catches NaN
            fps = 30.0

        # Define the codec and create the VideoWriter up front so frames can be
        # streamed to disk instead of being buffered in memory.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        savePath = os.path.join(os.path.dirname(depth_video_path), "tracking.mp4")
        out = cv2.VideoWriter(savePath, fourcc, fps, (width, height))

        # Set up MediaPipe hand detection
        mpHands = mp.solutions.hands
        mpDraw = mp.solutions.drawing_utils

        # Fingertip positions, with "stop" markers separating strokes
        finger_positions = []
        missed_frames = 0

        with mpHands.Hands() as hands:
            while True:
                success, image = cap.read()
                if not success:
                    break
                success_depth, image_depth = depth.read()
                if not success_depth:
                    break

                image = cv2.flip(image, 1)
                image_depth = cv2.flip(image_depth, 1)

                # Number of depth values (all channels) above the threshold
                near_pixels = int(np.count_nonzero(image_depth > depth_threshold))

                # MediaPipe expects RGB input
                results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

                if results.multi_hand_landmarks:
                    h, w = image.shape[:2]
                    for handLms in results.multi_hand_landmarks:
                        tip = handLms.landmark[INDEX_FINGER_TIP]
                        cx, cy = int(tip.x * w), int(tip.y * h)

                        if near_pixels > min_near_pixels:
                            finger_positions.append((cx, cy))
                            cv2.circle(image, (cx, cy), 10, (255, 0, 255), cv2.FILLED)
                            missed_frames = 0
                        else:
                            missed_frames = _register_miss(
                                finger_positions, missed_frames, max_missed_frames)

                        # Draw the landmarks and connections on the image
                        mpDraw.draw_landmarks(image, handLms, mpHands.HAND_CONNECTIONS)
                else:
                    # No hand at all also counts as a missed frame; otherwise a
                    # hand leaving the view would never end the current stroke.
                    missed_frames = _register_miss(
                        finger_positions, missed_frames, max_missed_frames)

                # Draw a curve through every recorded stroke
                _draw_strokes(image, finger_positions)

                # Display and save the annotated frame
                cv2.imshow("Output", image)
                out.write(image)

                # Check for the Esc key to stop the program
                if cv2.waitKey(5) & 0xFF == 27:
                    break
    finally:
        # Release the captures / writer and close all windows
        cap.release()
        depth.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

    print(f"video save at: {savePath}")

In [91]:
# Load the pretrained DPT depth-estimation weights and the matching input
# pre-processor; both are cached under the local "models/" directory.
feature_extractor = DPTFeatureExtractor.from_pretrained(
    "Intel/dpt-hybrid-midas",
    cache_dir="models/",
)
model = DPTForDepthEstimation.from_pretrained(
    "Intel/dpt-hybrid-midas",
    cache_dir="models/",
)

In [92]:
# Main pipeline: record from the webcam, build the depth video, then track
# the fingertip over both videos.

# The backslash is escaped explicitly: a bare "\i" is an invalid escape
# sequence, which Python deprecates and newer versions warn about.
output = 'video\\init.mp4'
recordVideo(output)
video_path = video_to_depthVideo(output, model, feature_extractor, 32)
video_tracking(output, video_path)


La vidéo a été créée avec succès !
video save at: video\tracking.mp4
