In [93]:
# Runs in the "airwriting" virtual environment; verified with Python 3.9.16
# All imports
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import os
import numpy as np
import torch
import pandas as pd
from transformers import DPTForDepthEstimation, DPTFeatureExtractor
import mediapipe as mp

In [94]:
def recordVideo(output, camera_index=0, fps=30.0):
    """Record the webcam to ``output`` until 'q' is pressed or the camera stops.

    A mirrored preview is displayed while recording; the frames written to
    disk are the raw (non-mirrored) camera frames.

    Args:
        output: Path of the video file to create (encoded with the 'mp4v' codec).
        camera_index: Index of the capture device handed to ``cv2.VideoCapture``.
        fps: Frame rate declared in the output file.

    Raises:
        IOError: If the capture device cannot be opened.
    """
    cap = cv2.VideoCapture(camera_index)
    if not cap.isOpened():
        cap.release()
        raise IOError("cannot open camera " + str(camera_index))

    # VideoWriter does not create missing folders; make sure the target exists.
    out_dir = os.path.dirname(output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Define the codec; the writer itself is created lazily (see below).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Use appropriate codec for your system
    out = None

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if out is None:
                # Size the writer from the first real frame: frames whose size
                # differs from the declared one are not written by OpenCV, so a
                # hard-coded (640, 480) breaks on cameras with another resolution.
                height, width = frame.shape[:2]
                out = cv2.VideoWriter(output, fourcc, fps, (width, height))

            # Write the raw frame to the output file
            out.write(frame)

            # Display a mirrored preview, which is more natural for the user
            cv2.imshow('frame', cv2.flip(frame, 1))

            # Exit recording if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even if an error interrupts the recording
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

In [95]:
def video_to_frames(video_path):
    """Dump every frame of a video as ``frame_<index>.jpg`` in a sibling folder.

    Args:
        video_path: Path of the video file, or an integer camera index
            (e.g. ``0`` for the live camera stream). With a live stream the
            extraction only stops when the camera stops delivering frames.

    Returns:
        The folder the frames were written to: the video path without its
        extension plus ``-opencv`` for a file, ``camera<index>-opencv`` for a
        camera index.
    """
    if isinstance(video_path, os.PathLike):
        video_path = os.fspath(video_path)

    if isinstance(video_path, str):
        dirname, _ = os.path.splitext(video_path)
    else:
        # Camera index: there is no file name to derive the folder from.
        dirname = "camera" + str(video_path)
    dirname += "-opencv"

    # NOTE(review): frames left over from a previous, longer extraction are not
    # removed from an existing folder and would be picked up by later steps.
    os.makedirs(dirname, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        count = 0
        while True:
            is_read, frame = cap.read()

            if not is_read:
                break
            im_name = dirname + '/frame_' + str(count) + '.jpg'
            cv2.imwrite(im_name, frame)
            count += 1
    finally:
        cap.release()

    return dirname


In [96]:
def frame_to_depth_frame(path, batch_size=15):
    """Run DPT depth estimation on every frame of ``path`` and save the maps.

    Frames are expected to be named ``<prefix>_<index>.<ext>`` and are processed
    in numeric index order. Each depth map is rescaled so that its maximum is
    255 and saved as ``depth_<position>.jpg`` in ``<root>\\depth``, where
    ``<root>`` is the first backslash-separated component of ``path`` and
    ``<position>`` is the frame's rank in the sorted list.

    Args:
        path: Folder containing the extracted frames (Windows-style path).
        batch_size: Number of frames sent through the model at once.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas", cache_dir="models/")
    feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas", cache_dir="models/")

    # Numeric sort, so that frame_10 comes after frame_9.
    img_names = sorted(os.listdir(path), key=lambda x: int(x.split(".")[0].split("_")[1]))

    # The root-folder convention (first component of a backslash path) matches
    # the other cells of this notebook, which look for "<root>\depth".
    output_path = path.split('\\')[0] + '\\depth'
    os.makedirs(output_path, exist_ok=True)

    # Step through the list by slices so the last, possibly shorter, batch is
    # processed too (floor division used to drop the trailing frames).
    for start in range(0, len(img_names), batch_size):
        batch_names = img_names[start:start + batch_size]
        images = [Image.open(os.path.join(path, name)) for name in batch_names]

        pixel_values = torch.cat(
            [feature_extractor(images=image, return_tensors="pt").pixel_values for image in images],
            dim=0,
        )

        with torch.no_grad():
            predicted_depth = model(pixel_values).predicted_depth

        # Interpolate back to the original size; PIL's size is (width, height),
        # torch expects (height, width). All frames of a batch are assumed to
        # share the size of the first one.
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=images[0].size[::-1],
            mode="bicubic",
            align_corners=False,
        )

        for offset, pred in enumerate(prediction):
            output = pred.squeeze().cpu().numpy()
            peak = np.max(output)
            if peak > 0:
                # Clip guards against slightly negative values produced by the
                # bicubic resampling, which would wrap around in uint8.
                formatted = np.clip(output * 255 / peak, 0, 255).astype("uint8")
            else:
                # Flat map: avoid dividing by zero.
                formatted = np.zeros(output.shape, dtype="uint8")
            depth = Image.fromarray(formatted)

            depth.save(output_path + '\\depth_' + str(start + offset) + ".jpg")

In [97]:
def frames_to_video(path, fps=30):
    """Assemble the depth frames of ``<root>\\depth`` into ``<root>\\depth_video.mp4``.

    ``<root>`` is the first backslash-separated component of ``path``; the rest
    of ``path`` is ignored. Frames are expected to be named
    ``<prefix>_<index>.<ext>`` and are appended in numeric index order.

    Args:
        path: Any Windows-style path located under the root folder
            (the notebook passes the path of the recorded video).
        fps: Frame rate declared in the output video.

    Returns:
        The path of the video that was written.

    Raises:
        FileNotFoundError: If the depth folder is missing or empty.
        IOError: If a frame cannot be decoded.
    """
    # Folder containing the numbered images
    repo = path.split('\\')[0]
    img_folder = repo + "\\depth\\"

    if not os.path.isdir(img_folder):
        raise FileNotFoundError("depth frame folder not found: " + img_folder)

    # List the image file names and sort them numerically
    # NOTE(review): depth frames left over from a previous, longer run are not
    # removed by this notebook and would be appended as well.
    img_names = sorted(os.listdir(img_folder), key=lambda x: int(x.split(".")[0].split("_")[1]))
    if not img_names:
        raise FileNotFoundError("no depth frame found in: " + img_folder)

    # The first image fixes the width and height of the video
    first = cv2.imread(os.path.join(img_folder, img_names[0]))
    if first is None:
        raise IOError("cannot read image: " + os.path.join(img_folder, img_names[0]))
    height, width = first.shape[:2]

    # Create the VideoWriter used to save the video
    video_path = repo + "\\depth_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))

    try:
        # Append every sorted image to the video
        for img_name in img_names:
            img_path = os.path.join(img_folder, img_name)
            img = cv2.imread(img_path)
            if img is None:
                raise IOError("cannot read image: " + img_path)
            if img.shape[:2] != (height, width):
                # A frame of another size would not be written, which would
                # shift the depth video relative to the source video.
                img = cv2.resize(img, (width, height))
            video.write(img)
    finally:
        # Close the VideoWriter even if a frame could not be processed
        video.release()

    print("La vidéo a été créée avec succès !")
    return video_path

In [98]:
def video_tracking(original_video_path, depth_video_path,
                   depth_threshold=220, min_near_pixels=115000, max_missed_frames=5):
    """Trace the index fingertip over a video, gated by the matching depth video.

    Both videos are read frame by frame in lockstep. For every detected hand,
    the index fingertip position is appended to the current stroke when the
    depth frame contains more than ``min_near_pixels`` samples brighter than
    ``depth_threshold`` (presumably meaning the hand is close to the camera —
    confirm against the depth model's convention). After ``max_missed_frames``
    hand detections that do not pass the gate, the stroke is closed. All
    strokes are drawn on each mirrored frame, which is displayed and written to
    ``<root>\\tracking.mp4``, ``<root>`` being the first backslash-separated
    component of ``depth_video_path``. Press Esc to stop early.

    Args:
        original_video_path: Source video (anything ``cv2.VideoCapture`` accepts).
        depth_video_path: Depth video aligned with the source (Windows-style path).
        depth_threshold: Brightness above which a depth sample counts as "near".
        min_near_pixels: Minimum number of "near" samples (counted over all
            pixels and channels) for the fingertip to be recorded.
        max_missed_frames: Gated-out detections tolerated before closing a stroke.

    Raises:
        IOError: If either video cannot be opened.
    """
    index_tip_id = 8  # MediaPipe hand landmark index of the index fingertip

    cap = cv2.VideoCapture(original_video_path)
    depth = cv2.VideoCapture(depth_video_path)

    # Set up MediaPipe hand detection
    mpHands = mp.solutions.hands
    mpDraw = mp.solutions.drawing_utils

    # Output video; the writer is created from the first processed frame so
    # frames can be streamed to disk instead of being accumulated in memory.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    savePath = depth_video_path.split('\\')[0] + "\\tracking.mp4"
    out = None

    # Fingertip positions, with "stop" markers separating the strokes
    finger_positions = []
    imageCompteur = 0  # consecutive detections rejected by the depth gate

    try:
        if not cap.isOpened():
            raise IOError("cannot open video: " + str(original_video_path))
        if not depth.isOpened():
            raise IOError("cannot open video: " + str(depth_video_path))

        with mpHands.Hands() as hands:
            while True:
                # Read one frame from each video; stop as soon as either ends
                success, image = cap.read()
                if not success:
                    break
                success_depth, image_depth = depth.read()
                if not success_depth:
                    break

                image = cv2.flip(image, 1)

                # Only the count of bright samples is used, so the depth frame
                # does not need to be mirrored.
                sumMask = int(np.count_nonzero(image_depth > depth_threshold))

                # MediaPipe expects RGB, OpenCV delivers BGR
                imageRGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                results = hands.process(imageRGB)

                if results.multi_hand_landmarks:
                    h, w = image.shape[:2]
                    for handLms in results.multi_hand_landmarks:
                        # Landmark coordinates are normalized; convert to pixels
                        tip = handLms.landmark[index_tip_id]
                        cx, cy = int(tip.x * w), int(tip.y * h)

                        if sumMask > min_near_pixels:
                            finger_positions.append((cx, cy))
                            cv2.circle(image, (cx, cy), 10, (255, 0, 255), cv2.FILLED)
                            imageCompteur = 0
                        else:
                            imageCompteur += 1

                        if imageCompteur >= max_missed_frames:
                            finger_positions.append("stop")
                            imageCompteur = 0

                        # Draw the landmarks and connections on the image
                        mpDraw.draw_landmarks(image, handLms, mpHands.HAND_CONNECTIONS)
                # NOTE(review): frames without any detected hand do not count
                # toward closing a stroke — confirm this is intended.

                # Draw one polyline per stroke; the trailing "stop" flushes the
                # stroke that is still being written.
                curve = []
                for position in finger_positions + ["stop"]:
                    if position != "stop":
                        curve.append(position)
                    elif curve:
                        # cv2.polylines needs int32 points (numpy's default
                        # integer type is int64 on some platforms).
                        cv2.polylines(image, [np.array(curve, dtype=np.int32)], False, (255, 0, 0), 3)
                        curve = []

                if out is None:
                    out = cv2.VideoWriter(savePath, fourcc, 30, (image.shape[1], image.shape[0]))
                out.write(image)

                # Display the image on the screen
                cv2.imshow("Output", image)

                # Check for the Esc key to stop the program
                if cv2.waitKey(5) & 0xFF == 27:
                    break
    finally:
        # Release the writer, both captures and the windows in every case
        if out is not None:
            out.release()
        cap.release()
        depth.release()
        cv2.destroyAllWindows()

    if out is None:
        print("no frame could be processed; nothing was saved")
    else:
        print("video save at: " + savePath)

In [99]:
# Main pipeline: record -> frames -> depth frames -> depth video -> finger tracking.
# Raw string: '\i' is not a valid escape sequence, so the non-raw literal only
# worked by accident (and triggers a warning on recent Python versions).
output = r'video\init.mp4'
recordVideo(output)
frame_dir = video_to_frames(output)

# To reuse frames that were already extracted, skip the two calls above and set
# frame_dir by hand, e.g. r"video\source-opencv" or
# r"video\WIN_20230309_14_27_22_Pro-opencv".
frame_to_depth_frame(frame_dir)
# frames_to_video only uses the root folder of the path it receives.
video_path = frames_to_video(output)
video_tracking(output, video_path)

video\init-opencv
La vidéo a été créée avec succès !
video save at: video\tracking.mp4
