# Human pose and scoring

## Enregistrement vidéo de référence

In [1]:
import cv2
import mediapipe as mp
import csv
import time
import pandas as pd




In [3]:
video_path = './one_more_time.mp4'

In [17]:
# Initialize MediaPipe Pose and Drawing utilities
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
pose = mp_pose.Pose()

# Open the video file
cap = cv2.VideoCapture(video_path)

In [22]:
def write_landmarks_to_csv(landmarks, frame_number, csv_data, temps):
    #print(f"Landmark coordinates for frame {frame_number}:")
    for idx, landmark in enumerate(landmarks):
        #print(f"{mp_pose.PoseLandmark(idx).name}: (x: {landmark.x}, y: {landmark.y}, z: {landmark.z})")
        csv_data.append([frame_number,temps, mp_pose.PoseLandmark(idx).name, landmark.x, landmark.y, landmark.z])
    print("\n")

In [None]:
frame_number = 0
csv_data = []
temps_zero = round(time.time(),1)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame with MediaPipe Pose
    result = pose.process(frame_rgb)

    # Draw the pose landmarks on the frame
    if result.pose_landmarks:
        mp_drawing.draw_landmarks(frame, result.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        # Add the landmark coordinates to the list and print them
        write_landmarks_to_csv(result.pose_landmarks.landmark, frame_number, csv_data, round(time.time(),1)-temps_zero)
        frame_number+=1

    # Display the frame
    cv2.imshow('MediaPipe Pose', frame)

    # Exit if 'q' keypyt
cap.release()
cv2.destroyAllWindows()

In [24]:
df = pd.DataFrame(csv_data, columns=['Frame number','time', 'landmarks', 'x', 'y','z'])

In [26]:
df["x"]=((df["x"]-df["x"].min())/(df["x"].max()-df["x"].min()))
df["y"]=((df["y"]-df["y"].min())/(df["y"].max()-df["y"].min()))
df["z"]=((df["z"]-df["z"].min())/(df["z"].max()-df["z"].min()))

In [27]:
df.to_pickle("./omt_df.pkl")

## Comparaison et score

In [8]:
import numpy as np
from sklearn import preprocessing
from numpy.linalg import norm
import pandas as pd
import time
import cv2
import mediapipe as mp
import csv
from dtaidistance import dtw 
from fastdtw import fastdtw
import keyboard

A voir : https://www.theaidream.com/post/dynamic-time-warping-dtw-algorithm-in-time-series

In [2]:
dfRef = pd.read_pickle("./omt_df.pkl")

In [4]:
#Test
dfRef[(dfRef['time'] >= 0) & (dfRef['time'] <= 2)]['x']

0       0.536434
1       0.549232
2       0.556337
3       0.563253
4       0.524938
          ...   
1546    0.482015
1547    0.534423
1548    0.486860
1549    0.527111
1550    0.466769
Name: x, Length: 1551, dtype: float64

In [12]:
#Test
dfTest = pd.read_pickle("./image.pkl")

In [13]:
#Test
dfRef['x'].corr(dfTest['x'])

0.95276901244535

In [23]:
#Test
s1 = np.array([0.0, 0, 1, 2, 1, 0, 1, 0, 0])
s2 = np.array([0.0, 1, 2, 0, 0, 0, 0, 0, 0])
d1 = dtw.distance_fast(np.array(dfRef['x']), np.array(dfTest['x']))
d2 = dtw.distance_fast(np.array(dfRef['y']), np.array(dfTest['y']))
d1, d2 # retorune 0 si les deux séries sont égales

(13.27662788922727, 24.095914384206562)

In [3]:
def write_landmarks(landmarks, csv_data):
    #print(f"Landmark coordinates for frame {frame_number}:")
    for idx, landmark in enumerate(landmarks):
        #print(f"{mp_pose.PoseLandmark(idx).name}: (x: {landmark.x}, y: {landmark.y}, z: {landmark.z})")
        csv_data.append([mp_pose.PoseLandmark(idx).name, landmark.x, landmark.y, landmark.z])
    #print("\n")

In [4]:
def get_video_duration(video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Erreur: Impossible d'ouvrir la vidéo.")
        return
    duration = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) / cap.get(cv2.CAP_PROP_FPS))
    cap.release()

    return duration

In [5]:
video_path = './one_more_time.mp4'
duration_vid = int(get_video_duration(video_path))
print(f"Durée de la vidéo: {duration_vid} secondes")

Durée de la vidéo: 40 secondes


In [6]:
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose #on prend ici le pose estimation model

### Score sur l'entièreté de la vidéo

In [20]:
cap = cv2.VideoCapture(0)
current_seconds_time = int(time.time())
frame_number = 0
lis = []
## Setup mediapipe instance
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor image to RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
      
        # Make detection
        results = pose.process(image)
    
        # Recolor back to BGR
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


        
        # Extract landmarks
        try:
            #'''
            #if results.pose_landmarks:
             #   write_landmarks_to_csv(result.pose_landmarks.landmark, frame_number, list)
              #  frame_number+=1

            landmarks = results.pose_landmarks.landmark
            write_landmarks(landmarks, lis)
            #print(lis)
           
                
            if (current_seconds_time != int(time.time())):
                if (current_seconds_time % duration_vid == 0):
                    #write_landmarks(results.pose_landmarks.landmark, lis)
                    df = pd.DataFrame(lis, columns=['landmarks', 'x', 'y','z'])
                    df["x"]=((df["x"]-df["x"].min())/(df["x"].max()-df["x"].min()))
                    df["y"]=((df["y"]-df["y"].min())/(df["y"].max()-df["y"].min()))
                    print(dtw.distance_fast(np.array(dfRef['x'], dfRef['y']), np.array(df['x'], df['y'])))
                    lis = []
                current_seconds_time = int(time.time())
            #'''
            #landmarks = results.pose_landmarks.landmark
            #print(landmarks)
        except:
            pass
        
        
        # Render detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2), 
                                mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2) 
                                 )               
        
        cv2.imshow('Mediapipe Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

10.505945218266215
9.589654306244524
8.801088735538173
7.629435298372975
7.728127989792488
8.145590694711942
8.547734125443947
10.786681532023433
9.702418367547208
10.319026177454305
7.551021886509874
8.555146056187708


### Score tout les x temps 

In [7]:
evaluation = 2 # tout les combien de temps il va y avoir un feed back
#possible 1.6 pour one more time

one more time début du chrono à 5 s

In [18]:
cap = cv2.VideoCapture(0)
frame_number = 0
lis = []
listScore = []
font = cv2.FONT_HERSHEY_SIMPLEX 
count = 1
stop = False
debutLandmarks = False
debutTemps = False
debutVideo = False
## Setup mediapipe instance
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor image to RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
      
        # Make detection
        results = pose.process(image)
    
        # Recolor back to BGR
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if (debutVideo == True or keyboard.is_pressed('g')): #la partie commence quand le joueur appuie sur la touche 'g'
            debutVideo = True
            if (debutTemps == False):
                current_seconds_time = int(time.time())
                temps_zero = round(time.time(),1)
                debutTemps = True
            if (stop == False):
                ts = time.time() - temps_zero
                if (ts >= 5.0 and ts < 6.0):
                    cv2.putText(image, '3', (50, 50), font, 1, (0, 255, 255), 2, cv2.LINE_AA)
                elif (ts >= 6.0 and ts < 7.0):
                    cv2.putText(image, '2', (50, 50), font, 1, (0, 255, 255), 2, cv2.LINE_AA)
                elif (ts >= 7.0 and ts < 8.0):
                    cv2.putText(image, '1', (50, 50), font, 1, (0, 255, 255), 2, cv2.LINE_AA)
                elif (ts >= 8.0):
                    debutLandmarks = True
                    stop = True
        
            try:

                landmarks = results.pose_landmarks.landmark
                write_landmarks(landmarks, lis)
                
                if (current_seconds_time != int(time.time())):
                    if (current_seconds_time % evaluation == 0 and debutLandmarks == True):
                        df = pd.DataFrame(lis, columns=['landmarks', 'x', 'y','z'])
                        df["x"]=((df["x"]-df["x"].min())/(df["x"].max()-df["x"].min()))
                        df["y"]=((df["y"]-df["y"].min())/(df["y"].max()-df["y"].min()))
                        dis = dtw.distance_fast(np.array(dfRef[(dfRef['time'] >= (count - 1) * evaluation) & (dfRef['time'] < count * evaluation)]['x'], 
                                                        dfRef[(dfRef['time'] >= (count - 1) * evaluation) & (dfRef['time'] < count * evaluation)]['y']), 
                                                        np.array(df['x'], df['y']))
                        listScore.append(dis)
                        print(dis)
                        lis = []
                        if (count < duration_vid / evaluation - 4):
                            count+=1
                        else: # fin de la vidéo et de la partie
                            count=0
                            print("fin de la partie")
                            #break
                    current_seconds_time = int(time.time())
            except:
                pass
        
        
        # Render detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2), 
                                mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2) 
                                 )               
        
        
        cv2.imshow('Mediapipe Feed', image)
       

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

10.895555170755586
5.508986789447594
4.451696928354922
4.007059967113329
4.011728676175599


In [20]:
listScore

[10.895555170755586,
 5.508986789447594,
 4.451696928354922,
 4.007059967113329,
 4.011728676175599]

On pourrait obtenir un score pour chaque landmarks ou plusieurs pour avoir plus d'info sur une partie du corps du joueur

Attention pour le temps à la première itération initialiser le temps à 0

In [9]:
#Test
import keyboard

cu = int(time.time())
while(True):
    if(keyboard.is_pressed('q')):
        break 

    if (cu != int(time.time())):
        if (cu % 4 == 0):
            print("ok")
        cu = int(time.time())
    if(keyboard.is_pressed('a')):
        print("a")     

ok
ok
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a


In [20]:
#Test
print(dtw.distance_fast(np.array(dfRef['x'], dfRef['y']), np.array(df['x'], df['y'])))

10.165361391271915
