In [1]:
import os
import csv

In [2]:
def extract_landmarks_from_result(result):
    """Placeholder that is not implemented yet.

    The ``result`` argument is accepted but ignored; the function has no side
    effects and always returns ``None``.
    """
    return None

In [3]:
def create_csv_output_folder(output_dir):
    """Make sure the directory ``output_dir`` exists.

    Missing parent directories are created as well. Calling the function for
    a path that already exists is a no-op.

    Args:
        output_dir: Path of the folder to create.
    """
    if not os.path.exists(output_dir):  # if the folder does not exist create it
        # exist_ok guards against another process creating the folder between
        # the check above and this call.
        os.makedirs(output_dir, exist_ok=True)


def create_csv_file(file_name, landmarks):
    """Create ``file_name`` with a CSV header row if it does not exist yet.

    The header consists of ``timestamp``, ``class`` and then one
    ``x<i>, y<i>, z<i>, v<i>`` column group per entry of ``landmarks``,
    numbered from 1.

    Args:
        file_name: Path of the CSV file to create.
        landmarks: Sized collection holding one entry per landmark point.
            Only its length is used, to decide how many column groups the
            header gets.

    An existing file is left untouched so rows already stored in it are not
    truncated.
    """
    # if the file already exists there is nothing to create
    if os.path.exists(file_name):
        return

    header = ['timestamp', 'class']
    for val in range(1, len(landmarks) + 1):
        header += ['x{}'.format(val), 'y{}'.format(val),
                   'z{}'.format(val), 'v{}'.format(val)]

    with open(file_name, mode='w', newline='') as f:
        csv_writer = csv.writer(
            f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(header)

def create_csv_output_file(output_file, results):
    """Create ``output_file`` inside the ``csv_output_folder`` directory.

    The folder lives next to this source file, or in the current working
    directory when ``__file__`` is not defined (as in a notebook kernel).
    The pose and face landmark points of ``results`` are combined, pose
    first, and handed to ``create_csv_file`` together with the full path.

    Args:
        output_file: File name of the CSV file, relative to the output folder.
        results: Object exposing ``pose_landmarks`` and ``face_landmarks``,
            each with a ``landmark`` sequence.

    Raises:
        ValueError: If ``results`` has no pose or no face landmarks.
    """
    pose, face = results.pose_landmarks, results.face_landmarks
    if pose is None or face is None:
        raise ValueError(
            'results must contain both pose and face landmarks')
    landmarks = list(pose.landmark) + list(face.landmark)

    try:
        current_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        # __file__ does not exist in an interactive / notebook session
        current_dir = os.getcwd()
    output_dir = os.path.join(current_dir, 'csv_output_folder')

    create_csv_output_folder(output_dir)  # creating the output folder

    output_file = os.path.join(output_dir, output_file)
    create_csv_file(output_file, landmarks)  # creating the output file

In [8]:
import cv2
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import mediapipe as mp
import pandas as pd
import numpy as np


def extract_landmarks(landmarks, num_landmarks=21):
    """Flatten a landmark list into ``[x, y, z, visibility, x, y, ...]``.

    Args:
        landmarks: Object with a ``landmark`` sequence whose items expose
            ``x``, ``y``, ``z`` and ``visibility``, or a falsy value (such as
            ``None``) when nothing was detected.
        num_landmarks: Number of landmark points to zero-pad for when
            ``landmarks`` is falsy. The default of 21 yields the 84 zeros
            this function has always returned; pass the point count of the
            body part when a different padding length is needed.

    Returns:
        A flat list with four values per landmark point, or
        ``4 * num_landmarks`` zeros when ``landmarks`` is falsy.
    """
    if not landmarks:
        return [0] * (4 * num_landmarks)
    return [
        value
        for point in landmarks.landmark
        for value in (point.x, point.y, point.z, point.visibility)
    ]


def process_video(input_file, output_file, gesture):
    """Turn a gesture video into a table of landmark coordinates.

    Every frame of ``input_file`` is run through MediaPipe Holistic. Frames
    in which a face was detected produce one row: ``gesture`` followed by the
    flattened face, pose, left-hand and right-hand landmarks. Frames without
    a face are skipped.

    Args:
        input_file: Path of the video to read.
        output_file: Path of the CSV file to write. The file is rewritten on
            every call.
        gesture: Label stored in the first column of every row.

    Returns:
        The ``pandas.DataFrame`` that was written to ``output_file``.
    """
    mp_holistic = mp.solutions.holistic
    # extract_landmarks() pads a missing body part with 84 zeros, which only
    # fits a 21-point hand. MediaPipe's pose model has 33 points, so a
    # missing pose is padded here to keep the hand columns aligned.
    pose_padding = [0] * (4 * 33)
    coords = []

    with mp_holistic.Holistic(static_image_mode=True) as holistic:
        cap = cv2.VideoCapture(input_file)
        try:
            while cap.isOpened():
                success, image = cap.read()
                if not success:
                    break
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                results = holistic.process(image)

                # Only frames with a detected face end up in the output
                if not results.face_landmarks:
                    continue

                # Extract landmarks for each body part
                face_row = extract_landmarks(results.face_landmarks)
                left_hand_row = extract_landmarks(results.left_hand_landmarks)
                right_hand_row = extract_landmarks(results.right_hand_landmarks)
                if results.pose_landmarks:
                    pose_row = extract_landmarks(results.pose_landmarks)
                else:
                    pose_row = pose_padding

                # Combine label and all landmark points into a single row
                coords.append(
                    [gesture] + face_row + pose_row
                    + left_hand_row + right_hand_row)
        finally:
            # Release the capture even if processing a frame fails
            cap.release()

    df = pd.DataFrame(coords)
    df.to_csv(output_file, index=False)
    return df

In [9]:
# process_video() rewrites its output file on every call, so writing both
# gestures to 'train_set.csv' one after the other would keep only the last
# one. Collect both frames and store the combined training set at the end.
happy_df = process_video('happy1.mp4', 'train_set.csv', 'happy')
sad_df = process_video('sad1.mp4', 'train_set.csv', 'sad')
df = pd.concat([happy_df, sad_df], ignore_index=True)
df.to_csv('train_set.csv', index=False)

In [None]:
def predict(input_file, model):
    """Play a video and overlay the gesture predicted for each frame.

    Every frame of ``input_file`` is run through MediaPipe Holistic. When a
    face is detected, the flattened face, pose, left-hand and right-hand
    landmarks are normalised to unit length and passed to ``model.predict``.
    The first predicted label is drawn onto the frame. Frames are shown in a
    window named ``'Video'`` until the video ends or ``q`` is pressed.

    Args:
        input_file: Path of the video to read.
        model: Object with a ``predict`` method that accepts an array of
            shape ``(1, n_features)`` and returns a sequence of labels.
    """
    mp_holistic = mp.solutions.holistic
    # extract_landmarks() pads a missing body part with 84 zeros, which only
    # fits a 21-point hand. MediaPipe's pose model has 33 points, so a
    # missing pose is padded here to keep the feature length constant.
    pose_padding = [0] * (4 * 33)

    with mp_holistic.Holistic(static_image_mode=True) as holistic:
        cap = cv2.VideoCapture(input_file)
        try:
            while cap.isOpened():
                success, frame = cap.read()
                if not success:
                    break
                # Holistic gets an RGB copy; the BGR frame is kept for
                # drawing and display, because cv2.imshow expects BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(rgb_frame)

                if results.face_landmarks:
                    # Extract landmarks for each body part
                    face_row = extract_landmarks(results.face_landmarks)
                    left_hand_row = extract_landmarks(
                        results.left_hand_landmarks)
                    right_hand_row = extract_landmarks(
                        results.right_hand_landmarks)
                    if results.pose_landmarks:
                        pose_row = extract_landmarks(results.pose_landmarks)
                    else:
                        pose_row = pose_padding

                    # Combine all landmark points into a single list
                    feature_vector = (face_row + pose_row
                                      + left_hand_row + right_hand_row)
                    # Reshape feature vector to match the input shape of the model
                    feature_vector = np.array(feature_vector).reshape(1, -1)
                    # Normalize the feature vector (skip an all-zero vector
                    # to avoid dividing by zero)
                    norm = np.linalg.norm(feature_vector)
                    if norm > 0:
                        feature_vector = feature_vector / norm
                    # Make a prediction using the model
                    prediction = model.predict(feature_vector)[0]
                    # Display the prediction on the image; putText needs a str
                    cv2.putText(frame, str(prediction), (50, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                # Show the image
                cv2.imshow('Video', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Always free the capture and close the preview window
            cap.release()
            cv2.destroyAllWindows()


In [None]:
# Used to create a training-set CSV of landmark coordinates.
# Reads an MP4 video file and adds its coordinates to a CSV file.
# input_file: an MP4 video of a specific gesture
# output_file: a CSV file containing the coordinates for the specified gesture

def f(input_file, output_file):
    """Placeholder that is not implemented yet.

    Both arguments are accepted but ignored; the function has no side effects
    and always returns ``None``.
    """
    return None