In [2]:
import cv2
import mediapipe as mp
import os

# Initialize MediaPipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, min_detection_confidence=0.5)

# Drawing specifications
mp_face_mesh = mp.solutions.drawing_utils
drawing_spec = mp_face_mesh.DrawingSpec(thickness=1, circle_radius=1)

# Define the folders
folders = ['up', 'down', 'straight', 'right', 'left']

# Process each folder
for folder in folders:
    # Get all image files in the folder
    image_files = [f for f in os.listdir(folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
    
    for image_file in image_files:
        image_path = os.path.join(folder, image_file)
        image = cv2.imread(image_path)
        if image is None:
            continue
        
        # Convert the BGR image to RGB
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Process the image and find face landmarks
        results = face_mesh.process(rgb_image)
        
        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                # Draw the face landmarks on the image
                mp_face_mesh.draw_landmarks(
                    image=image,
                    landmark_list=face_landmarks,
                    connections=mp_face_mesh.FACEMESH_CONTOURS,
                    landmark_drawing_spec=drawing_spec,
                    connection_drawing_spec=drawing_spec)
        
        # Save the output image
        output_path = os.path.join(folder, f'output_{image_file}')
        cv2.imwrite(output_path, image)

# Release the MediaPipe resources
face_mesh.close()

print("Processing complete. Check the folders for output images with facial landmarks.")






Processing complete. Check the folders for output images with facial landmarks.


In [3]:
import cv2
import mediapipe as mp
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Initialize MediaPipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, min_detection_confidence=0.5)

# Define the folders and labels
folders = ['up', 'down', 'straight', 'right', 'left']
labels_map = {folder: idx for idx, folder in enumerate(folders)}

# Data lists
features = []
labels = []

# Function to extract face landmarks as a feature vector
def extract_face_landmarks(image_path):
    image = cv2.imread(image_path)
    if image is None:
        return None
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(rgb_image)
    if results.multi_face_landmarks:
        landmarks = results.multi_face_landmarks[0]
        landmark_points = []
        for landmark in landmarks.landmark:
            landmark_points.append(landmark.x)
            landmark_points.append(landmark.y)
            landmark_points.append(landmark.z)
        return landmark_points
    return None

# Process each folder
for folder in folders:
    image_files = [f for f in os.listdir(folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
    for image_file in image_files:
        image_path = os.path.join(folder, image_file)
        landmarks = extract_face_landmarks(image_path)
        if landmarks is not None:
            features.append(landmarks)
            labels.append(labels_map[folder])

# Convert to numpy arrays
features = np.array(features)
labels = np.array(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Initialize classifiers
classifiers = {
    'SVM': SVC(kernel='linear'),
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Random Forest': RandomForestClassifier(n_estimators=100),
    'KNN': KNeighborsClassifier(n_neighbors=3)
}

# Train and evaluate each classifier
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"Classifier: {name}")
    print(classification_report(y_test, y_pred, target_names=folders))

# Release MediaPipe resources
face_mesh.close()



Classifier: SVM
              precision    recall  f1-score   support

          up       0.90      0.97      0.93       203
        down       0.87      0.85      0.86       151
    straight       0.86      0.90      0.88       205
       right       0.89      0.84      0.86       147
        left       0.98      0.91      0.95       180

    accuracy                           0.90       886
   macro avg       0.90      0.89      0.90       886
weighted avg       0.90      0.90      0.90       886

Classifier: Logistic Regression
              precision    recall  f1-score   support

          up       0.89      0.95      0.92       203
        down       0.89      0.85      0.87       151
    straight       0.82      0.90      0.86       205
       right       0.90      0.83      0.87       147
        left       0.98      0.91      0.94       180

    accuracy                           0.89       886
   macro avg       0.90      0.89      0.89       886
weighted avg       0.90      

In [None]:
# import cv2
# import mediapipe as mp
# import os
# import numpy as np
# from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
# from sklearn.svm import SVC
# from sklearn.linear_model import LogisticRegression
# from sklearn.ensemble import RandomForestClassifier, VotingClassifier
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.metrics import classification_report

# # Initialize MediaPipe Face Mesh
# mp_face_mesh = mp.solutions.face_mesh
# face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, min_detection_confidence=0.5)

# # Define the folders and labels
# folders = ['up', 'down', 'straight', 'right', 'left']
# labels_map = {folder: idx for idx, folder in enumerate(folders)}

# # Data lists
# features = []
# labels = []

# # Function to extract face landmarks as a feature vector
# def extract_face_landmarks(image_path):
#     image = cv2.imread(image_path)
#     if image is None:
#         return None
#     rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#     results = face_mesh.process(rgb_image)
#     if results.multi_face_landmarks:
#         landmarks = results.multi_face_landmarks[0]
#         landmark_points = []
#         for landmark in landmarks.landmark:
#             landmark_points.append([landmark.x, landmark.y, landmark.z])
#         return landmark_points
#     return None

# # Function to compute distances between key landmarks
# def compute_distances(landmarks):
#     distances = []
#     for i in range(len(landmarks)):
#         for j in range(i + 1, len(landmarks)):
#             distances.append(np.linalg.norm(np.array(landmarks[i]) - np.array(landmarks[j])))
#     return distances

# # Process each folder
# for folder in folders:
#     image_files = [f for f in os.listdir(folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
#     for image_file in image_files:
#         image_path = os.path.join(folder, image_file)
#         landmarks = extract_face_landmarks(image_path)
#         if landmarks is not None:
#             features.append(compute_distances(landmarks))
#             labels.append(labels_map[folder])

# # Convert to numpy arrays
# features = np.array(features)
# labels = np.array(labels)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# # Define the parameter grids for hyperparameter tuning
# param_grid_svm = {
#     'C': [0.1, 1, 10, 100],
#     'kernel': ['linear', 'rbf', 'poly'],
#     'gamma': [0.001, 0.01, 0.1, 1]
# }
# param_grid_rf = {
#     'n_estimators': [100, 200, 300],
#     'max_features': ['auto', 'sqrt'],
#     'max_depth': [10, 20, 30],
#     'min_samples_split': [2, 5, 10],
#     'min_samples_leaf': [1, 2, 4]
# }
# param_grid_knn = {
#     'n_neighbors': [3, 5, 7, 9],
#     'weights': ['uniform', 'distance'],
#     'metric': ['euclidean', 'manhattan']
# }

# # Hyperparameter tuning with GridSearchCV
# grid_search_svm = GridSearchCV(SVC(), param_grid_svm, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
# grid_search_rf = GridSearchCV(RandomForestClassifier(), param_grid_rf, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
# grid_search_knn = GridSearchCV(KNeighborsClassifier(), param_grid_knn, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)

# # Fit models
# grid_search_svm.fit(X_train, y_train)
# grid_search_rf.fit(X_train, y_train)
# grid_search_knn.fit(X_train, y_train)

# # Best estimators
# best_svm = grid_search_svm.best_estimator_
# best_rf = grid_search_rf.best_estimator_
# best_knn = grid_search_knn.best_estimator_

# # Initialize Logistic Regression
# log_reg = LogisticRegression(max_iter=1000)

# # Voting Classifier
# voting_clf = VotingClassifier(estimators=[
#     ('svm', best_svm),
#     ('rf', best_rf),
#     ('knn', best_knn),
#     ('log_reg', log_reg)
# ], voting='hard')

# # Fit the voting classifier
# voting_clf.fit(X_train, y_train)

# # Evaluate classifiers
# classifiers = {
#     'SVM': best_svm,
#     'Logistic Regression': log_reg,
#     'Random Forest': best_rf,
#     'KNN': best_knn,
#     'Voting Classifier': voting_clf
# }

# # Cross-validation scores
# for name, clf in classifiers.items():
#     scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
#     print(f"Classifier: {name} - Cross-Validation Accuracy: {np.mean(scores):.2f} (+/- {np.std(scores):.2f})")

# # Test set evaluation
# for name, clf in classifiers.items():
#     y_pred = clf.predict(X_test)
#     print(f"Classifier: {name}")
#     print(classification_report(y_test, y_pred, target_names=folders))

# # Release MediaPipe resources
# face_mesh.close()

In [1]:
import cv2 
import mediapipe as mp
import numpy as np
import time

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh( max_num_faces=1, min_detection_confidence=0.5)

mp_drawing = mp.solutions.drawing_utils

drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

cap = cv2.VideoCapture(0)

while cap.isOpened():
    succes, image = cap.read()
    
    start = time.time()
    
    
    #flip horizontally for a selfie-view display
    #BGR TO RGB
    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
    
    
    #perfomance improvement
    image.flags.writeable = False
    
    #results
    results = face_mesh.process(image)
    #RGB TO BGR
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    
    img_h, img_w, img_c = image.shape
    face_3d = []
    face_2d = []
    
    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            for idx, lm in enumerate(face_landmarks.landmark):
                if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:
                    if idx == 1:
                        nose_2d = (lm.x * img_w, lm.y * img_h)
                        nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 3000)
                        
                    x, y = int(lm.x * img_w), int(lm.y * img_h)
                    
                    #2d cord
                    face_2d.append([x, y])
                    
                    #3d cord
                    face_3d.append([x, y, lm.z])
                    
            #np array
            face_2d = np.array(face_2d, dtype=np.float64)
            face_3d = np.array(face_3d, dtype=np.float64)
            
            #cam mtx
            focal_lenght = 1 * img_w
            
            cam_mtx = np.array([[focal_lenght, 0, img_h/2],
                                [0, focal_lenght, img_w/2],
                                [0, 0, 1]])
            
            #distortin params
            dist_mtx = np.zeros((4, 1), dtype=np.float64)
            
            #solve pnp
            success, rot_vec, translation_vector = cv2.solvePnP(face_3d, face_2d, cam_mtx, dist_mtx)
            
            #rotational mtx
            rmat, jac = cv2.Rodrigues(rot_vec)
            
            #angles
            angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)
            
            #rot degree
            x = angles[0] * 360
            y = angles[1] * 360
            z = angles[2] * 360
            
            #tilting position
            if y < -10:
                text = "Looking Left"
            elif y > 10: 
                text = "Looking Right"
            elif x < -10:
                text = "Looking Down" 
            elif x > 10: 
                text = "Looking Up" 
            else: 
                text = "Forward" 
                
            
            #nose direction
            nose_3d_projection, jac = cv2.projectPoints(nose_3d, rot_vec, translation_vector, cam_mtx, dist_mtx)
            
            p1 = (int(nose_2d[0]), int(nose_2d[1]))
            p2 = (int(nose_2d[0] + y * 10), int(nose_2d[1] - x * 10))
            
            cv2.line(image, p1, p2, (255, 0, 0), 3)
            
            #txt on img
            cv2.putText(image, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(image, f"X:{x:.2f} Y:{y:.2f} Z:{z:.2f}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            
            
        end = time.time()
        totalTime = end - start
        
        fps = 1 / totalTime
        print("FPS: ", fps)
        
        cv2.putText(image, f"FPS: {int(fps)}", (50, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        
        mp_drawing.draw_landmarks(image, face_landmarks, mp_face_mesh.FACEMESH_CONTOURS, drawing_spec, drawing_spec)
        
        
    cv2.imshow("Head Pose Estimation", image)
    
    if cv2.waitKey(5) & 0xFF == 27:
        break
    
    
cap.release()






FPS:  143.248087431694
FPS:  200.39675107501193
FPS:  252.2737880428245
FPS:  200.61720954704165
FPS:  250.73553323768533
FPS:  250.73553323768533
FPS:  200.51171240080313
FPS:  200.5212984653631
FPS:  200.5308854465481
FPS:  250.69057438288206
FPS:  250.69057438288206
FPS:  250.69057438288206
FPS:  250.675591680612
FPS:  250.64563164814152
FPS:  200.6748002487919
FPS:  250.60070502479536
FPS:  250.90052042830652
FPS:  250.63065431729908
FPS:  250.60070502479536
FPS:  250.64563164814152
FPS:  250.87050660924697
FPS:  250.73553323768533
FPS:  250.4211594722073
FPS:  250.9455546248654
FPS:  250.88551262112694
FPS:  169.74115742614327
FPS:  250.76551476742796
FPS:  256.25024437927664
FPS:  250.88551262112694
FPS:  256.4068957085218
FPS:  253.5088546388637
FPS:  256.6579366050667
FPS:  256.6265296133138
FPS:  289.64187556108004
FPS:  256.42257137616923
FPS:  250.5557945041816
FPS:  200.8189217657761
FPS:  250.58573306249252
FPS:  250.8555023923445
FPS:  169.6313192590795
FPS:  250.66061076

KeyboardInterrupt: 