In [1]:
import json
import cv2
import os
import random

# Define paths for the dataset and output directories
dataset_path = r"C:\Users\Teddy\Documents\Skripsi\VRPose\dataset"
train_images_path = os.path.join(dataset_path, r'train2017\train2017')
train_annotations_file = os.path.join(dataset_path, 'coco_wholebody_train_v1.0.json')

# Define output path for augmented images
output_path_train = os.path.join(dataset_path, r'train2017\train_augmented_image_processing')
os.makedirs(output_path_train, exist_ok=True)

# Function to draw a randomized black or white rectangle (to simulate VR headset) over the face
def draw_randomized_vr_rectangle(image, face_box):
    # Extract face box coordinates and cast them to integers
    x, y, w, h = map(int, face_box)

    # Randomly choose between black and white for the VR headset color
    color = random.choice([(0, 0, 0), (255, 255, 255)])  # Black or White

    # Define VR headset dimensions (covering half the face vertically)
    vr_height = int(h / 2)  # Adjust this ratio if needed to fit the headset
    vr_width = w  # The width will cover the entire face width

    # Define the top-left and bottom-right coordinates of the rectangle
    top_left = (x, y)
    bottom_right = (x + vr_width, y + vr_height)

    # Draw a solid rectangle (randomly black or white) on the image
    cv2.rectangle(image, top_left, bottom_right, color, thickness=-1)

    return image

# Function to process images and apply the VR headset augmentation for multiple people
def process_images(annotations_file, images_path, output_path):
    with open(annotations_file, 'r') as f:
        annotations = json.load(f)

    # Loop through all images in the dataset
    for image_info in annotations['images']:
        image_id = image_info['id']
        image_path = os.path.join(images_path, image_info['file_name'])

        # Load the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error: Image at {image_path} could not be loaded.")
            continue

        # Find all annotations for the current image (multiple people possible)
        image_annotations = [ann for ann in annotations['annotations'] if ann['image_id'] == image_id]

        # Process each person (annotation) in the image
        for annotation in image_annotations:
            # Check if face keypoints are valid
            if annotation.get("face_valid"):
                face_box = annotation["face_box"]

                # DEBUG: Print the face box to check the coordinates
                print(f"Processing face box coordinates: {face_box} for image ID: {image_id}")

                # Apply randomized VR headset augmentation (black or white rectangle)
                image = draw_randomized_vr_rectangle(image, face_box)

        # Save the augmented image to the output directory
        output_image_path = os.path.join(output_path, image_info['file_name'])
        cv2.imwrite(output_image_path, image)

        print(f"Processed image saved at {output_image_path}")

# Process the train dataset
process_images(train_annotations_file, train_images_path, output_path_train)


Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\dataset\train2017\train_augmented_image_processing\000000391895.jpg
Processing face box coordinates: [483.21, 69.79, 124.40000000000003, 106.2] for image ID: 522418
Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\dataset\train2017\train_augmented_image_processing\000000522418.jpg
Processing face box coordinates: [197.76, 93.32, 26.74000000000001, 22.070000000000007] for image ID: 184613
Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\dataset\train2017\train_augmented_image_processing\000000184613.jpg
Processing face box coordinates: [169.71, 99.83, 95.12999999999997, 123.30999999999999] for image ID: 318219
Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\dataset\train2017\train_augmented_image_processing\000000318219.jpg
Processing face box coordinates: [129.82, 208.79, 74.5, 96.1] for image ID: 554625
Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\datas

In [2]:
import json
import cv2
import os
import numpy as np
import random  # To randomize headset selection

# Define paths for the dataset and output directories
dataset_path = r"C:\Users\Teddy\Documents\Skripsi\VRPose\dataset"
train_images_path = os.path.join(dataset_path, r'train2017\train2017')
train_annotations_file = os.path.join(dataset_path, 'coco_wholebody_train_v1.0.json')

# Define output path for augmented images
output_path_train = os.path.join(dataset_path, r'train2017\train_augmented_image_processing')
os.makedirs(output_path_train, exist_ok=True)

# Function to overlay a randomly chosen PNG image of a VR headset onto the face
def overlay_vr_headset(image, face_box, vr_image):
    # Extract face box coordinates and cast them to integers
    x, y, w, h = map(int, face_box)

    # Resize the VR headset image to fit the width of the face and half the height
    vr_height = int(h / 2)  # Adjust the height if needed
    vr_width = w
    resized_vr = cv2.resize(vr_image, (vr_width, vr_height))

    # Ensure the region of interest matches the resized VR image dimensions exactly
    roi = image[y:y + vr_height, x:x + vr_width]
    if roi.shape[:2] != resized_vr.shape[:2]:
        print(f"Skipping overlay due to shape mismatch: ROI shape {roi.shape}, VR shape {resized_vr.shape}")
        return image

    # Extract the alpha channel from the VR headset image for transparency
    alpha_vr = resized_vr[:, :, 3] / 255.0  # Normalize alpha channel to [0, 1]
    alpha_background = 1.0 - alpha_vr

    # Blend the VR headset image with the ROI using the alpha channel
    for c in range(0, 3):  # Loop over color channels (B, G, R)
        roi[:, :, c] = (alpha_vr * resized_vr[:, :, c] + alpha_background * roi[:, :, c])

    # Place the modified ROI back into the original image
    image[y:y + vr_height, x:x + vr_width] = roi

    return image

# Function to process images and apply the VR headset augmentation for multiple people
def process_images(annotations_file, images_path, output_path, vr_images):
    with open(annotations_file, 'r') as f:
        annotations = json.load(f)

    # Loop through all images in the dataset
    for image_info in annotations['images']:
        image_id = image_info['id']
        image_path = os.path.join(images_path, image_info['file_name'])

        # Load the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error: Image at {image_path} could not be loaded.")
            continue

        # Find all annotations for the current image (multiple people possible)
        image_annotations = [ann for ann in annotations['annotations'] if ann['image_id'] == image_id]

        # Process each person (annotation) in the image
        for annotation in image_annotations:
            # Check if face keypoints are valid
            if annotation.get("face_valid"):
                face_box = annotation["face_box"]

                # DEBUG: Print the face box to check the coordinates
                print(f"Processing face box coordinates: {face_box} for image ID: {image_id}")

                # Randomly select a VR headset from the list
                vr_image = random.choice(vr_images)

                # Apply the VR headset overlay using the randomly chosen PNG image
                image = overlay_vr_headset(image, face_box, vr_image)

        # Save the augmented image to the output directory
        output_image_path = os.path.join(output_path, image_info['file_name'])
        cv2.imwrite(output_image_path, image)

        print(f"Processed image saved at {output_image_path}")

# Function to load all available VR headset images
def load_vr_headset_images(vr_images_folder):
    vr_images = []
    for file_name in os.listdir(vr_images_folder):
        if file_name.endswith('.png'):
            vr_image_path = os.path.join(vr_images_folder, file_name)
            vr_image = cv2.imread(vr_image_path, cv2.IMREAD_UNCHANGED)
            if vr_image is not None:
                vr_images.append(vr_image)
            else:
                print(f"Warning: Could not load VR headset image at {vr_image_path}")
    return vr_images

# Define the path to the folder containing VR headset PNG images
vr_images_folder = r"C:\Users\Teddy\Documents\Skripsi\VRPose\VR_Headsets"

# Load all available VR headset images
vr_images = load_vr_headset_images(vr_images_folder)

# Check if we loaded any VR headset images
if len(vr_images) == 0:
    print("Error: No VR headset images found. Please check the folder path.")
else:
    # Process the train dataset
    process_images(train_annotations_file, train_images_path, output_path_train, vr_images)


Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\dataset\train2017\train_augmented_image_processing\000000391895.jpg
Processing face box coordinates: [483.21, 69.79, 124.40000000000003, 106.2] for image ID: 522418
Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\dataset\train2017\train_augmented_image_processing\000000522418.jpg
Processing face box coordinates: [197.76, 93.32, 26.74000000000001, 22.070000000000007] for image ID: 184613
Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\dataset\train2017\train_augmented_image_processing\000000184613.jpg
Processing face box coordinates: [169.71, 99.83, 95.12999999999997, 123.30999999999999] for image ID: 318219
Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\dataset\train2017\train_augmented_image_processing\000000318219.jpg
Processing face box coordinates: [129.82, 208.79, 74.5, 96.1] for image ID: 554625
Processed image saved at C:\Users\Teddy\Documents\Skripsi\VRPose\datas

error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4155: error: (-215:Assertion failed) inv_scale_x > 0 in function 'cv::resize'


In [None]:
val_annotations_file = os.path.join(dataset_path, 'coco_wholebody_val_v1.0.json')

In [None]:

# Load the annotation data from the JSON file
    with open(annotations_file, 'r') as f:
        annotations = json.load(f)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Function to load the dataset
def load_pose_data(file_path, file_format='csv'):
    if file_format == 'csv':
        data = pd.read_csv(file_path)
    elif file_format == 'json':
        data = pd.read_json(file_path)
    elif file_format == 'npy':
        data = np.load(file_path)
    else:
        raise ValueError("Unsupported file format!")
    return data

# Function to preprocess the data (e.g., normalization)
def preprocess_data(data):
    # Assuming the data contains 2D keypoints as columns
    # Normalize the data (scaling between 0 and 1)
    scaler = StandardScaler()
    data_normalized = scaler.fit_transform(data)
    return data_normalized, scaler

# Function to split the data into training, validation, and testing sets
def split_data(data, test_size=0.2, validation_size=0.1):
    train_data, test_data = train_test_split(data, test_size=test_size, random_state=42)
    train_data, val_data = train_test_split(train_data, test_size=validation_size, random_state=42)
    return train_data, val_data, test_data

# Example usage
file_path = 'path_to_your_data.csv'  # Change this to your dataset path
pose_data = load_pose_data(file_path, file_format='csv')

# Preprocess the data
normalized_data, scaler = preprocess_data(pose_data)

# Split the data
train_data, val_data, test_data = split_data(normalized_data)

print("Training Data Shape:", train_data.shape)
print("Validation Data Shape:", val_data.shape)
print("Testing Data Shape:", test_data.shape)


NameError: name 'image_path' is not defined

In [None]:
# Load the annotation data from the JSON file
    with open(annotations_file, 'r') as f:
        annotations = json.load(f)

In [None]:
import numpy as np

# Function to compute MPJPE (Mean Per Joint Position Error)
def compute_mpjpe(pred_keypoints, true_keypoints):
    # Compute the Euclidean distance between predicted and true keypoints for each joint
    errors = np.linalg.norm(pred_keypoints - true_keypoints, axis=-1)
    
    # Calculate the mean error across all joints
    mpjpe = np.mean(errors)
    return mpjpe

# Function to compute MPJRE (Mean Per Joint Rotation Error)
def compute_mpjre(pred_rotations, true_rotations):
    # Assuming pred_rotations and true_rotations are in radians
    # Compute the angular difference between predicted and true rotations
    angle_diffs = np.arccos(np.clip(np.sum(pred_rotations * true_rotations, axis=-1), -1.0, 1.0))
    
    # Calculate the mean rotation error across all joints
    mpjre = np.mean(np.degrees(angle_diffs))
    return mpjre

# Function to evaluate the model using MPJPE and MPJRE
def evaluate_pose_model_with_metrics(model, test_data):
    x_test, y_test = test_data[:, :input_shape[0]], test_data[:, input_shape[0]:]
    
    # Predict keypoints
    pred_keypoints = model.predict(x_test)
    
    # Reshape keypoints if necessary (e.g., from (N, 34) to (N, 17, 2) for 17 keypoints in 2D)
    pred_keypoints = pred_keypoints.reshape(-1, 17, 2)
    true_keypoints = y_test.reshape(-1, 17, 2)
    
    # Compute MPJPE
    mpjpe = compute_mpjpe(pred_keypoints, true_keypoints)
    print("MPJPE:", mpjpe)
    
    # If rotation data is available, compute MPJRE (for 3D rotations)
    # Assuming y_test also contains rotation data for each joint
    pred_rotations = None  # Replace with actual prediction of joint rotations
    true_rotations = None  # Replace with actual ground truth joint rotations
    if pred_rotations is not None and true_rotations is not None:
        mpjre = compute_mpjre(pred_rotations, true_rotations)
        print("MPJRE:", mpjre)
    
    return mpjpe

# Example usage
mpjpe = evaluate_pose_model_with_metrics(pose_model, test_data)


In [2]:
import cv2
import numpy as np

# Load the 2D keypoints (eyes, nose) from your dataset
# Example keypoints: [left_eye, right_eye, nose]
keypoints = {
    "left_eye": [200, 250],
    "right_eye": [400, 250],
    "nose": [300, 350]
}

# Camera matrix (assuming some default camera parameters)
camera_matrix = np.array([[800, 0, 320], [0, 800, 240], [0, 0, 1]], dtype="double")
dist_coeffs = np.zeros((4, 1))  # Assuming no lens distortion

# Define 3D model points for head landmarks (nose, chin, eyes, etc.)
# These are based on a general 3D model of a human head
model_points = np.array([
    [0.0, 0.0, 0.0],            # Nose tip
    [0.0, -330.0, -65.0],       # Chin
    [-225.0, 170.0, -135.0],    # Left eye corner
    [225.0, 170.0, -135.0],     # Right eye corner
    [-150.0, -150.0, -125.0],   # Left mouth corner
    [150.0, -150.0, -125.0]     # Right mouth corner
])

# VR model 3D points (includes straps and headset geometry)
vr_model_points = np.array([
    [0.0, 0.0, 0.0],           # VR front-center (positioned at nose)
    [0.0, 150.0, -50.0],       # VR top-center (above nose bridge)
    [-200.0, 50.0, -100.0],    # Left strap connection
    [200.0, 50.0, -100.0],     # Right strap connection
    [-100.0, 100.0, -150.0],   # Left bottom corner of headset
    [100.0, 100.0, -150.0],    # Right bottom corner of headset
    [-250.0, 75.0, -75.0],     # Left back strap
    [250.0, 75.0, -75.0]       # Right back strap
])

# Head pose estimation function
def estimate_head_pose(image_points, model_points, camera_matrix, dist_coeffs):
    # Use solvePnP to estimate the rotation and translation vectors (head pose)
    success, rotation_vector, translation_vector = cv2.solvePnP(
        model_points, image_points, camera_matrix, dist_coeffs
    )
    return success, rotation_vector, translation_vector

# Project the 3D VR model onto the 2D image using the estimated pose
def project_vr_headset(image, vr_model_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs):
    # Project 3D VR model points onto 2D image
    projected_points, _ = cv2.projectPoints(
        vr_model_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs
    )
    
    # Convert projected points to integers for use in drawing
    projected_points = projected_points.astype(int)
    
    # Draw the VR headset (using projected points) on the 2D image
    for point in projected_points:
        cv2.circle(image, tuple(point.ravel()), 5, (0, 255, 0), -1)  # Draw points in green
    
    # Optionally, draw lines between points for visualization (e.g., for straps)
    # Connect the left strap points
    cv2.line(image, tuple(projected_points[2].ravel()), tuple(projected_points[6].ravel()), (255, 0, 0), 2)
    # Connect the right strap points
    cv2.line(image, tuple(projected_points[3].ravel()), tuple(projected_points[7].ravel()), (255, 0, 0), 2)
    # Connect headset front to back strap
    cv2.line(image, tuple(projected_points[0].ravel()), tuple(projected_points[2].ravel()), (255, 0, 0), 2)
    cv2.line(image, tuple(projected_points[0].ravel()), tuple(projected_points[3].ravel()), (255, 0, 0), 2)
    
    return image

# 2D-to-3D point correspondence (from keypoints)
image_points = np.array([
    keypoints["nose"],       # Nose tip
    [keypoints["nose"][0], keypoints["nose"][1] + 330],  # Chin (approximated)
    keypoints["left_eye"],   # Left eye corner
    keypoints["right_eye"],  # Right eye corner
    [keypoints["left_eye"][0], keypoints["left_eye"][1] + 100],  # Left mouth corner (approx)
    [keypoints["right_eye"][0], keypoints["right_eye"][1] + 100] # Right mouth corner (approx)
], dtype="double")

# Load an example image
image = np.zeros((500, 500, 3), dtype=np.uint8)  # Dummy black image for demonstration

# Estimate head pose
success, rotation_vector, translation_vector = estimate_head_pose(image_points, model_points, camera_matrix, dist_coeffs)

if success:
    # Project and overlay the VR headset on the image
    image_with_vr = project_vr_headset(image, vr_model_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
    
    # Show the resulting image
    cv2.imshow("Image with 3D VR Headset Projection", image_with_vr)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("Head pose estimation failed.")
