Load the original RGB image

In [1]:
import cv2
import mediapipe as mp
import numpy as np

# Initialize Mediapipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Load the image
image_path = "../../datasets/ring_try_on_input_data/images/original_0.png"  # Replace with the path to your hand image
image = cv2.imread(image_path)

# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an opaque error inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert the image to RGB (Mediapipe uses RGB images)
imageRGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Array layout is (rows, columns, channels), i.e. (height, width, 3)
imgHeight, imgWidth = imageRGB.shape[:2]
print(imgWidth, imgHeight)

1920 1440


Use MediaPipe to detect hand

In [2]:
results = None

# Run the hand detector once on the still image; the context manager
# releases the model when the block exits.
with mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5) as hands:
    results = hands.process(imageRGB)

    if not results.multi_hand_landmarks:
        print("No hands detected.")
    else:
        for hand_landmarks in results.multi_hand_landmarks:
            # Overlay the hand skeleton on the BGR image loaded earlier (mutates `image`)
            mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Dump every landmark of this hand, indexed in the order MediaPipe returns them
            for idx, landmark in enumerate(hand_landmarks.landmark):
                print(f"Landmark {idx}: x={landmark.x}, y={landmark.y}, z={landmark.z}")

Landmark 0: x=0.715581476688385, y=0.4338538646697998, z=3.717768777278252e-07
Landmark 1: x=0.7039138078689575, y=0.5395265221595764, z=-0.013928795233368874
Landmark 2: x=0.6543437838554382, y=0.6085299253463745, z=-0.024222364649176598
Landmark 3: x=0.5916611552238464, y=0.635909914970398, z=-0.03132377564907074
Landmark 4: x=0.5426812767982483, y=0.663235604763031, z=-0.03824285790324211
Landmark 5: x=0.5637412667274475, y=0.558074951171875, z=-0.03281363099813461
Landmark 6: x=0.4728052616119385, y=0.5733675956726074, z=-0.04696201905608177
Landmark 7: x=0.42245492339134216, y=0.5797991752624512, z=-0.05384199693799019
Landmark 8: x=0.3819367289543152, y=0.583411455154419, z=-0.057277824729681015
Landmark 9: x=0.5528971552848816, y=0.49372825026512146, z=-0.034468427300453186
Landmark 10: x=0.4579400420188904, y=0.5157116651535034, z=-0.04545045644044876
Landmark 11: x=0.3998931646347046, y=0.5243610143661499, z=-0.05225352197885513
Landmark 12: x=0.3568154573440552, y=0.527469158

I0000 00:00:1738305688.896836    2214 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 4
I0000 00:00:1738305688.934984    2398 gl_context.cc:369] GL version: 3.0 (OpenGL ES 3.0 Mesa 21.2.6), renderer: D3D12 (NVIDIA GeForce RTX 3060 Laptop GPU)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1738305688.974721    2367 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738305688.991664    2369 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738305689.011675    2378 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


In [3]:
points = results.multi_hand_landmarks

# multi_hand_landmarks is falsy when nothing was detected (the detection cell
# checks it the same way); iterating it would raise an unhelpful TypeError.
if not points:
    raise ValueError("No hands detected; cannot build the landmark array.")

# Collect (x, y, z) of every landmark of every detected hand, in detection
# order, as one (N, 3) array. The landmarks were already drawn onto `image`
# by the detection cell, so the drawing is not repeated here.
handLandmarks = np.array([
    [landmark.x, landmark.y, landmark.z]
    for hand_landmarks in points
    for landmark in hand_landmarks.landmark
])
print(handLandmarks)

[[ 7.15581477e-01  4.33853865e-01  3.71776878e-07]
 [ 7.03913808e-01  5.39526522e-01 -1.39287952e-02]
 [ 6.54343784e-01  6.08529925e-01 -2.42223646e-02]
 [ 5.91661155e-01  6.35909915e-01 -3.13237756e-02]
 [ 5.42681277e-01  6.63235605e-01 -3.82428579e-02]
 [ 5.63741267e-01  5.58074951e-01 -3.28136310e-02]
 [ 4.72805262e-01  5.73367596e-01 -4.69620191e-02]
 [ 4.22454923e-01  5.79799175e-01 -5.38419969e-02]
 [ 3.81936729e-01  5.83411455e-01 -5.72778247e-02]
 [ 5.52897155e-01  4.93728250e-01 -3.44684273e-02]
 [ 4.57940042e-01  5.15711665e-01 -4.54504564e-02]
 [ 3.99893165e-01  5.24361014e-01 -5.22535220e-02]
 [ 3.56815457e-01  5.27469158e-01 -5.68891726e-02]
 [ 5.58187962e-01  4.33775276e-01 -3.62492017e-02]
 [ 4.71347451e-01  4.49970961e-01 -4.86130491e-02]
 [ 4.16234374e-01  4.54358280e-01 -5.76362684e-02]
 [ 3.75274181e-01  4.57525283e-01 -6.32283613e-02]
 [ 5.71821272e-01  3.78316045e-01 -3.80291380e-02]
 [ 5.03393590e-01  3.92694533e-01 -4.97273393e-02]
 [ 4.61297244e-01  4.03046548e-

Get data from LiDAR

In [None]:
depthLogs = "../../datasets/ring_try_on_input_data/images/depth_logs_0.txt"  # Replace with the path to your depth log

# The log is parsed as a comma-separated grid of numbers
depthData = np.loadtxt(depthLogs, delimiter=",")
print(depthData)

# Number of rows, then number of columns, of the depth grid
for extent in (len(depthData), len(depthData[0])):
    print(extent)

In [None]:
# Upsample the depth grid to the RGB image resolution so that a pixel index in
# the photo addresses the matching depth sample. cv2.resize takes the target
# size as (width, height); use the size measured from the loaded image rather
# than a hard-coded 1920x1440.
resizedDepthData = cv2.resize(depthData, (imgWidth, imgHeight), interpolation=cv2.INTER_CUBIC)

# Multiplying by 1000 scales metres up to millimetres (the earlier note had the
# direction reversed). NOTE(review): presumably the log stores metres - confirm.
resizedDepthData = resizedDepthData * 1000
print(resizedDepthData.shape)

Get intrinsics

In [104]:
# Lines 2-4 of the calibration log hold the three rows of the 3x3 intrinsic
# matrix as comma-separated numbers; the first line is skipped.
# NOTE(review): this reads calibration log "_1" while the image and depth logs
# use index "_0" - confirm the intrinsics are shared between captures.
with open("../../datasets/ring_try_on_input_data/images/depth_calibration_logs_1.txt") as f:
    lines = f.readlines()[1:4]

intrinsics = np.array([[float(value) for value in line.split(',')] for line in lines])

# Focal lengths sit on the diagonal, the principal point in the last column
fx, fy = intrinsics[0][0], intrinsics[1][1]
cx, cy = intrinsics[0][2], intrinsics[1][2]

Get point cloud of hand landmarks

In [105]:
landmarks3D = list()
h, w = resizedDepthData.shape
for landmark in handLandmarks:
    # Normalised image coordinates -> integer pixel indices, clamped to the
    # depth map. Without the clamp a coordinate of 1.0 or more raises
    # IndexError and a negative one silently wraps to the opposite edge.
    xL = min(max(int(landmark[0] * w), 0), w - 1)
    yL = min(max(int(landmark[1] * h), 0), h - 1)
    depthXY = resizedDepthData[yL, xL]

    # Pinhole back-projection: pixel + depth -> 3D point in the camera frame,
    # expressed in the same unit as the depth map.
    x3d = (xL - cx) * depthXY / fx
    y3d = (yL - cy) * depthXY / fy
    z3d = depthXY
    landmarks3D.append([x3d, y3d, z3d])

landmarks3D = np.array(landmarks3D)

Find ring basis vectors

In [106]:
# Finger that carries the ring. In MediaPipe's hand model landmark 5 is the
# index-finger MCP joint, 6 the index-finger PIP joint and 9 the middle-finger
# MCP joint, so the ring is placed on the index finger (the anatomical ring
# finger would be landmarks 13/14).
landmark5, landmark6, landmark9 = landmarks3D[5], landmarks3D[6], landmarks3D[9]

# The ring sits halfway between the two index-finger joints
ringPosition = (landmark5 + landmark6) / 2

# x axis: along the finger, from landmark 5 towards landmark 6
xAxisDirection = (landmark6 - landmark5)
xAxis = xAxisDirection / np.linalg.norm(xAxisDirection)

# Helper direction from landmark 5 towards landmark 9; it only spans the plane
# with xAxisDirection, the final y axis is derived below.
yAxisDirection = (landmark9 - landmark5)

# z axis: normal of the plane spanned by the two directions above
zAxisDirection = np.cross(xAxisDirection, yAxisDirection)
zAxisNorm = np.linalg.norm(zAxisDirection)
# Coincident or collinear landmarks (e.g. invalid depth samples) give a zero
# normal and would fill the basis with NaNs without any error.
if not np.isfinite(zAxisNorm) or zAxisNorm == 0:
    raise ValueError("Degenerate ring basis: landmarks 5, 6 and 9 do not span a plane.")
zAxis = zAxisDirection / zAxisNorm

# y axis: perpendicular to both z and x, completing a right-handed basis
yAxisNewDirection = np.cross(zAxis, xAxis)
yAxis = yAxisNewDirection / np.linalg.norm(yAxisNewDirection)

In [None]:
# Show the ring basis, one vector per line, in x, y, z order
for basisVector in (xAxis, yAxis, zAxis):
    print(basisVector)

Form the transformation matrix

In [108]:
# Re-label the ring basis for the transform: x and y trade places and z is
# negated (swapping two axes and flipping the third keeps the handedness).
xForTMatrix, yForTMatrix, zForTMatrix = yAxis, xAxis, -zAxis

In [None]:
# Show the re-labelled axes that become the rotation columns, in x, y, z order
for matrixAxis in (xForTMatrix, yForTMatrix, zForTMatrix):
    print(matrixAxis)

In [None]:
# Homogeneous 4x4 transform: the three axes form the columns of the rotation
# part, the ring position is the translation column, the last row stays [0, 0, 0, 1].
transformMatrix = np.eye(4)
transformMatrix[:3, :3] = np.column_stack((xForTMatrix, yForTMatrix, zForTMatrix))
transformMatrix[:3, 3] = ringPosition

print(transformMatrix)

In [None]:
# Ring position, a blank separator line, then the full transform
print(ringPosition, end="\n\n")
print(transformMatrix)

In [None]:
# Append w = 1 so the 3D position can be multiplied by the 4x4 transform
ringPosition_homogeneous = np.concatenate((ringPosition, [1.0]))

# NOTE(review): this applies the ring transform to the ring position itself
# (rotation @ position + position). Presumably a camera pose relative to the
# ring was intended, which would use the inverse transform - confirm.
cameraPose = transformMatrix @ ringPosition_homogeneous
print(cameraPose)

In [None]:
import plotly.graph_objects as go
import numpy as np

# Lengthen the unit axes so the cones are visible next to the landmark cloud
xAxis_scaled = xForTMatrix * 50
yAxis_scaled = yForTMatrix * 50
zAxis_scaled = zForTMatrix * 50

fig = go.Figure()

# One cone per axis, anchored by its tail at the ring position:
# x in reds, y in greens, z in blues.
for axis_vector, axis_colorscale in (
    (xAxis_scaled, 'reds'),
    (yAxis_scaled, 'greens'),
    (zAxis_scaled, 'blues'),
):
    fig.add_trace(go.Cone(
        x=[ringPosition[0]], y=[ringPosition[1]], z=[ringPosition[2]],
        u=[axis_vector[0]], v=[axis_vector[1]], w=[axis_vector[2]],
        colorscale=axis_colorscale, sizemode="scaled", showscale=False, anchor="tail"
    ))

# The back-projected hand landmarks as black markers
fig.add_trace(go.Scatter3d(
    x=landmarks3D[:, 0], y=landmarks3D[:, 1], z=landmarks3D[:, 2],
    mode='markers', marker=dict(size=5, color='black')
))

# Axis titles, no legend, and a larger canvas in a single layout update
fig.update_layout(
    scene=dict(
        xaxis_title='X-axis',
        yaxis_title='Y-axis',
        zaxis_title='Z-axis'
    ),
    showlegend=False,
    width=1200,
    height=800,
)

fig.show()

Run Blender Rendering

"C:\Program Files\Blender Foundation\Blender 4.3\blender.exe" "C:\Users\Владелец\Downloads\kilce_001.blend" --background --python "C:\Users\Владелец\Downloads\blender_renderer.py"


In [None]:
import bpy
from pathlib import Path
from mathutils import Matrix
import numpy as np

class BlenderRenderSetup:
    """Configure a Blender scene and render colour, depth and normal passes.

    For each requested camera the instance selects the camera, creates the
    output directory ``<p_root>/<camera_name>``, rebuilds the compositor node
    tree with one File Output node per pass, poses the ``"ring"`` object from
    a 4x4 transform and triggers a render.
    """

    def __init__(self, p_root: Path, dn_ext="OPEN_EXR"):
        """Store the output root and the file format for depth/normal passes.

        Args:
            p_root: Root directory under which per-camera folders are created.
            dn_ext: File format of the depth and normal outputs, ``"OPEN_EXR"``
                or ``"PNG"`` (case-insensitive).

        Raises:
            AssertionError: If ``dn_ext`` is not a supported format.
        """
        # Normalise before validating so that e.g. "png" is accepted; the
        # original upper-cased only after the check had already rejected it.
        dn_ext = str(dn_ext).upper()
        assert dn_ext in ["OPEN_EXR", "PNG"]

        self.camera_name: str = None
        self.camera = None
        self.tree = None

        # Compositor nodes, created by _set_nodes_tree()
        self.render_layers = None
        self.composite = None
        self.file_output_color = None
        self.file_output_depth = None
        self.file_output_normal = None

        self.p_root = p_root
        self.p_out_render = None
        self.dn_ext = dn_ext

    def _set_camera(
            self,
            camera_name: str,
    ) -> None:
        """Look up the camera object by name and remember it.

        Raises:
            AssertionError: If no object with that name exists in the file.
        """
        self.camera = bpy.data.objects.get(camera_name)
        assert self.camera is not None
        self.camera_name = camera_name

    def _set_render_path(
            self
    ) -> None:
        """Create ``<p_root>/<camera_name>`` and store it as the output folder."""
        assert self.camera_name
        self.p_out_render = Path(self.p_root) / self.camera_name
        self.p_out_render.mkdir(exist_ok=True, parents=True)

    def _set_context(
            self,
            px: int = 1024,
            py: int = 1024,
    ) -> None:
        """Activate the camera, set the resolution and enable the extra passes.

        Args:
            px: Render width in pixels.
            py: Render height in pixels.
        """
        assert self.camera is not None
        bpy.context.scene.camera = self.camera
        bpy.context.scene.render.resolution_x = px
        bpy.context.scene.render.resolution_y = py
        bpy.context.scene.use_nodes = True

        bpy.context.view_layer.use_pass_z = True
        bpy.context.view_layer.use_pass_normal = True
        bpy.context.view_layer.use_pass_object_index = True
        # Transparent film so the colour output carries an alpha channel
        bpy.context.scene.render.film_transparent = True

    def _get_output_node(
            self,
            tree,
            label,
            file_format: str,
            use_alpha: bool = False
    ):
        """Create a File Output node writing into ``<p_out_render>/_<label>``.

        Args:
            tree: Compositor node tree that receives the new node.
            label: Name of the pass; used for the sub-folder name.
            file_format: Blender file format identifier for the node.
            use_alpha: When true the node writes RGBA instead of its default
                colour mode.

        Returns:
            The newly created node.
        """
        base_path = self.p_out_render / f"_{label}"
        base_path.mkdir(exist_ok=True, parents=True)

        node = tree.nodes.new(type="CompositorNodeOutputFile")
        node.base_path = str(base_path)
        node.format.file_format = file_format

        if use_alpha:
            node.format.color_mode = 'RGBA'

        return node

    def _set_nodes_tree(
            self
    ) -> None:
        """Rebuild the compositor tree: render layers, composite, three outputs."""
        self.tree = bpy.context.scene.node_tree
        # Start from an empty tree so repeated calls do not accumulate nodes
        self.tree.nodes.clear()

        self.render_layers = self.tree.nodes.new(type="CompositorNodeRLayers")
        self.composite = self.tree.nodes.new(type="CompositorNodeComposite")

        self.file_output_color = self._get_output_node(self.tree, "color", "PNG", use_alpha=True)
        self.file_output_depth = self._get_output_node(self.tree, "depth", self.dn_ext)
        self.file_output_normal = self._get_output_node(self.tree, "normal", self.dn_ext)

    def _link_nodes(self):
        """Wire the render-layer outputs to the file outputs and the composite."""
        assert self.tree is not None

        links = self.tree.links
        outputs = self.render_layers.outputs
        links.new(outputs["Image"], self.file_output_color.inputs[0])
        links.new(outputs["Depth"], self.file_output_depth.inputs[0])
        links.new(outputs["Normal"], self.file_output_normal.inputs[0])
        # The object-index pass is enabled in _set_context() but not written out.
        links.new(outputs["Image"], self.composite.inputs[0])

    def _transform_matrix_to_euler(self, transform_matrix):
        """Split a 4x4 transform into Euler angles and a translation.

        Args:
            transform_matrix: 4x4 array-like indexable as ``m[:3, :3]``.

        Returns:
            Tuple ``(euler_angles, location)``: the rotation block converted
            with ``mathutils.Matrix.to_euler()`` and the last-column translation.
        """
        rotation_matrix = transform_matrix[:3, :3]
        location = transform_matrix[:3, 3]
        euler_angles = Matrix(rotation_matrix).to_euler()
        return euler_angles, location

    def _setup(
            self,
            camera_name: str,
            px: int = 1920,
            py: int = 1440,
            transform_matrix=None,
    ) -> None:
        """Prepare camera, output paths, compositor and ring pose for one view.

        Args:
            camera_name: Name of the camera object to render from.
            px: Render width in pixels.
            py: Render height in pixels.
            transform_matrix: Optional 4x4 ring pose. When omitted, the
                module-level ``transformMatrix`` is used, as before.
        """
        self._set_camera(camera_name)
        self._set_render_path()
        self._set_context(px=px, py=py)
        self._set_nodes_tree()
        self._link_nodes()

        if transform_matrix is None:
            # Backward-compatible fallback to the global defined by the caller
            # (a NameError here means no pose was supplied either way).
            transform_matrix = transformMatrix

        euler_angles, location = self._transform_matrix_to_euler(transform_matrix)

        ring = bpy.data.objects["ring"]
        # x and y are swapped when placing the ring.
        # NOTE(review): presumably this mirrors the x/y swap used when the
        # matrix was built - confirm against the scene's axis convention.
        ring.location = (location[1], location[0], location[2])
        ring.rotation_euler = euler_angles

    def _render(
            self
    ) -> None:
        """Trigger the render; the compositor file outputs write the passes."""
        bpy.ops.render.render(write_still=True)
        print(f"Rendered color, depth, and normal maps saved to {self.p_out_render}")

    def render(
            self,
            camera_name: str,
            px: int = 1920,
            py: int = 1440,
            transform_matrix=None,
    ) -> None:
        """Set up the scene for ``camera_name`` and render it.

        Args:
            camera_name: Name of the camera object to render from.
            px: Render width in pixels.
            py: Render height in pixels.
            transform_matrix: Optional 4x4 ring pose, forwarded to ``_setup``.
        """
        self._setup(
            camera_name=camera_name,
            px=px, py=py,
            transform_matrix=transform_matrix,
        )

        assert self.camera is not None
        assert self.tree is not None

        self._render()


if __name__ == "__main__":
    # Outputs land next to the current working directory, one folder per camera
    p_root = Path(r"./")
    renderer = BlenderRenderSetup(p_root)

    # Render once from each camera, in this order
    for view in ("bottom", "front"):
        renderer.render(view)

Put RGBA image on top of original RGB image

In [None]:
from PIL import Image

def overlay_images(rgb_image_path, rgba_image_path, output_image_path):
    """Alpha-composite an RGBA image over an RGB image and save the result.

    Args:
        rgb_image_path: Path of the background image; read as opaque RGB.
        rgba_image_path: Path of the overlay image; read as RGBA and resized
            to the background's size before compositing.
        output_image_path: Path the composited RGBA image is written to.
    """
    # Open inside context managers so the file handles are closed as soon as
    # the pixel data has been converted; convert() returns a new image.
    with Image.open(rgb_image_path) as rgb_file:
        # RGB first drops any alpha in the source, so the background is opaque
        background = rgb_file.convert("RGB").convert("RGBA")
    with Image.open(rgba_image_path) as rgba_file:
        rgba_image = rgba_file.convert("RGBA")

    # alpha_composite requires both images to have the same size
    rgba_image = rgba_image.resize(background.size)

    overlay = Image.alpha_composite(background, rgba_image)
    overlay.save(output_image_path)

def rgb_to_rgba(rgb_image_path, output_image_path):
    """Re-save an image as RGBA with any existing alpha discarded.

    Args:
        rgb_image_path: Path of the image to read.
        output_image_path: Path the RGBA image is written to.
    """
    # The context manager closes the source file once it has been converted
    with Image.open(rgb_image_path) as rgb_file:
        # Going through RGB first drops any alpha present in the source
        rgba_image = rgb_file.convert("RGB").convert("RGBA")
    rgba_image.save(output_image_path)

# Composite 'ringRGBA.png' over 'original_1.png' and write 'output_image1.png'.
# NOTE(review): the first cell loads "original_0.png"; confirm that
# 'original_1.png' is the intended background for this render.
overlay_images('original_1.png', 'ringRGBA.png', 'output_image1.png')
