Below we segment the thermal pictures, which were rendered from the thermal data with the cv2 inferno colormap, using the Meta SAM model. We get a segmentation of each hand separately.

We achieve successful segmentation (both hands) for 70% of the images. This rate includes cold hands, which are hard to detect; excluding cold hands, the rate is higher, around 90%.

In [None]:
import os
import glob
import cv2
import numpy as np
from segment_anything import sam_model_registry, SamPredictor

# === Enhance contrast ===
def enhance_contrast(image):
    """Boost local contrast of an RGB image using CLAHE on its lightness.

    The image is converted from RGB to LAB, CLAHE (clip limit 2.0, 8x8 tile
    grid) is applied to the L channel only, and the result is converted back
    to RGB. The two colour channels pass through unchanged.
    """
    lightness, chroma_a, chroma_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_RGB2LAB))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized_lab = cv2.merge((equalizer.apply(lightness), chroma_a, chroma_b))
    return cv2.cvtColor(equalized_lab, cv2.COLOR_LAB2RGB)

# === Improved feathering ===
def feather_edges(image, mask, feather_amount=5):
    """Black out everything outside *mask*, leaving a softened outer rim.

    The mask is grown by one 3x3 elliptical dilation. The ring of pixels
    gained by that dilation is Gaussian-blurred (sigma = ``feather_amount``)
    and added back onto the original mask, then clipped to [0, 1] to form a
    per-pixel alpha. The image is multiplied by that alpha on each of its
    three channels and returned as uint8.
    """
    hard = mask.astype(np.float32)
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    # Ring of pixels that lie just outside the original mask.
    rim = cv2.dilate(hard, ellipse, iterations=1) - hard
    soft_rim = cv2.GaussianBlur(rim, (0, 0), feather_amount)
    alpha_2d = np.clip(hard + soft_rim, 0, 1)
    # Replicate the single-channel alpha across the three colour channels.
    alpha_3d = np.dstack((alpha_2d, alpha_2d, alpha_2d))
    return (image * alpha_3d).astype(np.uint8)

# === Settings ===
# An empty project_dir makes every path below relative to the current
# working directory.
project_dir = ""
inferno_dir = os.path.join(project_dir, "data", "images", "inferno")
# (width, height): used directly as the cv2.resize target and compared against
# image.shape[1] / image.shape[0] in the processing loop.
desired_size = (224, 224)
# Minimum pixel count for a connected component (or a split half) to be kept.
min_area = 1000

# === Load SAM ===
# Build the "vit_h" SAM variant from the checkpoint in models/ and wrap it in a
# prompt-based predictor.
sam = sam_model_registry["vit_h"](
    checkpoint=os.path.join(project_dir, "models", "sam_vit_h_4b8939.pth")
)
predictor = SamPredictor(sam)

# === Output directory ===
# The per-hand crops are written here; the directory is created if missing.
output_dir = os.path.join(project_dir, "inferno_output")
os.makedirs(output_dir, exist_ok=True)
print(f"\n Processing inferno images in: {inferno_dir}")

# === Process only inferno images ===
# For each .jpg in inferno_dir: prompt SAM with fixed points, split the
# resulting mask into connected components, keep exactly two of them and save
# each one as a feathered "<name>_left_hand.jpg" / "<name>_right_hand.jpg".

# Every image is resized to desired_size (width, height), so the SAM prompt
# points are identical for all files and are built once, outside the loop.
w, h = desired_size
# Four positive prompts (label 1) at 25%/75% of the width and 40%/60% of the
# height, plus one negative prompt (label 0) at bottom-centre.
# NOTE(review): presumably the positives target the two hands and the negative
# excludes the area between the wrists - confirm against sample images.
input_point = np.array([
    [int(w * 0.25), int(h * 0.6)],
    [int(w * 0.75), int(h * 0.6)],
    [int(w * 0.25), int(h * 0.4)],
    [int(w * 0.75), int(h * 0.4)],
    [int(w * 0.5), int(h * 0.85)]
])
input_label = np.array([1, 1, 1, 1, 0])

for file_path in glob.glob(f"{inferno_dir}/*.jpg"):
    filename = os.path.basename(file_path)
    print(f"\n Processing image: {filename}")

    image = cv2.imread(file_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it rather than crash the whole run in cvtColor.
        print(f"Could not read image, skipping: {file_path}")
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if image.shape[1] != desired_size[0] or image.shape[0] != desired_size[1]:
        image = cv2.resize(image, desired_size, interpolation=cv2.INTER_AREA)

    image = enhance_contrast(image)
    predictor.set_image(image)

    masks, _, _ = predictor.predict(
        point_coords=input_point,
        point_labels=input_label,
        multimask_output=False
    )

    # Only the first returned mask is used; label its connected components.
    mask = masks[0].astype(np.uint8)
    num_labels, labels = cv2.connectedComponents(mask)
    # Label 0 is the background, hence the "- 1".
    print(f" Found {num_labels - 1} components.")

    # Collect (centroid_x, mask) for every component that is large enough.
    hands = []
    for label in range(1, num_labels):
        hand_mask = (labels == label).astype(np.uint8)
        area = cv2.countNonZero(hand_mask)
        if area < min_area:
            continue
        M = cv2.moments(hand_mask)
        if M["m00"] == 0:
            continue
        center_x = int(M["m10"] / M["m00"])
        hands.append((center_x, hand_mask))

    if len(hands) == 1:
        # A single large component may be both hands merged together: cut it
        # at the vertical centre line and accept the split only if both halves
        # are still large enough.
        print("Only 1 component found attempting to split")
        big_mask = hands[0][1]
        mid_x = w // 2
        left_mask = np.zeros_like(big_mask)
        right_mask = np.zeros_like(big_mask)
        left_mask[:, :mid_x] = big_mask[:, :mid_x]
        right_mask[:, mid_x:] = big_mask[:, mid_x:]
        if cv2.countNonZero(left_mask) > min_area and cv2.countNonZero(right_mask) > min_area:
            hands = [(int(w * 0.25), left_mask), (int(w * 0.75), right_mask)]
        else:
            print("Failed to split  skipping image.")
            continue

    if len(hands) != 2:
        print("Skipping did not find exactly 2 hands.")
        continue

    # Order by centroid x so the first entry is the one further left in the
    # image. "left"/"right" below therefore refer to image position.
    hands.sort(key=lambda hand: hand[0])
    base_name = os.path.splitext(filename)[0]

    for side, (_, hand_mask) in zip(("left", "right"), hands):
        feathered_hand = feather_edges(image, hand_mask, feather_amount=5)
        output_path = os.path.join(output_dir, f"{base_name}_{side}_hand.jpg")
        # The working image is RGB; cv2.imwrite expects BGR.
        cv2.imwrite(output_path, cv2.cvtColor(feathered_hand, cv2.COLOR_RGB2BGR))
        print(f" Saved {side} hand to: {output_path}")

Below we locate the joints and the fingertips of the segmented hands that we got from SAM, using the Google HandLandmarker model.

We achieve successful joint recognition for 100% of the successfully segmented hands.

In [None]:
import os
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# === Paths ===
# An empty project_dir makes every path below relative to the current
# working directory.
project_dir = ""
# Input is the "inferno_output" directory that the SAM segmentation cell writes
# its per-hand crops to.
input_dir = os.path.join(project_dir, "inferno_output")
output_dir = os.path.join(project_dir, "inferno_landmarks")
os.makedirs(output_dir, exist_ok=True)

# === Load MediaPipe model ===
model_path = os.path.join(project_dir, "models", "hand_landmarker.task")
# Short aliases for the MediaPipe Tasks classes used below.
BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Single-image (non-video, non-stream) mode.
# NOTE(review): num_hands=2 although each input file is expected to hold one
# segmented hand - presumably a safety margin; confirm.
options = vision.HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.IMAGE,
    num_hands=2
)

landmarker = vision.HandLandmarker.create_from_options(options)

# === Process each image ===
# For every image file in input_dir: run the hand landmarker, draw each
# detected landmark as a filled green dot, and save the annotated copy as
# "landmarks_<file>" in output_dir. Images without detections are saved
# unannotated.
for file in os.listdir(input_dir):
    if not file.lower().endswith((".jpg", ".png", ".jpeg")):
        continue

    image_path = os.path.join(input_dir, file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it rather than crash the whole run in cvtColor.
        print(f"Could not read image, skipping: {image_path}")
        continue

    # MediaPipe is given RGB data; OpenCV loaded the file as BGR.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)

    result = landmarker.detect(mp_image)

    if result.hand_landmarks:
        img_height, img_width = image.shape[:2]
        for hand in result.hand_landmarks:
            for landmark in hand:
                # Landmark coordinates are fractions of the image size.
                x_px = int(landmark.x * img_width)
                y_px = int(landmark.y * img_height)
                cv2.circle(image, (x_px, y_px), 3, (0, 255, 0), -1)

    output_path = os.path.join(output_dir, f"landmarks_{file}")
    cv2.imwrite(output_path, image)
    print(f"Saved landmark image to: {output_path}")

This code also adds the center of the hand based on the joints.

In [None]:
import os
import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# === Paths ===
# An empty project_dir makes every path below relative to the current
# working directory.
project_dir = ""
# Input is the "inferno_output" directory that the SAM segmentation cell writes
# its per-hand crops to.
input_dir = os.path.join(project_dir, "inferno_output")
# NOTE: this is the same output directory (and, in the loop below, the same
# "landmarks_" file prefix) as the previous landmark cell, so running this
# cell overwrites that cell's images.
output_dir = os.path.join(project_dir, "inferno_landmarks")
os.makedirs(output_dir, exist_ok=True)

# === Load MediaPipe model ===
model_path = os.path.join(project_dir, "models", "hand_landmarker.task")
# Short aliases for the MediaPipe Tasks classes used below.
BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Single-image (non-video, non-stream) mode.
# NOTE(review): num_hands=2 although each input file is expected to hold one
# segmented hand - presumably a safety margin; confirm.
options = vision.HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.IMAGE,
    num_hands=2
)
landmarker = vision.HandLandmarker.create_from_options(options)

# === Utilities ===
def to_pixel(landmark, image_shape):
    """Convert a normalized landmark into integer ``(x, y)`` pixel coordinates.

    ``landmark`` must expose ``x`` and ``y`` attributes given as fractions of
    the image width and height. ``image_shape`` is ``(height, width, ...)``;
    only its first two entries are used. The scaled values are truncated
    toward zero by ``int``.
    """
    rows, cols = image_shape[:2]
    x_px = int(landmark.x * cols)
    y_px = int(landmark.y * rows)
    return x_px, y_px

def intersect(p1, p2, p3, p4):
    """Return where the line through p1, p2 crosses the line through p3, p4.

    Each line is written as ``a*x + b*y = c`` and the resulting 2x2 system is
    solved via its determinant. The lines are treated as infinite, so the
    returned point may lie outside either segment. Returns ``None`` when the
    determinant is zero (parallel or degenerate lines); otherwise returns the
    ``(x, y)`` point with both coordinates truncated to ``int``.
    """
    def line_through(start, end):
        # Coefficients (a, b, c) of a*x + b*y = c for the line through both points.
        coef_a = end[1] - start[1]
        coef_b = start[0] - end[0]
        return coef_a, coef_b, coef_a * start[0] + coef_b * start[1]

    a1, b1, c1 = line_through(p1, p2)
    a2, b2, c2 = line_through(p3, p4)

    det = a1 * b2 - a2 * b1
    if det == 0:
        return None

    cross_x = (b2 * c1 - b1 * c2) / det
    cross_y = (a1 * c2 - a2 * c1) / det
    return int(cross_x), int(cross_y)

# === Main loop ===
# For every image file in input_dir: detect hand landmarks, mark an estimated
# palm centre with a cross, draw every landmark as a green dot, and save the
# result as "landmarks_<file>" in output_dir.
for file in os.listdir(input_dir):
    if not file.lower().endswith((".jpg", ".jpeg", ".png")):
        continue

    image_path = os.path.join(input_dir, file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it rather than crash the whole run in cvtColor.
        print(f"Could not read image, skipping: {image_path}")
        continue

    # MediaPipe is given RGB data; OpenCV loaded the file as BGR.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)

    result = landmarker.detect(mp_image)

    if result.hand_landmarks:
        for hand_landmarks in result.hand_landmarks:
            # Landmark indices per the MediaPipe hand model:
            # 0 = wrist, 1 = thumb CMC (base), 12 = middle fingertip,
            # 17 = pinky MCP (base knuckle).
            pts = {
                "thumb": to_pixel(hand_landmarks[1], image.shape),
                "pinky": to_pixel(hand_landmarks[17], image.shape),
                "middle": to_pixel(hand_landmarks[12], image.shape),
                "wrist": to_pixel(hand_landmarks[0], image.shape)
            }
            # Palm centre = crossing of the thumb-base/pinky-base line with
            # the wrist/middle-fingertip line; None when the lines are parallel.
            center = intersect(pts["thumb"], pts["pinky"], pts["wrist"], pts["middle"])
            if center is not None:
                # (255, 0, 0) is blue in OpenCV's BGR order.
                cv2.drawMarker(image, center, (255, 0, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)

            for landmark in hand_landmarks:
                x, y = to_pixel(landmark, image.shape)
                cv2.circle(image, (x, y), 2, (0, 255, 0), -1)

    output_path = os.path.join(output_dir, f"landmarks_{file}")
    cv2.imwrite(output_path, image)
    print(f"Saved with palm center marked: {output_path}")

Below we segment the extracted optical pictures using the Meta SAM model with a slightly different algorithm. We get a segmentation of both hands together.

We achieve successful segmentation for 78% of the images.

In [None]:
import os
from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator

# === Load SAM ===
# NOTE: project_dir is not assigned in this cell; it must already exist from a
# previously executed cell, otherwise this raises NameError.
sam = sam_model_registry["vit_h"](
    checkpoint=os.path.join(project_dir, "models", "sam_vit_h_4b8939.pth")
)
# Automatic (prompt-free) mask generation with the generator's default settings.
mask_generator = SamAutomaticMaskGenerator(sam)

In [None]:
import os
import glob
import cv2
import numpy as np

# === Enhance contrast ===
def enhance_contrast(image):
    """Return *image* with CLAHE applied to its LAB lightness channel.

    The input is converted with ``COLOR_RGB2LAB`` (so it is interpreted as
    RGB), the L channel is equalized with CLAHE (clip limit 2.0, 8x8 tile
    grid), and the image is converted back with ``COLOR_LAB2RGB``. The two
    colour channels are left as they are.
    """
    channels = list(cv2.split(cv2.cvtColor(image, cv2.COLOR_RGB2LAB)))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    # Index 0 is the lightness channel; the other two pass through untouched.
    channels[0] = equalizer.apply(channels[0])
    return cv2.cvtColor(cv2.merge(tuple(channels)), cv2.COLOR_LAB2RGB)

# === Improved feathering ===
def feather_edges(image, mask, feather_amount=5):
    """Keep the masked region of *image* and fade its border into black.

    Steps: dilate the mask once with a 3x3 elliptical kernel, isolate the
    ring the dilation added, Gaussian-blur that ring (sigma =
    ``feather_amount``), add it to the original mask and clip to [0, 1].
    The result is used as a per-pixel alpha on all three channels; the
    product is returned as uint8.
    """
    solid = mask.astype(np.float32)

    # One dilation step so thin structures (fingers, edges) are not lost.
    grow_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    border_ring = cv2.dilate(solid, grow_kernel, iterations=1) - solid

    # Only the ring is blurred; the mask interior keeps full opacity.
    blurred_ring = cv2.GaussianBlur(border_ring, (0, 0), feather_amount)
    alpha_plane = np.clip(solid + blurred_ring, 0, 1)

    # Same alpha on each of the three colour channels.
    alpha_rgb = np.dstack((alpha_plane, alpha_plane, alpha_plane))
    return (image * alpha_rgb).astype(np.uint8)

def average_min_distance_to_corners_with_center(mask):
    """Score a 2-D mask; larger means further from the top/side anchors.

    Despite the name, the base score is the *sum* (not the average) of the
    distances from six anchor points to their nearest mask pixel. The anchors
    are: top-left, top-right, middle-left, middle-right, and top-centre
    counted twice so that it weighs double.

    The base score is halved when the mask touches the top row or the upper
    half of the left or right image border. It is then combined with a
    centre term: ``final = score + (1 + d / d_max) * score``, where ``d`` is
    the distance from the mask centroid to the image centre and ``d_max`` is
    the length of the half-diagonal ``(h / 2, w / 2)``.

    Returns ``-np.inf`` for an empty mask.
    """
    h, w = mask.shape
    rows, cols = np.nonzero(mask)
    if rows.size == 0:
        return -np.inf  # nothing to score

    pixels = np.column_stack((rows, cols))  # (N, 2), each row is (y, x)

    half_h, half_w, last_col = h // 2, w // 2, w - 1
    anchors = np.array([
        (0, 0),             # top-left
        (0, last_col),      # top-right
        (half_h, 0),        # middle-left
        (half_h, last_col), # middle-right
        (0, half_w),        # top-centre
        (0, half_w),        # top-centre repeated for extra weight
    ])

    # Distance from each anchor to every mask pixel, reduced to the nearest one.
    offsets = pixels[np.newaxis, :, :] - anchors[:, np.newaxis, :]
    nearest = np.linalg.norm(offsets, axis=2).min(axis=1)
    score = nearest.sum()

    # Penalize masks touching the top row or the upper half of a side border.
    touches_border = (
        mask[0, :].any()
        or mask[:half_h, 0].any()
        or mask[:half_h, last_col].any()
    )
    if touches_border:
        score *= 0.5

    # Offset of the mask centroid from the image centre, normalized by the
    # half-diagonal length.
    centroid = pixels.mean(axis=0)
    centre_offset = np.linalg.norm(centroid - np.array([h / 2, w / 2]))
    half_diagonal = np.linalg.norm([h / 2, w / 2])
    centre_term = (1 + (centre_offset / half_diagonal)) * score

    return score + centre_term

def calc_mask(masks, width_upper, width_lower, area_lower, area_higher, height_upper=0.9):
    """Filter SAM mask records by size ratios and score the survivors.

    Args:
        masks: Sequence of dicts, each with a ``"segmentation"`` entry holding
            a 2-D array that is cast to uint8 here.
        width_upper: Largest allowed bounding-box width, as a fraction of the
            mask width.
        width_lower: Smallest allowed bounding-box width, as a fraction of the
            mask width.
        area_lower: Smallest allowed non-zero pixel count, as a fraction of
            the total mask area (inclusive).
        area_higher: Largest allowed non-zero pixel count, as a fraction of
            the total mask area (inclusive).
        height_upper: Largest allowed bounding-box height, as a fraction of
            the mask height. Defaults to the previously hard-coded 0.9.

    Returns:
        ``(valid_masks, valid_dist)``: ``valid_masks`` is a list of
        ``(index_in_masks, uint8_mask)`` tuples and ``valid_dist`` holds the
        matching scores from
        ``average_min_distance_to_corners_with_center``, in the same order.
    """
    valid_masks, valid_dist = [], []
    for i, m in enumerate(masks):
        mask = m["segmentation"].astype(np.uint8)

        # Take the frame size from the mask itself instead of module-level
        # globals, so the function works regardless of cell execution order.
        frame_h, frame_w = mask.shape[:2]
        area_ratio = cv2.countNonZero(mask) / (frame_h * frame_w)

        # === Bounding-box check ===
        _, _, box_w, box_h = cv2.boundingRect(mask)
        width_ratio = box_w / frame_w
        height_ratio = box_h / frame_h
        if width_ratio > width_upper or width_ratio < width_lower or height_ratio > height_upper:
            continue

        # === Area check ===
        if area_lower <= area_ratio <= area_higher:
            valid_masks.append((i, mask))
            valid_dist.append(average_min_distance_to_corners_with_center(mask))
    return valid_masks, valid_dist

# === Settings ===
# An empty project_dir makes every path below relative to the current
# working directory.
project_dir =  ""
# Parent directory whose subfolders are scanned by the loop below.
images_base_dir = os.path.join(project_dir, "data", "images")
# (width, height): used directly as the cv2.resize target and compared against
# image.shape[1] / image.shape[0] in the processing loop.
desired_size = (320, 240)
# Width and height as separate module-level names (image_area = h * w below).
w, h = desired_size[0], desired_size[1]
# NOTE(review): min_area is not referenced by the code visible in this cell.
min_area = 1000

# === Loop through subfolders ===
# For every .jpg in the "optical" subfolder: generate SAM masks automatically,
# filter them by size, keep the single best-scoring one and save the feathered
# result as "<name>_hands.jpg".

# Loop-invariant values, set once.
# image_area is kept as a module-level name; do not remove it without checking
# calc_mask, which may read it as a global.
image_area = h * w
width_upper, width_lower, area_lower, area_higher = 0.9, 0.2, 0.12, 0.8

for colormap_folder in os.listdir(images_base_dir):
    # Only the "optical" folder is handled by this cell.
    if colormap_folder != "optical":
        continue
    folder_path = os.path.join(images_base_dir, colormap_folder)
    if not os.path.isdir(folder_path):
        continue

    output_dir = os.path.join(project_dir, f"{colormap_folder}_output8")
    os.makedirs(output_dir, exist_ok=True)
    print(f"\n Processing folder: {colormap_folder}")

    for file_path in glob.glob(f"{folder_path}/*.jpg"):
        filename = os.path.basename(file_path)
        print(f"\n Processing image: {filename}")

        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash on image.shape below.
            print(f"Could not read image, skipping: {file_path}")
            continue

        # NOTE(review): the image is still in OpenCV's BGR order here, yet
        # enhance_contrast converts with COLOR_RGB2LAB and SAM is documented
        # for RGB input. The channel swap was deliberately moved to the save
        # step, and the reported success rate was obtained with this order,
        # so it is left unchanged - confirm whether this is intended.
        if image.shape[1] != desired_size[0] or image.shape[0] != desired_size[1]:
            image = cv2.resize(image, desired_size, interpolation=cv2.INTER_AREA)

        image = enhance_contrast(image)

        masks = mask_generator.generate(image)

        valid_masks, valid_dist = calc_mask(masks, width_upper, width_lower, area_lower, area_higher)

        # Pick the highest-scoring candidate (first one wins on ties); a
        # score must exceed -1 to be accepted at all.
        best_index, best_score = -1, -1
        for idx, score in enumerate(valid_dist):
            if score > best_score:
                best_index, best_score = idx, score

        if best_index == -1:
            print(f"No valid hand mask found, skipping: {filename}")
            continue

        # Save the single best mask (both hands together).
        mask_idx, mask = valid_masks[best_index]
        base_name = os.path.splitext(filename)[0]
        # This swap and the one inside imwrite cancel each other, so the file
        # is written in the same channel order the image was loaded in.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        feathered_hand = feather_edges(image, mask, feather_amount=5)
        output_path = os.path.join(output_dir, f"{base_name}_hands.jpg")
        cv2.imwrite(output_path, cv2.cvtColor(feathered_hand, cv2.COLOR_RGB2BGR))
        print(f"Saved hands (mask {mask_idx}) to: {output_path}")