In [1]:
import os
import json
import numpy as np
import trimesh
from tqdm import tqdm

In [3]:
# --- Configuration for the first augmentation run ---

# Inputs: keypoint annotation file and the directory holding the meshes
MESH_DIR = "Guitars/All"
ORIGINAL_JSON = "guitar.json"

# Output: annotation file written at the end of the run
OUTPUT_JSON = "guitar_augmented.json"

# Tunable parameters
NUM_POINTS = 8192       # point cloud size
NUM_AUGMENTATIONS = 5   # augmented variants generated per original sample

In [4]:
# Load original dataset.
# Decode explicitly as UTF-8 instead of relying on the platform's default encoding.
with open(ORIGINAL_JSON, 'r', encoding="utf-8") as f:
    original_data = json.load(f)

# Dedicated, seeded generator: re-running this cell on a fresh kernel
# reproduces exactly the same augmented keypoints.
AUGMENTATION_SEED = 42
rng = np.random.default_rng(AUGMENTATION_SEED)

augmented_data = []

for sample in tqdm(original_data, desc="Augmenting dataset"):
    model_id = sample["model_id"]
    mesh_path = os.path.join(MESH_DIR, model_id + ".ply")

    if not os.path.exists(mesh_path):
        print(f"Mesh {mesh_path} not found, skipping.")
        continue

    # The mesh is loaded only to validate it: samples whose mesh is missing
    # or has no faces are not augmented.
    mesh = trimesh.load(mesh_path, force='mesh')
    if mesh.is_empty or len(mesh.faces) == 0:
        print(f"Mesh {model_id} is empty, skipping.")
        continue

    # Original keypoints, one row per keypoint
    keypoints = np.array([kp["xyz"] for kp in sample["keypoints"]])
    if len(sample["keypoints"]) == 0:
        # np.array([]) has shape (0,), which cannot be multiplied by the 3x3
        # rotation below; give it an explicit (0, 3) shape so a sample without
        # keypoints yields augmented entries with an empty keypoint list.
        keypoints = keypoints.reshape(0, 3)

    # Create multiple augmentations for each sample.
    # Only the keypoints are transformed and stored. An earlier version also
    # sampled, transformed and jittered a NUM_POINTS-sized point cloud on every
    # iteration, but that cloud was never saved or read, so it was dropped.
    # NOTE(review): this cell writes no mesh for the "<model_id>_aug_<k>" ids it
    # creates, and the random transform is not recorded -- confirm downstream
    # code has geometry that matches these keypoints.
    for aug_id in range(NUM_AUGMENTATIONS):
        # 1. Random rotation about the Z axis
        theta = rng.uniform(0, 2*np.pi)
        cos_theta, sin_theta = np.cos(theta), np.sin(theta)
        Rz = np.array([[cos_theta, -sin_theta, 0],
                       [sin_theta,  cos_theta, 0],
                       [0, 0, 1]])
        # Row-vector convention: p' = p @ Rz.T  (equivalent to Rz @ p per point)
        keypoints_aug = keypoints @ Rz.T

        # 2. Uniform scaling (90% to 110%)
        scale = rng.uniform(0.9, 1.1)
        keypoints_aug *= scale

        # 3. XY translation (±0.05 shift); Z is left untouched
        shift_xy = rng.uniform(-0.05, 0.05, size=(1, 2))
        keypoints_aug[:, :2] += shift_xy

        # Save new augmented sample (keypoints only; mesh stays unchanged)
        new_sample = {
            "model_id": f"{model_id}_aug_{aug_id}",
            "keypoints": [
                {"semantic_id": kp["semantic_id"], "xyz": xyz.tolist()}
                for kp, xyz in zip(sample["keypoints"], keypoints_aug)
            ]
        }
        augmented_data.append(new_sample)



Augmenting dataset:   0%|          | 0/697 [00:00<?, ?it/s]

Augmenting dataset: 100%|██████████| 697/697 [02:51<00:00,  4.07it/s]


In [5]:
# Combine the untouched originals with the generated variants (originals first)
full_dataset = [*original_data, *augmented_data]

# Serialise the merged list as pretty-printed JSON
with open(OUTPUT_JSON, "w") as out_file:
    out_file.write(json.dumps(full_dataset, indent=2))

total_samples = len(full_dataset)
print(f"Augmented dataset saved to {OUTPUT_JSON}. Total samples: {total_samples}")


Augmented dataset saved to guitar_augmented.json. Total samples: 4182


In [10]:
# --- Configuration for the mesh-exporting augmentation run ---

# Inputs: keypoint annotation file and the directory holding the source meshes
MESH_DIR = "Guitars/9_points"
ORIGINAL_JSON = "guitar_9.json"

# Outputs: annotation file and the directory for exported meshes
OUTPUT_MESH_DIR = "Guitars/9_Augmented/"
OUTPUT_JSON = "guitar_9_augmented.json"

# Tunable parameters
NUM_AUGMENTATIONS = 4   # augmented variants generated per original sample

In [11]:
# Create output directory for augmented meshes (no error if it already exists)
os.makedirs(OUTPUT_MESH_DIR, exist_ok=True)

# Load original dataset.
# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, which differs between operating systems and locales.
with open(ORIGINAL_JSON, 'r', encoding="utf-8") as f:
    original_data = json.load(f)

In [12]:
# Dedicated, seeded generator: re-running this cell on a fresh kernel writes
# the same augmented meshes and keypoints instead of silently overwriting the
# exported files with different random variants.
AUGMENTATION_SEED = 42
rng = np.random.default_rng(AUGMENTATION_SEED)

augmented_data = []

for sample in tqdm(original_data, desc="Augmenting dataset with meshes"):
    model_id = sample["model_id"]
    mesh_path = os.path.join(MESH_DIR, model_id + ".ply")

    if not os.path.exists(mesh_path):
        print(f"Mesh {mesh_path} not found, skipping.")
        continue

    mesh = trimesh.load(mesh_path, force='mesh')
    if mesh.is_empty or len(mesh.faces) == 0:
        print(f"Mesh {model_id} is empty, skipping.")
        continue

    vertices = mesh.vertices
    faces = mesh.faces

    # Original keypoints, one row per keypoint
    keypoints = np.array([kp["xyz"] for kp in sample["keypoints"]])
    if len(sample["keypoints"]) == 0:
        # np.array([]) has shape (0,), which cannot be multiplied by the 3x3
        # rotation below; give it an explicit (0, 3) shape so the mesh is still
        # augmented and the new entry carries an empty keypoint list.
        keypoints = keypoints.reshape(0, 3)

    for aug_id in range(NUM_AUGMENTATIONS):
        # The same rotation, scale and shift are applied to the mesh vertices
        # and to the keypoints so the two stay aligned.

        # 1. Random rotation about the Z axis
        theta = rng.uniform(0, 2*np.pi)
        cos_theta, sin_theta = np.cos(theta), np.sin(theta)
        Rz = np.array([[cos_theta, -sin_theta, 0],
                       [sin_theta,  cos_theta, 0],
                       [0, 0, 1]])
        # Row-vector convention: p' = p @ Rz.T. The matmul allocates new
        # arrays, so the in-place ops below never touch the loaded mesh.
        vertices_aug = vertices @ Rz.T
        keypoints_aug = keypoints @ Rz.T

        # 2. Uniform scaling (90% to 110%)
        scale = rng.uniform(0.9, 1.1)
        vertices_aug *= scale
        keypoints_aug *= scale

        # 3. XY translation (±0.05 shift); Z is left untouched
        shift_xy = rng.uniform(-0.05, 0.05, size=(1, 2))
        vertices_aug[:, :2] += shift_xy
        keypoints_aug[:, :2] += shift_xy

        # Save augmented mesh; process=False keeps the vertex array as given,
        # with the original face indices reused unchanged.
        augmented_mesh = trimesh.Trimesh(vertices=vertices_aug, faces=faces, process=False)
        augmented_mesh_filename = f"{model_id}_aug_{aug_id}.ply"
        augmented_mesh.export(os.path.join(OUTPUT_MESH_DIR, augmented_mesh_filename))

        # Save augmented keypoints under the same id as the exported mesh file
        new_sample = {
            "model_id": f"{model_id}_aug_{aug_id}",
            "keypoints": [
                {"semantic_id": kp["semantic_id"], "xyz": xyz.tolist()}
                for kp, xyz in zip(sample["keypoints"], keypoints_aug)
            ]
        }
        augmented_data.append(new_sample)

# Merge original + augmented samples.
# Only augmented meshes are written to OUTPUT_MESH_DIR; the meshes of the
# original entries remain in MESH_DIR.
full_dataset = original_data + augmented_data



Augmenting dataset with meshes: 100%|██████████| 587/587 [02:10<00:00,  4.51it/s]


In [13]:
# Serialise the merged dataset as pretty-printed JSON
with open(OUTPUT_JSON, "w") as out_file:
    out_file.write(json.dumps(full_dataset, indent=2))

total_samples = len(full_dataset)
print(f"Augmented dataset with meshes saved to {OUTPUT_JSON}. Total samples: {total_samples}")


Augmented dataset with meshes saved to guitar_9_augmented.json. Total samples: 2935
