# ML3D Project: Segment Anything Model (SAM) for 3D point clouds

Welcome to our ML3D project! Our goal is to perform 3D segmentation with the Segment Anything Model (SAM) [[1]](https://arxiv.org/abs/2304.02643), using 3D point clouds as input. Our approach consists of the following steps:

First, a 2D projection of the point cloud is performed using a spherical projection technique, followed by the application of SAM. Our method employs multiple centers for the sphere, providing a more comprehensive and detailed understanding of the entire space. Next, we translate these 2D masks into 3D points. Ultimately, we employ kNN to craft a comprehensive 3D segmentation of the space.

Important: We used ScanNet++ [[2]](https://kaldir.vc.in.tum.de/scannetpp/) as our dataset. The scene scans are provided in the .ply file format; we converted the .ply files to .las files using the open-source tool CloudCompare [[3]](https://www.cloudcompare.org/).

All imports:

In [None]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
import os
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Copy the project folder from Drive, then make the Drive copy the working directory.
# NOTE(review): the recorded output of this cell shows the cp failing with
# "cannot create directory 'ml3d_project/.'" -- confirm the destination path.
!cp -r /content/drive/MyDrive/ml3d_project ml3d_project/. # edit correct path
os.chdir('/content/drive/MyDrive/ml3d_project/')
# Install third-party packages (laspy and pillow are imported right after this).
#!pip uninstall laspy
!pip install laspy
!pip install trimesh
!pip install pillow


import numpy as np
import matplotlib.pyplot as plt
import laspy
from PIL import Image, ImageDraw

Mounted at /content/drive
cp: cannot create directory 'ml3d_project/.': No such file or directory
Installing requirements


In [None]:
!pip install pycocotools
!pip install -q jupyter_bbox_widget roboflow dataclasses-json supervision
from PIL import Image
import json
from pycocotools.coco import COCO
from skimage import data, color
from skimage.transform import rescale, resize, downscale_local_mean
import tifffile
import random
from scipy import ndimage
import torch
import torch.nn as nn
import shutil
import sys
import cv2
import supervision as sv

# All necessary functions for the computation of the segmented point cloud

In [None]:
def generate_spherical_image_flip(center_coordinates, point_cloud, colors, resolution_y, camera_angle):
    """Project a coloured point cloud onto a spherical (longitude/latitude) image.

    The points are shifted so that ``center_coordinates`` becomes the origin,
    rotated about the z-axis by ``camera_angle`` and converted to spherical
    angles. The azimuth selects the image column, the polar angle the row.
    When several points fall into the same pixel, the one closest to the
    centre wins; on equal distance the point with the lowest index wins.

    Parameters
    ----------
    center_coordinates : array-like of shape (3,)
        Centre of the projection sphere.
    point_cloud : array-like of shape (N, 3)
        Point coordinates (x, y, z).
    colors : array-like of shape (N, 3)
        One colour per point; values are stored into a ``uint8`` image.
    resolution_y : int
        Image height in pixels; the width is ``2 * resolution_y``.
    camera_angle : float
        Rotation about the z-axis in degrees.

    Returns
    -------
    image : numpy.ndarray of shape (resolution_y, 2 * resolution_y, 3), uint8
        The projected colour image; pixels without a point stay 0.
    mapping : numpy.ndarray of shape (resolution_y, 2 * resolution_y), int
        Index of the point shown at each pixel, or -1 where no point landed.

    Notes
    -----
    Points that coincide with the centre (distance 0) or have a non-finite
    distance have no defined direction and are skipped instead of raising.
    """
    angle = np.radians(camera_angle)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    R = np.array([[cos_a, -sin_a, 0],
                  [sin_a, cos_a, 0],
                  [0, 0, 1]])

    translated_points = np.asarray(point_cloud) - center_coordinates
    transformed_points = np.dot(translated_points, R.T)
    # Computed once for all points; used both for the polar angle and for
    # deciding which point is nearest within a pixel.
    distances = np.linalg.norm(transformed_points, axis=1)

    resolution_x = 2 * resolution_y
    image = np.zeros((resolution_y, resolution_x, 3), dtype=np.uint8)
    mapping = np.full((resolution_y, resolution_x), -1, dtype=int)

    # A point at the centre would produce 0/0 -> NaN -> failing int conversion.
    candidates = np.flatnonzero(np.isfinite(distances) & (distances > 0))
    if candidates.size == 0:
        return image, mapping

    pts = transformed_points[candidates]
    dist = distances[candidates]

    theta = np.arctan2(pts[:, 1], pts[:, 0])
    # Clip guards against |z / r| exceeding 1 by a rounding error.
    phi = np.arccos(np.clip(pts[:, 2] / dist, -1.0, 1.0))

    x = (theta + np.pi) / (2 * np.pi) * resolution_x
    y = phi / np.pi * resolution_y
    ix = np.clip(x.astype(int), 0, resolution_x - 1)
    iy = np.clip(y.astype(int), 0, resolution_y - 1)

    # Stable sort by distance keeps the original index order among ties, so
    # the first occurrence of each pixel is "nearest, then lowest index".
    order = np.argsort(dist, kind="stable")
    flat_pixels = iy[order] * resolution_x + ix[order]
    _, first_hit = np.unique(flat_pixels, return_index=True)
    winners = order[first_hit]

    point_ids = candidates[winners]
    mapping[iy[winners], ix[winners]] = point_ids
    image[iy[winners], ix[winners]] = np.asarray(colors)[point_ids]
    return image, mapping

In [None]:
def color_point_cloud(image_path, point_cloud, mapping):
    """Attach the colours of a projected image to the points it was built from.

    Parameters
    ----------
    image_path : str
        Path of the image to take colours from; read with ``cv2.imread``.
    point_cloud : numpy.ndarray of shape (N, 3)
        Point coordinates (x, y, z).
    mapping : numpy.ndarray of shape (H, W)
        For each image pixel the index of the point shown there, or -1 when
        the pixel shows no point. Must have the same height/width as the image.

    Returns
    -------
    numpy.ndarray of shape (N, 6), float32
        Columns 0-2 hold the coordinates, columns 3-5 the colour in
        red, green, blue order. Points that appear in no pixel keep colour 0.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read.
    ValueError
        If image and mapping do not have the same height and width.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    mapping = np.asarray(mapping)
    if image.shape[:2] != mapping.shape[:2]:
        raise ValueError(
            f"Image size {image.shape[:2]} does not match mapping size {mapping.shape[:2]}"
        )

    modified_point_cloud = np.zeros((point_cloud.shape[0], point_cloud.shape[1] + 3), dtype=np.float32)
    modified_point_cloud[:, :3] = point_cloud

    hit = mapping != -1
    # OpenCV delivers channels as B, G, R; reverse them so that column 3 is
    # red and column 5 is blue, matching how the columns are exported later.
    modified_point_cloud[mapping[hit], 3:] = image[hit][:, ::-1]
    return modified_point_cloud

In [None]:
def export_point_cloud(cloud_path, modified_point_cloud):
    """Write a coloured point cloud to a LAS file at ``cloud_path``.

    ``modified_point_cloud`` is indexed as a 2-D array whose first six columns
    are x, y, z, red, green, blue. The file uses point format 3, LAS version
    1.2 and declares an extra int32 dimension named "random", which this
    function does not fill. A confirmation line is printed after writing.
    """
    las_header = laspy.LasHeader(point_format=3, version="1.2")
    las_header.add_extra_dim(laspy.ExtraBytesParams(name="random", type=np.int32))
    las_data = laspy.LasData(las_header)

    # Column k of the input array feeds the k-th LAS dimension listed here.
    for column, dimension in enumerate(("x", "y", "z", "red", "green", "blue")):
        setattr(las_data, dimension, modified_point_cloud[:, column])

    las_data.write(cloud_path)
    print("Export succesful at: ", cloud_path)

# Spherical Projection of 3D PointClouds


In [None]:
# Passed to generate_spherical_image_flip as resolution_y (image height in
# pixels); that function makes the image twice as wide.
resolution = 1024
# Presumably the camera rotation angle(s) in degrees for the projection,
# given as a list -- verify how the projection cell consumes it.
camera_angle = [0]
# Per-scene results collected by the projection loop: the projected images,
# the pixel -> point-index mappings and the sub-sampled point clouds.
spherical_images = []
mappings = []
random_point_clouds = []

In [None]:
directory_path = '/content/drive/MyDrive/ml3d_project/data/pointclouds'
directory_output = '/content/drive/MyDrive/ml3d_project/data/images'
os.makedirs(directory_output, exist_ok=True)

# `camera_angle` is configured as a list, but the projection takes a single
# angle in degrees. Later cells look results up by scene index
# (one entry per scene), so exactly one angle -- the first -- is used.
angle = np.atleast_1d(camera_angle)[0]

# Start from empty result lists so that re-running this cell does not append a
# second set of entries: the sub-sample is random, so old mappings would no
# longer match the freshly written images.
spherical_images.clear()
mappings.clear()
random_point_clouds.clear()

for i in range(1, 11):
    file_path = directory_path + f'/pc_aligned_{i}.las'
    las = laspy.read(file_path)

    # (N, 3) array of x, y, z coordinates.
    coords = np.vstack((las.x, las.y, las.z))
    point_cloud = coords.transpose()

    # LAS stores 16-bit colour channels; bring them to the 0-255 range.
    r = (las.red/65535*255).astype(int)
    g = (las.green/65535*255).astype(int)
    b = (las.blue/65535*255).astype(int)
    colors = np.vstack((r,g,b)).transpose()

    # Work on a random half of the points (drawn without replacement).
    num_points = len(point_cloud)
    random_indices = np.random.choice(num_points, size=num_points // 2, replace=False)
    random_point_cloud = point_cloud[random_indices]
    random_point_clouds.append(random_point_cloud)
    random_colors = colors[random_indices]

    # The sphere is centred on the centroid of the sub-sampled cloud.
    center = np.mean(random_point_cloud, axis=0)

    spherical_image, mapping = generate_spherical_image_flip(center, random_point_cloud, random_colors, resolution, angle)
    spherical_images.append(spherical_image)
    mappings.append(mapping)

    spherical_image = Image.fromarray(spherical_image)
    spherical_image.save(directory_output + f'/pc_aligned_spherical_{i}.jpg')

In [None]:
# Show the first spherical image without axes; the figure size (in inches) is
# the image's pixel width/height divided by 72.
first_image = spherical_images[0]
rows, cols = np.shape(first_image)[:2]
fig = plt.figure(figsize=(cols/72, rows/72))
fig.add_axes([0,0,1,1])
plt.imshow(first_image)
plt.axis('off')

# SAM Segmentation

In [None]:
# Switch to the project folder on Google Drive and keep the resulting working
# directory in HOME for the cells that follow.
gdrive_path = '/content/drive/MyDrive/ml3d_project'
os.chdir(gdrive_path)
HOME = os.getcwd()
print("HOME:", HOME)

In [None]:
# Install Segment Anything Model (SAM) and other dependencies
%cd {HOME}
!{sys.executable} -m pip install 'git+http://github.com/facebookresearch/segment-anything.git'

In [None]:
#Download weights from the database
path_weights = "/content/drive/MyDrive/ml3d_project/SAMWeights"
%cd $path_weights

!wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

In [None]:
# Full path of the SAM checkpoint inside the weights folder; print it together
# with whether the file actually exists.
CHECKPOINT_PATH = os.path.join(path_weights, "sam_vit_h_4b8939.pth")
print(CHECKPOINT_PATH, "; exist:", os.path.isfile(CHECKPOINT_PATH))

In [None]:
# Choose the compute device and the SAM variant (the model itself is built in a later cell).
import torch
torch.cuda.empty_cache()
# Use the first CUDA GPU when one is available, otherwise the CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Key used to look the model constructor up in sam_model_registry.
MODEL_TYPE = "vit_h"

In [None]:
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# Build the SAM variant named by MODEL_TYPE from the checkpoint file and move it to DEVICE.
sam = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT_PATH).to(device=DEVICE)

In [None]:
# Automatic mask generator built on the loaded SAM model; its generate() method
# is called once per image in the segmentation loop.
mask_generator = SamAutomaticMaskGenerator(sam)

In [None]:
# Segment every spherical image with SAM, show source and result side by side
# and store the annotated image for the 2D -> 3D mapping step.
sam_results = []

segmented_dir = '/content/drive/MyDrive/ml3d_project/data/segmented_images'
os.makedirs(segmented_dir, exist_ok=True)

# Created once and reused for every image.
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)

for i in range(1, 11):
    # Must match the file name written by the spherical projection cell
    # ('pc_aligned_spherical_{i}.jpg').
    IMAGE_PATH = f"/content/drive/MyDrive/ml3d_project/data/images/pc_aligned_spherical_{i}.jpg"
    print(IMAGE_PATH)
    image_bgr = cv2.imread(IMAGE_PATH)
    if image_bgr is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        raise FileNotFoundError(f"Could not read spherical image: {IMAGE_PATH}")
    # SAM is given the image in RGB channel order.
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    sam_result = mask_generator.generate(image_rgb)
    sam_results.append(sam_result)

    detections = sv.Detections.from_sam(sam_result=sam_result)
    annotated_image = mask_annotator.annotate(scene=image_bgr.copy(), detections=detections)

    sv.plot_images_grid(
        images=[image_bgr, annotated_image],
        grid_size=(1, 2),
        titles=['source image', 'segmented image']
    )

    # NOTE(review): JPEG is lossy, so colours read back from this file are not
    # exactly the annotated colours -- consider a lossless format if the exact
    # values matter downstream.
    output_path = f'/content/drive/MyDrive/ml3d_project/data/segmented_images/segmented_image_{i}.jpg'
    if not cv2.imwrite(output_path, annotated_image):
        raise IOError(f"Could not write segmented image: {output_path}")

# 2D to 3D Mapping

*   After the spherical projection and segmentation, we map the segmented images back to 3D space.
*   We also export the colored point cloud to a .las file.

In [None]:
# For each scene: colour the sub-sampled point cloud from its segmented image
# and write the result to a LAS file.
for i in range(1, 11):
    image_path = f'/content/drive/MyDrive/ml3d_project/data/segmented_images/segmented_image_{i}.jpg'

    # Result lists are 0-based while the file names count from 1.
    mapping, point_cloud = mappings[i - 1], random_point_clouds[i - 1]

    modified_point_cloud = color_point_cloud(image_path, point_cloud, mapping)
    export_point_cloud(
        f'/content/drive/MyDrive/ml3d_project/output/segmented_pointclouds/pc_aligned_{i}.las',
        modified_point_cloud,
    )

Now you can use CloudCompare to view the segmented point cloud.



# k-NN

Color Assignment Logic: Assign the most frequent (mode) color among the nearest neighbors. This approach is useful to ensure that the assigned color is actually one of the colors present in the neighborhood, rather than a blend that might not exist.
This approach should classify unsegmented points based on the colors of their nearest segmented neighbors, helping to fill in unsegmented regions of the point cloud with appropriate colors.

In [None]:
from sklearn.neighbors import NearestNeighbors
from scipy.stats import mode

# Fill uncoloured points with the most frequent colour among their nearest
# coloured neighbours and write the completed cloud to a new LAS file.
directory_path = '/content/drive/MyDrive/ml3d_project/output/segmented_pointclouds'
output_directory = '/content/drive/MyDrive/ml3d_project/output/kNN_output'
os.makedirs(output_directory, exist_ok=True)

n_neighbors = 10

for i in range(1, 11):
    file_path = directory_path + f'/pc_aligned_{i}.las'

    las_file = laspy.read(file_path)
    points = np.vstack((las_file.x, las_file.y, las_file.z)).transpose()
    colors = np.vstack((las_file.red, las_file.green, las_file.blue)).transpose()
    # A point counts as unassigned when all three colour channels are 0.
    unassigned_mask = np.all(colors == 0, axis=1)
    assigned_mask = ~unassigned_mask

    X_train = points[assigned_mask]
    X_test = points[unassigned_mask]

    if len(X_train) > 0 and len(X_test) > 0:
        # Pack every (r, g, b) triple of 16-bit channels into one integer label
        # so that voting happens on whole colours. Taking the mode channel by
        # channel could combine channels from different neighbours into a
        # colour none of them has.
        packed = colors[assigned_mask].astype(np.int64)
        labels_train = (packed[:, 0] << 32) | (packed[:, 1] << 16) | packed[:, 2]

        # Named `knn`, not `nn`, to avoid clobbering the `torch.nn as nn` import.
        # Never ask for more neighbours than there are coloured points.
        knn = NearestNeighbors(n_neighbors=min(n_neighbors, len(X_train)))
        knn.fit(X_train)
        _, indices = knn.kneighbors(X_test)

        # reshape(-1) copes with SciPy versions that keep or drop the reduced axis.
        voted = np.asarray(mode(labels_train[indices], axis=1).mode).reshape(-1)
        colors_test = np.stack(
            ((voted >> 32) & 0xFFFF, (voted >> 16) & 0xFFFF, voted & 0xFFFF),
            axis=1,
        )
        colors[unassigned_mask] = colors_test.astype(colors.dtype)
    elif len(X_test) > 0:
        # Nothing to learn from: leave the cloud as it is rather than crash.
        print(f"No colored points in {file_path}; writing the point cloud unchanged.")

    new_las = laspy.create(point_format=las_file.header.point_format, file_version=las_file.header.version)
    new_las.x, new_las.y, new_las.z = points.transpose()
    new_las.red, new_las.green, new_las.blue = colors.transpose()

    new_las_path = output_directory + f'/kNN_segmented_pointcloud_final_{i}.las'
    new_las.write(new_las_path)
    print(f"Updated LAS file saved to: {new_las_path}")

Again, you can now open the output point cloud files in CloudCompare to view the segmented point cloud.

# Bibliography

[1] Segment Anything
Kirillov et al.
https://arxiv.org/abs/2304.02643

[2] Dataset: ScanNet++
https://kaldir.vc.in.tum.de/scannetpp/

[3] Cloud Compare:
https://www.cloudcompare.org/

