In [None]:
import torch
# Print the installed PyTorch version.
print(torch.__version__)
# Print whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())
# Print the CUDA version reported by this PyTorch build.
print(torch.version.cuda)

# Select the compute device: "cuda" when available, otherwise "cpu".
# NOTE(review): a later cell runs `from utils import device`, which rebinds this
# name — confirm both definitions agree.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")




#### Utils Functions
Utility functions to help with the implementation of the solution.


In [28]:
import open3d as o3d
import numpy as np

#Takes a screenshot of the point cloud and saves it to the specified path
def render_point_cloud_to_image(pcd, image_path="output/tmp_screen.png"):
    """Render `pcd` in a hidden Open3D window and save a screenshot.

    Args:
        pcd: Open3D geometry to draw.
        image_path: destination file for the captured image.
    """
    offscreen_viewer = o3d.visualization.Visualizer()
    offscreen_viewer.create_window(visible=False)
    offscreen_viewer.add_geometry(pcd)

    # Process pending events and refresh the renderer before grabbing the frame.
    offscreen_viewer.poll_events()
    offscreen_viewer.update_renderer()

    offscreen_viewer.capture_screen_image(image_path)
    offscreen_viewer.destroy_window()


#Shows the point cloud in a window or saves it to the specified path
def show_point_cloud(point_cloud,render_to_image=False,save_path="output/tmp_screen.png"):
    """Display a point cloud interactively, or save a screenshot of it.

    Args:
        point_cloud: sequence of 3D points accepted by `o3d.utility.Vector3dVector`.
        render_to_image: when True, write a screenshot to `save_path` instead of
            opening an interactive window.
        save_path: screenshot destination (only used when `render_to_image` is True).
    """
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(point_cloud)

    if not render_to_image:
        o3d.visualization.draw_geometries([cloud])
        return

    render_point_cloud_to_image(cloud, save_path)
    
#Shows the point cloud in a window or saves it to the specified path, coloring the points based on the probabilities
def show_point_cloud_tresholded(point_cloud,probs,treshold,render_to_image=False,save_path="output/tmp_screen.png"):
    """Show (or screenshot) a point cloud with points above a threshold painted red.

    Points whose probability is strictly greater than `treshold` are colored red,
    all the others gray.

    Args:
        point_cloud: sequence of N 3D points.
        probs: per-point probabilities. Accepted layouts are (N,), (N, 1) or
            (N, 2); in the two-column case only column 0 is used (column 1 is
            the "no object" class). Torch tensors are converted to NumPy.
        treshold: probability above which a point is highlighted.
        render_to_image: when True, save a screenshot to `save_path` instead of
            opening an interactive window.
        save_path: screenshot destination.

    Raises:
        ValueError: if the number of probabilities does not match the number of points.
    """
    # Accept torch tensors (possibly on GPU / attached to a graph) as well as arrays.
    if hasattr(probs, "detach"):
        probs = probs.detach().cpu().numpy()
    probs = np.asarray(probs)

    # If probs has 2 columns, drop the second one (the "no object" class).
    # The ndim check keeps 1-D inputs from failing on `shape[1]`.
    if probs.ndim == 2 and probs.shape[1] == 2:
        probs = probs[:, 0]
    probs = probs.reshape(-1)

    if len(probs) != len(point_cloud):
        raise ValueError(
            f"Expected one probability per point, got {len(probs)} probabilities "
            f"for {len(point_cloud)} points."
        )

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(point_cloud)

    # Red for highlighted points, gray for the rest (vectorized over all points).
    highlighted = (probs > treshold)[:, None]
    colors = np.where(highlighted, [1.0, 0.0, 0.0], [0.5, 0.5, 0.5])
    pcd.colors = o3d.utility.Vector3dVector(colors)

    if render_to_image:
        render_point_cloud_to_image(pcd,save_path)
    else:
        o3d.visualization.draw([pcd])

     
def create_point_cloud_from_mesh(mesh_path,name):
    """Sample a point cloud from a triangle mesh and save it as a PLY file.

    Args:
        mesh_path: path of the mesh file to read.
        name: base name of the output file, written to `output/{name}.ply`.

    Returns:
        The sampled Open3D point cloud (10000 uniformly sampled points).
    """
    source_mesh = o3d.io.read_triangle_mesh(mesh_path)
    sampled_cloud = source_mesh.sample_points_uniformly(number_of_points=10000)  # Tune if needed.

    output_file = f"output/{name}.ply"
    o3d.io.write_point_cloud(output_file, sampled_cloud)
    return sampled_cloud

#### Load dataset
Extract the dataset from the input file, parse it and return the relevant information.

In [29]:
import pickle
def load_dataset(path):
    """Load the pickled dataset at `path` and keep only the fields used downstream.

    Each input record must provide the keys "shape_id", "semantic class",
    "affordance" and "full_shape"; the latter is exposed as "data_info".

    Args:
        path: path of the pickle file to read.

    Returns:
        A list of dicts with keys "shape_id", "semantic class", "affordance"
        and "data_info", in the same order as the input records.
    """
    # SECURITY: pickle can execute arbitrary code while loading; only open trusted files.
    with open(path, 'rb') as handle:
        records = pickle.load(handle)
    print("Loaded train_data")

    return [
        {
            "shape_id": record["shape_id"],
            "semantic class": record["semantic class"],
            "affordance": record["affordance"],
            "data_info": record["full_shape"],
        }
        for record in records
    ]

## Functions for 3rd part


In [30]:
import open3d as o3d

import numpy as np


# Function to align the probabilities of the predicted points to the groundtruth points 
# (considering for each groundtruth point the closest point in the predicted points)
def align_prob(gt_points,approx_mesh_points,pred_class):
    """Align per-vertex predictions to the ground-truth points.

    For every ground-truth point (in order) the closest not-yet-used mesh vertex
    is selected, and that vertex's prediction is assigned to the point. Each
    vertex is used at most once as long as unused vertices remain.

    Args:
        gt_points: iterable of ground-truth 3D points.
        approx_mesh_points: tensor of mesh vertices, one 3D point per row.
        pred_class: tensor of per-vertex predictions, indexed like
            `approx_mesh_points` along the first dimension.

    Returns:
        NumPy array holding, for each ground-truth point, the prediction of its
        matched vertex (first dimension has the length of `gt_points`).
    """
    # Work on a private copy: for a CPU tensor `.numpy()` shares memory with the
    # tensor, so without `.copy()` the `inf` markers written below would corrupt
    # the caller's vertices.
    remaining = approx_mesh_points.detach().cpu().numpy().copy()

    matched_vertex = []
    for point in gt_points:
        # Distance from this ground-truth point to every mesh vertex.
        distances = np.linalg.norm(remaining - np.asarray(point), axis=1)
        closest_index = int(np.argmin(distances))
        matched_vertex.append(closest_index)

        # Mark the vertex as used so it is not selected again.
        remaining[closest_index] = np.inf

    # Gather the predictions of the matched vertices in one indexing operation.
    predictions = pred_class.detach().cpu().numpy()
    return predictions[np.asarray(matched_vertex, dtype=np.int64)]

#Average Intersection over Union: mean of the IoU computed at thresholds 0.01, 0.02, ..., 0.99
def aIoU(gt_prob,pred_classes_aligned):
    """Average IoU between ground truth and prediction over 99 thresholds.

    For each threshold t in {0.01, 0.02, ..., 0.99} a point counts as positive
    when its value is strictly greater than t. The IoU of the two positive sets
    is computed (defined as 1 when both sets are empty) and the 99 values are
    averaged.

    Args:
        gt_prob: ground-truth probabilities, shape (N,) or (N, 1).
        pred_classes_aligned: predictions aligned to the ground truth, shape
            (N, C); only column 0 is used.

    Returns:
        The average IoU as a float in [0, 1].
    """
    gt = np.asarray(gt_prob).reshape(-1)
    if gt.size == 0:
        # No points: every threshold has an empty union, which counts as IoU 1.
        return 1.0
    pred = np.asarray(pred_classes_aligned)[:len(gt), 0]

    # Exact k/100 thresholds: repeatedly adding 0.01 to a float drifts, which makes
    # both the threshold values and the iteration count depend on rounding.
    thresholds = np.arange(1, 100) / 100.0

    # Row k holds the positive mask at threshold k.
    gt_hit = gt[None, :] > thresholds[:, None]
    pred_hit = pred[None, :] > thresholds[:, None]

    intersection = np.logical_and(gt_hit, pred_hit).sum(axis=1)
    union = np.logical_or(gt_hit, pred_hit).sum(axis=1)

    # IoU defaults to 1 where the union is empty; elsewhere intersection / union.
    iou = np.ones(len(thresholds), dtype=float)
    np.divide(intersection, union, out=iou, where=union > 0)

    return float(iou.mean())

#Intersection over Union at one fixed pair of thresholds (one for the ground truth, one for the prediction)
def mIoU(gt_prob,pred_classes_aligned,treshold_gt,treshold_pred):
    """IoU between thresholded ground truth and thresholded prediction.

    A point is positive in the ground truth when `gt_prob[i] > treshold_gt` and
    positive in the prediction when `pred_classes_aligned[i][0] > treshold_pred`.

    Args:
        gt_prob: per-point ground-truth probabilities.
        pred_classes_aligned: per-point predictions; element 0 of each entry is used.
        treshold_gt: threshold applied to the ground truth.
        treshold_pred: threshold applied to the prediction.

    Returns:
        intersection / union of the two positive sets, or 1 when the union is empty.
    """
    both_positive = 0
    any_positive = 0
    for idx in range(len(gt_prob)):
        gt_hit = gt_prob[idx] > treshold_gt
        pred_hit = pred_classes_aligned[idx][0] > treshold_pred
        if gt_hit and pred_hit:
            both_positive += 1
        if gt_hit or pred_hit:
            any_positive += 1

    return 1 if any_positive == 0 else both_positive / any_positive
    

## Functions for 2nd part


In [31]:
import torch
import kaolin
import trimesh
import trimesh.convex  
import open3d as o3d

def create_mesh(point_cloud,mesh_path,smooth=True):
    """Build a triangle mesh from a point cloud and export it to `mesh_path`.

    The point cloud is normalized per axis to [0, 1], voxelized with kaolin,
    converted to a triangle mesh, mapped back to the original coordinates,
    optionally Laplacian-smoothed with trimesh and finally exported.

    Args:
        point_cloud: 3D points (one per row); converted with `torch.tensor`.
        mesh_path: destination file passed to `mesh.export`.
        smooth: when True, apply `trimesh.smoothing.filter_laplacian` before export.

    Raises:
        ValueError: if the generated mesh has no vertices or no faces.

    Notes:
        The voxel grid is moved to the GPU with `.cuda()`, so a CUDA device is
        required. Nothing is returned; the result is the exported file.
    """
    
    point_cloud=torch.tensor(point_cloud).cpu()
    # Per-axis bounding box of the input points.
    min_coords, _ = point_cloud.min(dim=0)
    max_coords, _ = point_cloud.max(dim=0)
    original_scale = max_coords - min_coords
    original_translation = min_coords

    # Normalize the point cloud to [0, 1] range 
    # NOTE(review): an axis with zero extent makes original_scale 0 there and this
    # division produces NaN/inf — confirm inputs are never flat along an axis.
    normalized_point_cloud = (point_cloud - original_translation) / original_scale

    resolution = 20
    # Becomes True once a resolution has produced fewer vertices than input points.
    went_under=False

    # Searching for the best resolution that yields a greater number of vertices than the original point cloud while minimizing total vertices
    # Resolution = 20 chosen empirically as a good starting point
    # The loop lowers the resolution until the mesh has fewer vertices than the point
    # cloud, then raises it again and stops at the first resolution with at least
    # as many vertices as points.
    # NOTE(review): if the vertex count never drops below the point count, the
    # resolution keeps decreasing with no lower bound — confirm this cannot happen.

    while True:
        voxel_grid = kaolin.ops.conversions.pointclouds_to_voxelgrids(
            normalized_point_cloud.unsqueeze(0), resolution=resolution
        ).cuda()

        # Convert voxel grid to triangle mesh
        triangle_mesh = kaolin.ops.conversions.voxelgrids_to_trianglemeshes(
            voxel_grid, iso_value=0.95
        )
        # triangle_mesh[0][0] holds the vertices of the first (only) mesh in the batch.
        if len(triangle_mesh[0][0]) <  len(point_cloud):
            # Too few vertices: remember it and try a finer grid.
            went_under=True
            resolution+=1
            
            continue
        elif( went_under and len(triangle_mesh[0][0]) >=  len(point_cloud)):
            # First resolution that is large enough after having been too small.
            print("Choosen res",resolution)
            break
        
        # Enough vertices but never went under yet: try a coarser grid.
        resolution-=1


    # Extract vertices and faces from the triangle mesh
    verts, faces = triangle_mesh
    verts = verts[0].cpu()  
    faces = faces[0].cpu() 

   
    verts = verts / resolution  # Scale vertices by the grid resolution

    #De-normalize vertices back to the original point cloud coordinates
    verts = verts * original_scale + original_translation

    if verts.numel() == 0 or faces.numel() == 0:
        raise ValueError("Vertices or faces are empty. Cannot create a mesh.")

  

    # Create a Trimesh object
    mesh = trimesh.Trimesh(vertices=verts.cpu().numpy(), faces=faces.cpu().numpy())
    
    # Fix alignment issue
    # NOTE(review): these offsets are applied to `verts` AFTER the Trimesh has been
    # built from it. Whether they reach the exported mesh depends on whether
    # Trimesh kept a view of this array or copied it — confirm the intended
    # behaviour (the shift may currently have no effect).
    verts[:, 1] -= 0.04
    verts[:, 0] -= 0.01
    verts[:,2]-=0.017

    # Smooth the mesh
    if smooth:
        mesh = trimesh.smoothing.filter_laplacian(mesh, lamb=0.2, iterations=8, 
                                    implicit_time_integration=False, 
                                    volume_constraint=True, 
                                    laplacian_operator=None)


    # Export to OBJ file
    mesh.export(mesh_path)
  
    

## Background image extension
Load the background image here; it is used later when rendering the mesh.

In [32]:
#load an image from file
from PIL import Image

def load_image(image_path):
    """Open the image stored at `image_path` and return it converted to RGB."""
    return Image.open(image_path).convert("RGB")

# Toggle for the background-image extension: set to True to render the mesh on
# top of bg/wood.jpg, leave False to render without a background image.
USE_BACKGROUND_IMAGE = False

# The file is only read when the extension is enabled, so the notebook still runs
# when bg/wood.jpg is not present.
img_bg = load_image("bg/wood.jpg") if USE_BACKGROUND_IMAGE else None


## Positional encoding extension
This function will then be used in the model definition

In [33]:
def positional_encoding(x, num_freq):
    """Sinusoidal positional encoding of the last dimension of `x`.

    Every coordinate c is mapped to
    [sin(2*pi*2^0*c), ..., sin(2*pi*2^(F-1)*c), cos(2*pi*2^0*c), ..., cos(2*pi*2^(F-1)*c)]
    with F = `num_freq`, and the encodings of all coordinates are concatenated.

    Args:
        x: tensor whose last dimension holds the coordinates to encode.
        num_freq: number of frequencies F (powers of two starting at 2^0).

    Returns:
        Tensor of shape (*x.shape[:-1], x.shape[-1] * 2 * num_freq), on the same
        device as `x`.
    """
    # Create the frequencies 2^i directly on x's device, so the function works for
    # inputs on any device instead of depending on the global `device`.
    frequencies = 2 ** torch.arange(num_freq, dtype=torch.float32, device=x.device)

    # (..., D, 1) * (1, F) -> (..., D, F): one angle per coordinate and frequency.
    angles = 2 * np.pi * frequencies[None, :] * x.unsqueeze(-1)

    # Sines first, then cosines, along the last axis -> (..., D, 2F).
    encoding = torch.cat([torch.sin(angles), torch.cos(angles)], dim=-1)

    # Flatten the per-coordinate encodings into a single feature vector.
    return encoding.reshape(*x.shape[:-1], -1)

#### Define the model

In [34]:
import clip
import copy
import json
import kaolin as kal
import kaolin.ops.mesh
import numpy as np
import os
import random
import torch
import torch.nn as nn
import torchvision

from itertools import permutations, product
from Normalization.MeshNormalizer import MeshNormalizer
from mesh import Mesh
from pathlib import Path
from render import Renderer
from tqdm import tqdm
from torch.autograd import grad
from torchvision import transforms
from utils import device, color_mesh


class NeuralHighlighter(nn.Module):
    """MLP mapping per-point coordinates to class probabilities.

    The network is a stack of (Linear -> ReLU -> LayerNorm) blocks followed by a
    final Linear layer and a Softmax over dimension 1. At least one hidden block
    is always created.

    Args:
        depth: number of hidden blocks.
        width: number of features of every hidden block.
        out_dim: number of output classes.
        input_dim: number of input coordinates per point.
        use_pos_enc: when True, inputs go through `positional_encoding` first.
        pos_enc_dim: number of frequencies used by the positional encoding.
    """

    def __init__(self, depth=5, width=256, out_dim=2,input_dim=3,use_pos_enc=False, pos_enc_dim=20):
        super().__init__()
        self.depth = depth
        self.width = width
        self.out_dim = out_dim
        self.pos_enc_dim = pos_enc_dim
        self.use_pos_enc = use_pos_enc

        # Positional encoding extension: a sine and a cosine per frequency and coordinate.
        self.encoded_dim = input_dim * 2 * pos_enc_dim if use_pos_enc else input_dim

        layers = []
        in_features = self.encoded_dim
        # The first block is always present, even for depth < 1.
        for _ in range(max(depth, 1)):
            layers.extend([nn.Linear(in_features, width), nn.ReLU(), nn.LayerNorm(width)])
            in_features = width

        # Final projection to class scores, normalized to probabilities.
        layers.extend([nn.Linear(width, out_dim), nn.Softmax(dim=1)])
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class probabilities for each row of `x`."""
        if not self.use_pos_enc:
            return self.model(x)

        # Positional encoding extension: encode, then flatten to one vector per row.
        features = positional_encoding(x, self.pos_enc_dim).view(x.shape[0], -1)
        return self.model(features)

def get_clip_model(clipmodel):
    """Load the CLIP variant named `clipmodel`.

    Returns:
        A `(model, preprocess)` tuple as produced by `clip.load`.
    """
    loaded_model, image_preprocess = clip.load(clipmodel)
    return loaded_model, image_preprocess

# ================== HELPER FUNCTIONS =============================
def save_final_results(log_dir, mesh, mlp, vertices, colors, render, background,ply_path=None):
    """Render and export the final highlighting predicted by `mlp`.

    The network is switched to eval mode, each vertex is assigned to its most
    probable class, the mesh is colored with that hard assignment and rendered
    from 5 views. The colored mesh is exported to `{log_dir}/{ply_path}.ply` and
    the renders are saved as `final_render.jpg` in `log_dir`.

    Args:
        log_dir: output directory.
        mesh: mesh object to color, render and export.
        mlp: trained network producing per-vertex class probabilities.
        vertices: network input (one row per vertex).
        colors: NOTE(review): this argument is never read — the render always uses
            a fixed yellow/gray palette regardless of what the caller passes.
            Confirm whether that is intended.
        render: renderer exposing `render_views`.
        background: background passed to the renderer.
        ply_path: base name (without extension) of the exported PLY file.

    Returns:
        The rendered images returned by `render.render_views`.
    """
    mlp.eval()
    with torch.no_grad():
        probs = mlp(vertices)
        winner = torch.argmax(probs, 1, keepdim=True)

        # One-hot version of the prediction, used to paint the mesh for the renders.
        hard_assignment = torch.zeros(probs.shape).to(device).scatter_(1, winner, 1)

        highlight = torch.tensor([255,255,0]).to(device)
        gray = torch.tensor([180, 180, 180]).to(device)
        palette = torch.stack((highlight/255, gray/255)).to(device)
        color_mesh(hard_assignment, mesh, palette)

        rendered_images, _, _ = render.render_views(
            mesh,
            num_views=5,
            show=False,
            center_azim=0,
            center_elev=0,
            std=1,
            return_views=True,
            lighting=True,
            background=background,
        )

        # Per-vertex color for the exported mesh: class 0 is the highlighted one.
        vertex_colors = torch.where(winner==0, highlight, gray)
        mesh.export(os.path.join(log_dir, f"{ply_path}.ply"), extension="ply", color=vertex_colors)
        save_renders(log_dir, 0, rendered_images, name='final_render.jpg')
        return rendered_images


def clip_loss(embedding,images,clip_model,augmentations,augmentation_number):
    """Negative mean cosine similarity between augmented images and a text prompt.

    The text is encoded once; the images are augmented and encoded
    `augmentation_number` times, and the resulting losses are averaged.

    Args:
        embedding: tokenized text passed to `clip_model.encode_text`.
        images: batch of rendered images.
        clip_model: object exposing `encode_text` and `encode_image`.
        augmentations: callable applied to `images` before every encoding.
        augmentation_number: number of augmentation rounds.

    Returns:
        The averaged loss (lower means images and text are more similar).
    """
    text_features = clip_model.encode_text(embedding)

    total = 0.0
    for _ in range(augmentation_number):
        image_features = clip_model.encode_image(augmentations(images))
        similarity = torch.cosine_similarity(image_features, text_features)
        total = total - torch.mean(similarity)

    return total / augmentation_number
    


def save_renders(dir, i, rendered_images, name=None):
    """Save `rendered_images` as a single image file inside `dir`.

    Args:
        dir: base output directory.
        i: iteration number, used for the default file name.
        rendered_images: images passed to `torchvision.utils.save_image`.
        name: file name relative to `dir`; when None the file is written to
            `renders/iter_{i}.jpg`.
    """
    relative_path = name if name is not None else 'renders/iter_{}.jpg'.format(i)
    torchvision.utils.save_image(rendered_images, os.path.join(dir, relative_path))


### Optimizer and settings

In [None]:

import random

from PIL import Image


# Single seed shared by every random number generator used in the notebook.
seed = 1
# Constrain most sources of randomness
# (some torch backward functions within CLIP are non-deterministic)
# Seed PyTorch on CPU and on every CUDA device.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Seed Python's and NumPy's global generators.
random.seed(seed)
np.random.seed(seed)
# Disable cuDNN autotuning and request deterministic cuDNN kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
import matplotlib.pyplot as plt

# --- Tunable settings ---
render_res = 224          # side (in pixels) of the square renders; also used as crop size by the augmentations
learning_rate = 0.0008    # learning rate of the Adam optimizer
n_iter = 1500             # optimization iterations per object
res = 224                 # NOTE(review): not used in the visible code (render_res is used instead) — confirm
obj_path = 'output_mesh.obj'
n_augs = 3                # augmentation rounds per CLIP loss evaluation
output_dir = './output/'
# Alternative CLIP versions: 'ViT-L/14', 'RN50x4', 'RN50x16'
clip_version = 'ViT-B/32'
render_color = "red" # or "yellow".
n_views = 5               # views rendered per optimization step

# Make sure the directory for intermediate renders exists.
Path(os.path.join(output_dir, 'renders')).mkdir(parents=True, exist_ok=True)
# NOTE(review): objbase/extension are not used in the visible code — confirm before removing.
objbase, extension = os.path.splitext(os.path.basename(obj_path))
render = Renderer(dim=(render_res, render_res))

# Load dataset
dataset = load_dataset("data_bench/full_shape_train_data.pkl")


# Initialize variables
# Mid-gray background color used for the training renders.
bg = torch.tensor((.5, .5, .5)).to(device)
# Training logs and final results are written to the output directory.
log_dir = output_dir


# Lookup tables between RGB triplets (0-1 range) and color names.
rgb_to_color = {(204/255, 1., 0.): "highlighter", (180/255, 180/255, 180/255): "gray"}
color_to_rgb = {"highlighter": [204/255, 1., 0.], "gray": [180/255, 180/255, 180/255]}


# RGB (0-1 range) of the highlighted region for every supported `render_color`.
_HIGHLIGHT_RGB = {
    "red": [204/255, 0., 0.],
    "yellow": [204/255, 1., 0.],
}

# Fail with a clear message instead of a NameError on `full_colors` further down.
if render_color not in _HIGHLIGHT_RGB:
    raise ValueError(
        f"Unsupported render_color {render_color!r}; expected one of {sorted(_HIGHLIGHT_RGB)}"
    )

# Row 0: highlight color, row 1: gray for the non-highlighted part.
full_colors = [_HIGHLIGHT_RGB[render_color], [180/255, 180/255, 180/255]]

colors = torch.tensor(full_colors).to(device)


# --- CLIP model ---
# Load the CLIP model selected by `clip_version`, together with its preprocessing transform.
clip_model,preprocess = get_clip_model(clip_version)
# Loss history; `optimize` appends to it and clears it at the end of every run.
losses = []

# Normalization applied to images before they are fed to CLIP
normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],std=[0.26862954, 0.26130258, 0.27577711]) #from https://github.com/openai/CLIP/issues/20


# Augmentations applied to the renders before each CLIP encoding:
# random resized crop, random perspective distortion, then CLIP normalization.
augmentations = transforms.Compose([
    transforms.RandomResizedCrop(render_res, scale=(0.5, 1.0)),
    transforms.RandomPerspective(p=0.5,distortion_scale=0.5,fill=1),
    normalize
])



def optimize(vertices, mlp, tokenized_text,optim,mesh,prompt,ply_path="3d-render"):
    """Train `mlp` so that renders of the highlighted mesh match the text prompt.

    Runs `n_iter` steps: predict per-vertex probabilities, color and render the
    mesh, compute the CLIP loss against `tokenized_text`, and update the network.
    Every 100 iterations the renders are saved and a line is appended to
    `training_info.txt` in `log_dir`. After the loop the final results are saved
    through `save_final_results` and the global `losses` history is cleared.

    Args:
        vertices: network input (one row per mesh vertex).
        mlp: network to optimize.
        tokenized_text: tokenized prompt used by the CLIP loss.
        optim: optimizer bound to the parameters of `mlp`.
        mesh: mesh to color and render.
        prompt: text written to the training log.
        ply_path: base name of the exported PLY file.

    Returns:
        `(pred_class, final_image)`: the prediction computed in the last iteration
        (before the last optimizer step) and the final rendered images.
    """
    for i in tqdm(range(n_iter)):
        optim.zero_grad()

        # Predict highlight probabilities, then color and render the mesh with them.
        pred_class = mlp(vertices)
        color_mesh(pred_class, mesh, colors)
        rendered_images, _elev, _azim = render.render_views(
            mesh,
            num_views=n_views,
            show=False,
            center_azim=0,
            center_elev=0,
            std=1,
            return_views=True,
            lighting=True,
            background=bg,
            bg_image=img_bg,
        )

        # CLIP loss between the renders and the prompt, followed by the update step.
        loss = clip_loss(tokenized_text,rendered_images,clip_model,augmentations,n_augs)
        loss.backward(retain_graph=True)
        optim.step()

        losses.append(loss.item())

        # Periodic report: console, render snapshot and training log.
        if i % 100 != 0:
            continue
        recent_avg = np.mean(losses[-100:])
        print("Last 100 CLIP score: {}".format(recent_avg))
        save_renders(log_dir, i, rendered_images)
        with open(os.path.join(log_dir, "training_info.txt"), "a") as f:
            f.write(f"For iteration {i}... Prompt: {prompt}, Last 100 avg CLIP score: {recent_avg}, CLIP score {losses[-1]}\n")

    # Save the final renders and the colored mesh.
    final_image=save_final_results(log_dir,mesh, mlp, vertices, colors, render, bg,ply_path=ply_path)

    # Drop local references before releasing cached GPU memory, and reset the
    # shared loss history for the next run.
    del optim, mesh, rendered_images
    torch.cuda.empty_cache()
    losses.clear()
    return pred_class,final_image





def run_test_val(choosen_affordance,choosen_prompt,obj_class,test_val_dataset,gt_treshold=0.05, pred_treshold=0.3):
    """Run the highlighting pipeline on a range of dataset objects and score it.

    For every object index in `test_val_dataset[obj_class]` (a `[start, stop)`
    pair) a mesh is built from the point cloud, a fresh `NeuralHighlighter` is
    optimized against the prompt, the prediction is aligned to the ground-truth
    points, a three-panel figure (final render / prediction / ground truth) is
    shown and aIoU / mIoU are computed.

    Args:
        choosen_affordance: key of the ground-truth label to evaluate against.
        choosen_prompt: prompt template; formatted with `obj_name` and `color`.
        obj_class: object class, used as key of `test_val_dataset`.
        test_val_dataset: mapping from class name to `[start, stop]` dataset indexes.
        gt_treshold: threshold applied to the ground truth (display and mIoU).
        pred_treshold: threshold applied to the prediction (display and mIoU).

    Returns:
        `(mean aIoU, mean mIoU)` over the evaluated objects.

    Raises:
        ValueError: if the index range for `obj_class` is empty.
    """
    first_idx, last_idx = test_val_dataset[obj_class][0], test_val_dataset[obj_class][1]
    if last_idx <= first_idx:
        # Without this guard the final average would fail with a ZeroDivisionError.
        raise ValueError(f"Empty index range for '{obj_class}': [{first_idx}, {last_idx})")

    mesh_path = f"output/{obj_class}_test_val_tmp.obj"
    screenshot_path = "output/tmp_screen.png"
    prompt = choosen_prompt.format(obj_name=obj_class,color=render_color)

    iterations = 0
    aiou = 0
    miou = 0

    for obj_num in range(first_idx, last_idx):
        iterations += 1

        # Fetch everything needed from the dataset up front, so that a missing
        # affordance label fails before the expensive optimization.
        data_info = dataset[obj_num]["data_info"]
        points = data_info["coordinate"]
        gt_prob = data_info["label"][choosen_affordance]

        create_mesh(points, mesh_path, smooth=True) #Set smooth to False if preferred

        mesh = Mesh(obj_path=mesh_path)
        MeshNormalizer(mesh)()
        mlp = NeuralHighlighter().to(device)
        optim = torch.optim.Adam(mlp.parameters(), learning_rate)

        print("Prompt:",prompt)
        tokenized_text = clip.tokenize([prompt]).to(device)
        vertices = copy.deepcopy(mesh.vertices)

        # Pass the actual prompt (not the class name) so the training log records it.
        pred_class, final_render = optimize(vertices, mlp, tokenized_text, optim, mesh, prompt)
        pred_class_aligned = align_prob(points, mesh.vertices, pred_class)

        # Panels: final render, thresholded prediction, thresholded ground truth.
        img = [None, None, None]
        img[0] = final_render[0].cpu().detach().numpy().transpose(1, 2, 0)

        show_point_cloud_tresholded(points, pred_class_aligned, pred_treshold, render_to_image=True, save_path=screenshot_path)
        img[1] = plt.imread(screenshot_path)

        show_point_cloud_tresholded(points, gt_prob, gt_treshold, render_to_image=True, save_path=screenshot_path)
        img[2] = plt.imread(screenshot_path)

        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
        for ax, panel, title in zip(axes, img, ("Final Mesh", "Predicted", "Ground Truth")):
            ax.imshow(panel)
            ax.axis("off")
            ax.set_title(title)

        plt.tight_layout()
        plt.show()
        # Release the figure so long runs do not accumulate open figures.
        plt.close(fig)

        aiou_tmp = aIoU(gt_prob, pred_class_aligned)
        mIoU_tmp = mIoU(gt_prob, pred_class_aligned, gt_treshold, pred_treshold)
        print(f"{obj_class} number {obj_num}: [aIoU: {aiou_tmp}, mIoU:{mIoU_tmp} ]")
        aiou += aiou_tmp
        miou += mIoU_tmp

    return aiou/iterations,miou/iterations


def run_test_single_obj(choosen_prompt,object_name,mesh=None,ply_path="3d-render"):
    """Optimize the highlighter for a single object.

    Args:
        choosen_prompt: prompt template; formatted with `obj_name` and `color`.
        object_name: name of the object; also selects `data/{object_name}.obj`
            when no mesh is given.
        mesh: already loaded (and normalized) mesh; when None the mesh is loaded
            from `data/{object_name}.obj` and normalized.
        ply_path: base name of the exported PLY file.

    Returns:
        None. Results are written to disk by `optimize`.
    """
    if mesh is None:
        mesh_path=f"data/{object_name}.obj"
        mesh = Mesh(obj_path=mesh_path)
        MeshNormalizer(mesh)()

    mlp = NeuralHighlighter().to(device)
    optim = torch.optim.Adam(mlp.parameters(), learning_rate)
    prompt = choosen_prompt.format(obj_name=object_name,color=render_color)
    print("Prompt:",prompt)
    tokenized_text = clip.tokenize([prompt]).to(device)
    vertices = copy.deepcopy(mesh.vertices)

    # Pass the actual prompt (not the object name) so the training log records it.
    # The (prediction, render) result is deliberately not returned: callers use this
    # function as the last statement of a cell and rely on the files written to disk.
    optimize(vertices, mlp, tokenized_text, optim, mesh, prompt, ply_path)
    
    



### Third part test-validation

In [38]:
#Affordances that can be evaluated for each object class
allowed_affordance={"door":["openable","pull"],"vase":["pourable"],"sink":["openable"],"chair":["sittable"],"earphone":["grasp"],"scissors":["cut"],"bed":["layable"],"bottle":["openable"]}
invalid_list=[] 
#Define the test/val set for each object (the range of the indexes in the dataset)
obj_validation={"earphone":[639,660],"bed":[15283,15300],"chair":[3088,3090],"bottle":[7368,7383]}  
obj_test={"earphone":[660,700],"bed":[15300,15340],"bottle":[7410,7420]}



#Define test case (change this)
choosen_class="earphone"
#The affordance must be one of those listed for the class: the ground-truth label
#is selected with this key, so a mismatch scores against the wrong affordance.
choosen_affordance="grasp"
if choosen_affordance not in allowed_affordance.get(choosen_class, []):
    raise ValueError(
        f"Affordance {choosen_affordance!r} is not allowed for class {choosen_class!r}; "
        f"expected one of {allowed_affordance.get(choosen_class, [])}"
    )
choosen_prompt="A 3D render of a gray " +choosen_class+" with the graspable surface colored in {color}"

In [None]:
# Run the pipeline on the validation index range of the chosen class and print
# the averaged aIoU / mIoU returned by run_test_val.
aiou,miou=run_test_val(choosen_affordance,choosen_prompt,choosen_class,obj_validation)
print(f"Validation final results: [aIoU: {aiou}, mIoU:{miou} ]")

In [None]:
# Run the pipeline on the test index range of the chosen class and print the
# averaged aIoU / mIoU returned by run_test_val.
# NOTE: `obj_test` has no entry for "chair"; choose a class that is listed there.
aiou,miou=run_test_val(choosen_affordance,choosen_prompt,choosen_class,obj_test)
print(f"Test final results: [aIoU: {aiou}, mIoU:{miou}]")

#### Single run

In [None]:
# Single-object run. This rebinds `choosen_prompt` defined for the validation/test cells.
# With mesh=None the mesh is loaded from data/horse.obj by run_test_single_obj.
choosen_prompt="A 3D render of a gray {obj_name} with the poncho in red"
run_test_single_obj(choosen_prompt,object_name="horse",mesh=None,ply_path="3d-render")


## Extension: multi-class highlighting

#### DBSCAN to cluster the points

In [14]:
import open3d as o3d
import numpy as np
from sklearn.cluster import DBSCAN



# Use RANSAC to find geometric planes (or other models, e.g., spheres)
def ransac_segmentation(pcd):
    """Split a point cloud into the points on a fitted plane and all the others.

    A plane is fitted with `pcd.segment_plane` (distance threshold 0.02, 3 points
    per sample, 10000 iterations).

    Args:
        pcd: point cloud exposing `segment_plane` and `select_by_index`.

    Returns:
        `(inlier_points, outlier_points)`: the points belonging to the plane and
        the remaining points.
    """
    _plane_model, plane_indices = pcd.segment_plane(
        distance_threshold=0.02, ransac_n=3, num_iterations=10000
    )

    on_plane = pcd.select_by_index(plane_indices)
    off_plane = pcd.select_by_index(plane_indices, invert=True)
    return on_plane, off_plane

# Use DBSCAN for unsupervised clustering of the remaining outlier points (e.g. dog and horse)
def dbscan_clustering(points):
    """Cluster the points of an Open3D point cloud with DBSCAN.

    Args:
        points: point cloud whose `.points` are clustered.

    Returns:
        The cluster labels returned by `DBSCAN.fit_predict` (eps=0.2, min_samples=10).
    """
    coordinates = np.asarray(points.points)  # Open3D points as a NumPy array
    return DBSCAN(eps=0.2, min_samples=10).fit_predict(coordinates)  # Adjust eps if needed





#### Multi-class highlighting loop

In [15]:
import torch
from torchvision import models, transforms
import open3d as o3d
import numpy as np
from PIL import Image
from IPython.display import display
import gc
import cv2


# Transform used when classifying clusters: only the CLIP normalization is
# applied (no random crop or perspective distortion).
augmentations2 = transforms.Compose([
    transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],std=[0.26862954, 0.26130258, 0.27577711])
])

# Labels that each clustered object can be classified as.
candiate_labels=["dog","horse","scissors","knife","table","chair","airplane"] #possible labels that every object can be classified as


def color_point_cloud_grey(pcd):
    """Paint every point of `pcd` mid-grey (0.5, 0.5, 0.5) and return the cloud.

    The point cloud is modified in place.
    """
    n_points = len(np.asarray(pcd.points))
    pcd.colors = o3d.utility.Vector3dVector(np.full((n_points, 3), 0.5))
    return pcd
# Function to render and classify the object
def render_and_classify(pcd):
    """Build a mesh from a point cloud, render it and classify it with CLIP.

    The point cloud is converted to a mesh (written to `output/temp.obj`), colored
    uniformly with the highlight color, rendered from 10 views and compared with
    the prompt "A 3D render of a {render_color} {label}" for every label in
    `candiate_labels`.

    Args:
        pcd: Open3D point cloud of a single object.

    Returns:
        `(mesh, predicted_label)`: the normalized mesh and the label with the
        lowest CLIP loss.
    """
    # A dedicated CLIP model is loaded for classification; the preprocessing
    # transform is not needed because the renders are already tensors.
    model2, _ = get_clip_model("ViT-L/14")
    model2.eval()  # Set the model to evaluation mode
    render2 = Renderer(dim=(render_res, render_res))
    pcd = color_point_cloud_grey(pcd)

    # White background, created on the same device as the rest of the pipeline
    # (instead of a hard-coded "cuda").
    bg = torch.tensor((1., 1., 1.)).to(device)

    # Create the mesh and load it back.
    create_mesh(np.asarray(pcd.points),"output/temp.obj",smooth=False)
    mesh=Mesh(obj_path="output/temp.obj")
    MeshNormalizer(mesh)()

    # Assign every vertex to class 0 (first column 1, second column 0), so the
    # whole object is painted with the highlight color.
    pred_class= torch.ones(len(mesh.vertices),2).to(device)
    pred_class[:,1]=0
    color_mesh(pred_class,mesh,colors)

    rendered_image, _, _ = render2.render_views(mesh, num_views=10,
                                                show=False,
                                                center_azim=0,
                                                center_elev=0,
                                                std=1,
                                                return_views=True,
                                                lighting=True,
                                                background=bg)

    # CLIP loss for each candidate label. Only the values are needed, so no
    # autograd graph is built.
    losses2 = []
    with torch.no_grad():
        for candidate in candiate_labels:
            label_loss = clip_loss(
                    embedding= clip.tokenize([f"A 3D render of a {render_color} {candidate}"]).to(device),
                    images=rendered_image,
                    clip_model=model2,
                    augmentations=augmentations2,
                    augmentation_number=1  # Number of augmentations
                )
            losses2.append(label_loss.item())

    # The label with the lowest loss is the best match.
    predicted_label = candiate_labels[int(np.argmin(losses2))]

    # Free the classification model before the next (memory-hungry) stage.
    del model2
    gc.collect()
    torch.cuda.empty_cache()
    return mesh,predicted_label

#### Settings for the multi-class highlighting

In [None]:
# Point cloud containing the objects to separate and highlight.
point_clouds_path = "data/chair_table.ply"
# Prompt template for the multi-class run; {obj_name} and {color} are filled in
# by run_test_single_obj.
mch_prompt="A 3D render of a gray {obj_name} with colored in {color} the sittable part"

#### Run multi-class highlighting

In [None]:


pdc = o3d.io.read_point_cloud(point_clouds_path)

# Separate the dominant plane (inliers) from the rest of the scene (outliers) with RANSAC
inlier_points, outlier_points = ransac_segmentation(pdc)

# Perform DBSCAN clustering on the outlier points
labels = dbscan_clustering(outlier_points)

# Collect one point cloud per cluster (separate objects). The labels are sorted
# so that the cluster order (and the numbering of the output files) is deterministic.
unique_labels = sorted(set(labels))
clustered_points = []

for label in unique_labels:
    if label == -1:  # Noise points, skip them
        continue
    cluster = outlier_points.select_by_index(np.where(labels == label)[0])
    clustered_points.append(cluster)
    # o3d.visualization.draw_geometries([cluster])


images=[]

# Classify every cluster. NOTE: from here on `labels` holds the predicted class
# names (one per cluster), no longer the DBSCAN labels.
meshes = [None] * len(clustered_points)
labels = [None] * len(clustered_points)
for i,cluster in enumerate(clustered_points):
    meshes[i],labels[i] = render_and_classify(cluster)

# Highlight each object with the prompt built from its predicted label.
for i,mesh in enumerate(meshes):
    print("Predicted label:", labels[i])
    run_test_single_obj(mch_prompt,labels[i],mesh,ply_path=f"3d-render_{i}")
    # Read back the final render written by the run; cv2.imread returns None
    # (instead of raising) when the file cannot be read.
    img = cv2.imread("output/final_render.jpg")
    if img is None:
        raise FileNotFoundError("Could not read output/final_render.jpg")
    images.append(img)


# Show the rendered images in the notebook (OpenCV loads BGR, so reverse the channels to RGB)
for img in images:
    display(Image.fromarray(img[:,:,::-1]))







#### Code to add a custom surface to the bottom of a 3d mesh

In [None]:
# mesh=o3d.io.read_triangle_mesh("data/Clip.obj")
# #save as point cloud
# pcd = mesh.sample_points_uniformly(number_of_points=20000)

# # Remove the points that have y=0 (the table)
# points = np.asarray(pcd.points)
# print(points)

# #generate a table under the object
# table = np.random.rand(100000, 3) * 0.2
# table[:, 1] = -0.0042
# table[:, 0] = table[:, 0] - 0.1
# table[:, 2] = table[:, 2] - 0.1
# points = np.append(points, table, axis=0)

# points = points[(points[:, 1] >= -0.0042) & ((points[:, 0] > -0.018) & (points[:, 0] < 0.0175)) & ((points[:, 2] > -0.025) & (points[:, 2] < 0.028))]

# pcd.points = o3d.utility.Vector3dVector(points)
# print(points)
# o3d.io.write_point_cloud("data/intermediate.ply", pcd)

# # convert pcd to mesh
# create_mesh(np.asarray(pcd.points),"data/Clip2.obj",smooth=True)