In [1]:
import argparse
from transformers import AutoTokenizer
import torch
import os
from pointllm.conversation import conv_templates, SeparatorStyle
from pointllm.utils import disable_torch_init
from pointllm.model import *
from pointllm.model.utils import KeywordsStoppingCriteria

from pointllm.data import load_ulip2_objaverse_point_cloud

import os

In [2]:
def init_model(args):
    """Load the PointLLM model and tokenizer and prepare the chat scaffolding.

    Args:
        args: Namespace-like object. Only ``args.model_path`` is read here.

    Returns:
        A 6-tuple ``(model, tokenizer, point_backbone_config, keywords,
        mm_use_point_start_end, conv)`` where ``keywords`` is a one-element
        list holding the conversation's stop string and ``conv`` is a fresh
        copy of the selected conversation template.

    Raises:
        NotImplementedError: If ``model_path`` does not contain "v1"
            (case-insensitive); no other conversation template is wired up.
    """
    # Model
    disable_torch_init()

    model_path = args.model_path
    print(f'[INFO] Model name: {model_path}')

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # The dtype is pinned to float16 here; args.torch_dtype is not consulted.
    # The chat loop casts the point clouds to float16 to match.
    model = PointLLMLlamaForCausalLM.from_pretrained(
        model_path,
        low_cpu_mem_usage=False,
        use_cache=True,
        torch_dtype=torch.float16,
    ).cuda()
    model.initialize_tokenizer_point_backbone_config_wo_embedding(tokenizer)

    model.eval()

    mm_use_point_start_end = getattr(model.config, "mm_use_point_start_end", False)
    # Add special tokens ind to model.point_config
    point_backbone_config = model.get_model().point_backbone_config

    # The conversation template is chosen from the model path alone. It used to
    # be assigned only when mm_use_point_start_end was truthy, which left `conv`
    # unbound (UnboundLocalError below) for models without start/end tokens.
    if "v1" in model_path.lower():
        conv_mode = "vicuna_v1_1"
    else:
        raise NotImplementedError(
            f"No conversation template is configured for model path '{model_path}'"
        )

    conv = conv_templates[conv_mode].copy()

    # Two-separator templates end an assistant turn with sep2, others with sep.
    stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
    keywords = [stop_str]

    return model, tokenizer, point_backbone_config, keywords, mm_use_point_start_end, conv

In [3]:
import numpy as np
def pc_norm(pc):
    """Center the xyz columns on their centroid and scale them by the max radius.

    Args:
        pc: ``NxC`` array. Columns 0-2 are treated as xyz; any remaining
            columns are passed through unchanged.

    Returns:
        ``NxC`` array with normalized xyz followed by the untouched extra
        columns. If every point coincides with the centroid (max radius 0),
        the centered coordinates are returned unscaled instead of dividing
        by zero.
    """
    xyz = pc[:, :3]
    other_feature = pc[:, 3:]

    centroid = np.mean(xyz, axis=0)
    xyz = xyz - centroid
    # Largest Euclidean distance from the centroid.
    m = np.max(np.sqrt(np.sum(xyz ** 2, axis=1)))
    # Guard the degenerate case: 0 / 0 would fill the cloud with NaNs.
    if m > 0:
        xyz = xyz / m

    pc = np.concatenate((xyz, other_feature), axis=1)
    return pc
def load_ulip2_objaverse_point_cloud(data_path, object_id, pointnum=8192, use_color=False):
    """Load one point cloud from disk and normalize it with ``pc_norm``.

    This notebook-local definition shadows the function of the same name
    imported from ``pointllm.data`` in the first cell.

    Args:
        data_path: Root directory containing the point-cloud files.
        object_id: Identifier used to build the file name.
        pointnum: Point count embedded in the file name.
        use_color: When False, reads ``{object_id}/{object_id}_{pointnum}.npz``
            (entry ``arr_0``); when True, reads ``{object_id}_{pointnum}.npy``.

    Returns:
        The loaded array after ``pc_norm``.
    """
    if not use_color:
        filename = f"{object_id}/{object_id}_{pointnum}.npz"
        # np.load on an .npz returns an archive object backed by an open file;
        # the context manager closes it once the array has been read out.
        with np.load(os.path.join(data_path, filename)) as archive:
            point_cloud = archive['arr_0']  # * pointnum, 3 array
    else:
        filename = f"{object_id}_{pointnum}.npy"
        point_cloud = np.load(os.path.join(data_path, filename))

    # * normalize
    point_cloud = pc_norm(point_cloud)

    return point_cloud

def load_my_own_point_cloud(object_id):
    """Load a custom ``.npy`` point cloud and turn it into a batched tensor.

    Args:
        object_id: Path handed straight to ``np.load``, e.g.
            /data2/llf/fss_data/ScanNet/scenes/data/scene0000_00.npy

    Returns:
        float32 tensor with a leading batch dimension of 1, holding at most
        the first six columns of the file: xyz normalized by ``pc_norm`` and
        columns 3+ divided by 255 (assumes those are 0-255 colors — TODO confirm).
    """
    point_cloud = np.load(object_id)
    point_cloud = point_cloud[:, :6]
    # Writing the /255 result back into an integer array would truncate every
    # value to 0 or 1, so promote non-float data first. Float inputs keep
    # their dtype and are processed exactly as before.
    if not np.issubdtype(point_cloud.dtype, np.floating):
        point_cloud = point_cloud.astype(np.float32)
    point_cloud[:, 3:] = point_cloud[:, 3:] / 255

    point_cloud = pc_norm(point_cloud)

    return torch.from_numpy(point_cloud).unsqueeze_(0).to(torch.float32)
    

def load_point_cloud(args):
    """Fetch the colored 8192-point cloud for ``args.object_id``.

    Reads ``args.object_id`` and ``args.data_path``, logs the id, and returns
    ``(object_id, tensor)`` where the tensor is the loaded array with a
    leading batch dimension of 1, converted to float32.
    """
    object_id = args.object_id
    print(f"[INFO] Loading point clouds using object_id: {object_id}")

    points_np = load_ulip2_objaverse_point_cloud(
        args.data_path,
        object_id,
        pointnum=8192,
        use_color=True,
    )
    batched = torch.from_numpy(points_np).unsqueeze_(0)

    return object_id, batched.to(torch.float32)

In [4]:
import argparse
# Notebook configuration expressed as an argparse namespace.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--model-path",
    type=str,
    default="RunsenXu/PointLLM_7B_v1.1",
)
parser.add_argument(
    "--data-path",
    type=str,
    default="/data2/llf/objaverse/8192_npy",
)
# NOTE(review): init_model loads the model as float16 and does not read this option.
parser.add_argument(
    "--torch-dtype",
    type=str,
    default="float32",
    choices=["float32", "float16", "bfloat16"],
)
# Parse an empty argv so the kernel's own command line is ignored and only the
# defaults above apply.
args = parser.parse_args(args=[])

In [5]:
# Load the model once and unpack the six values the later cells rely on
# (init_model prints the model name while loading).
model, tokenizer, point_backbone_config, keywords, mm_use_point_start_end, conv = init_model(args)

[INFO] Model name: RunsenXu/PointLLM_7B_v1.1
Loading PointBERT config from /home/linfeng/PartLLM/pointllm/model/pointbert/PointTransformer_base_8192point.yaml.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Pull the point-token settings used to build the first prompt of each chat
# out of the backbone config returned by init_model.
(
    point_token_len,
    default_point_patch_token,
    default_point_start_token,
    default_point_end_token,
) = (
    point_backbone_config['point_token_len'],
    point_backbone_config['default_point_patch_token'],
    point_backbone_config['default_point_start_token'],
    point_backbone_config['default_point_end_token'],
)

In [26]:
# Interactive chat: the outer loop picks a point cloud, the inner loop runs up
# to 100 dialogue rounds about it. Enter 'q' at the object prompt to stop, or
# 'exit' at the question prompt to pick another object.
while True:
    print("-" * 80)
    # Prompt for object_id. Strip once so the quit check and both loaders all
    # see the same value.
    object_id = input("[INFO] Please enter the object_id or 'q' to quit: ").strip()

    # Check if the user wants to quit
    if object_id.lower() == 'q':
        print("[INFO] Quitting...")
        break

    print(f"[INFO] Chatting with object_id: {object_id}.")

    # Update args with new object_id
    args.object_id = object_id

    # Load the point cloud data: first as an id under args.data_path, then as a
    # direct path to a .npy file.
    try:
        loaded_id, point_clouds = load_point_cloud(args)
        print(loaded_id)
    except Exception as lookup_error:
        # Deliberate best-effort fallback; report why the first attempt failed
        # instead of swallowing it.
        print(f"[INFO] Lookup under data_path failed ({lookup_error}); loading as a file path.")
        try:
            point_clouds = load_my_own_point_cloud(object_id)
        except Exception as fallback_error:
            # A mistyped id/path should not kill the interactive session.
            print(f"[ERROR] {fallback_error}")
            continue

    # The model is loaded in float16 by init_model, so match that dtype.
    point_clouds = point_clouds.cuda().to(torch.float16)
    print(point_clouds.shape)

    # Reset the conversation template
    conv.reset()

    print("-" * 80)

    # The stop string does not change between rounds.
    stop_str = keywords[0]

    # Start a loop for multiple rounds of dialogue
    for round_idx in range(100):
        qs = input(conv.roles[0] + ': ')
        if qs == 'exit':
            break

        # Only the first question carries the point-cloud placeholder tokens.
        if round_idx == 0:
            if mm_use_point_start_end:
                qs = default_point_start_token + default_point_patch_token * point_token_len + default_point_end_token + '\n' + qs
            else:
                qs = default_point_patch_token * point_token_len + '\n' + qs

        # Append the new message to the conversation history; the None entry is
        # the open assistant slot that get_prompt() leaves for generation.
        conv.append_message(conv.roles[0], qs)
        conv.append_message(conv.roles[1], None)
        prompt = conv.get_prompt()
        inputs = tokenizer([prompt])

        input_ids = torch.as_tensor(inputs.input_ids).cuda()

        stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)

        with torch.inference_mode():
            output_ids = model.generate(
                input_ids,
                point_clouds=point_clouds,
                do_sample=True,
                temperature=1.0,
                top_k=50,
                max_length=2048,
                top_p=0.95,
                stopping_criteria=[stopping_criteria])

        # Sanity check: the generated sequence should start with the prompt ids.
        input_token_len = input_ids.shape[1]
        n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
        if n_diff_input_output > 0:
            print(f'[Warning] {n_diff_input_output} output_ids are not the same as the input_ids')

        # Decode only the newly generated tokens and trim a trailing stop string.
        outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
        outputs = outputs.strip()
        if outputs.endswith(stop_str):
            outputs = outputs[:-len(stop_str)]
        outputs = outputs.strip()

        # Replace the open assistant slot with the model's response
        conv.pop_last_none_message()
        conv.append_message(conv.roles[1], outputs)
        print(f'{conv.roles[1]}: {outputs}\n')

--------------------------------------------------------------------------------


[INFO] Please enter the object_id or 'q' to quit:  /data2/llf/fss_data/ScanNet/scenes/data/scene0011_00.npy


[INFO] Chatting with object_id: /data2/llf/fss_data/ScanNet/scenes/data/scene0011_00.npy.
[INFO] Loading point clouds using object_id: /data2/llf/fss_data/ScanNet/scenes/data/scene0011_00.npy
torch.Size([1, 206883, 6])
--------------------------------------------------------------------------------


USER:  what is this


ASSISTANT: The 3D model represents a meticulously designed house that is colored in different shades of grey. It consists of several rooms, possibly indicating several functionalities such as bedrooms, living rooms, kitchen etc. The grey color gives it a classic yet modern look. Houses such as these are generally associated with residential purposes and are usually seen in suburban areas.



KeyboardInterrupt: Interrupted by user

In [29]:
# Inspect the argument namespace. `object_id` is not a parser option; it is
# attached by the chat loop.
args

Namespace(model_path='RunsenXu/PointLLM_7B_v1.1', data_path='/data2/llf/objaverse/8192_npy', torch_dtype='float32', object_id='/data2/llf/fss_data/ScanNet/scenes/data/scene0011_00.npy')

In [34]:
# Count the entries in the directory used as the default for --data-path.
len(os.listdir("/data2/llf/objaverse/8192_npy"))

661575

In [38]:
# Probe the point backbone with a random float16 batch of shape (1, 200000, 6)
# on the GPU and show the shape of what it returns.
model.model.point_backbone(torch.rand(1,200000,6).cuda().to(torch.float16)).shape

torch.Size([1, 513, 1152])

In [39]:
# Inspect the backbone's `num_group` attribute. The recorded value (512) is one
# less than the 513 tokens returned by the probe cell — presumably one extra
# global token is added; TODO confirm.
model.model.point_backbone.num_group

512

In [10]:
# Read the point-token settings from the backbone config (same four lookups as
# the cell that precedes the chat loop).
point_token_len, default_point_patch_token, default_point_start_token, default_point_end_token = (
    point_backbone_config[key]
    for key in (
        'point_token_len',
        'default_point_patch_token',
        'default_point_start_token',
        'default_point_end_token',
    )
)

In [11]:
# Show the placeholder string repeated point_token_len times in the first prompt.
default_point_patch_token

'<point_patch>'

In [12]:
# Show the marker string placed before the patch tokens when
# mm_use_point_start_end is set.
default_point_start_token

'<point_start>'

In [2]:
# Tokenizer vocabulary size before adding "[SEG]".
# NOTE(review): needs the init_model cell to have run first; the recorded
# NameError presumably comes from running this on a fresh kernel.
tokenizer.vocab_size

NameError: name 'tokenizer' is not defined

In [None]:
# Register "[SEG]" as an additional token on the tokenizer.
# NOTE(review): nothing in the visible cells resizes the model's embeddings for
# the new id — confirm before feeding this token to the model.
tokenizer.add_tokens("[SEG]")

In [None]:
# Vocabulary size after adding "[SEG]".
# NOTE(review): for Hugging Face tokenizers `vocab_size` presumably reports only
# the base vocabulary; `len(tokenizer)` is the usual way to see added tokens —
# TODO confirm.
tokenizer.vocab_size

In [18]:
# Encode "[SEG]" without special tokens and keep the first id as its token index.
seg_token_idx = tokenizer("[SEG]", add_special_tokens=False).input_ids[0]

In [25]:
# Display the id assigned to "[SEG]".
seg_token_idx

32003

In [8]:
from pointllm.model.pointbert import PointSegTransformer

ImportError: cannot import name 'PointSegTransformer' from 'pointllm.model.pointbert' (unknown location)

In [18]:
# Dummy (1, 8192, 6) float16 CUDA tensor used to probe the point backbone.
# NOTE(review): this rebinds the builtin `input`, which the chat-loop cell calls
# to read user text — run `del input` (or restart the kernel) before re-running
# that cell.
# NOTE(review): torch.Tensor(sizes...) presumably returns uninitialized memory,
# so the values are arbitrary and only the shape is meaningful — TODO confirm.
input= torch.Tensor(1,8192,6).to(torch.float16).cuda()

In [20]:
# Run the point backbone on the dummy tensor bound to `input` in the previous cell.
out = model.model.point_backbone(input)

In [22]:
# Shape of the backbone output for the 8192-point dummy input.
out.shape

torch.Size([1, 513, 1152])