In [1]:
import os

# Run the rest of the notebook from the triplane_diffusion example directory
# (presumably so the local `dataset` package imports by its bare name -- confirm).
EXAMPLE_DIR = "/root/dev/hf/diffusers/examples/triplane_diffusion"
os.chdir(EXAMPLE_DIR)
os.getcwd()

'/root/dev/hf/diffusers/examples/triplane_diffusion'

In [2]:
# Show the current status of the GPUs on this machine (needs the `gpustat` CLI on PATH).
!gpustat

[1m[37m6f37a742720d              [m  Wed Apr  3 00:43:48 2024  [1m[30m525.89.02[m
[36m[0][m [34mNVIDIA GeForce RTX 4090[m |[31m 42°C[m, [32m 20 %[m | [36m[1m[33m 1028[m / [33m24564[m MB |
[36m[1][m [34mNVIDIA GeForce RTX 4090[m |[31m 37°C[m, [32m  0 %[m | [36m[1m[33m   10[m / [33m24564[m MB |


In [3]:
import torch

# Select GPU 0 only when CUDA is actually usable. Calling torch.cuda.set_device
# unconditionally fails on a machine without CUDA, which would make the CPU
# fallback below unreachable.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Explanation of the OpenShape Dataset

From the official OpenShape [code](https://github.com/Colin97/OpenShape_code).

In [4]:
# List what is stored under the local OpenShape directory.
!ls "/root/hdd2/OpenShape"

Objaverse  download_data.py  meta_data	meta_data.zip  objaverse-processed


## Meta Data

```meta_data.zip``` includes the meta data used for __training (Objaverse, ShapeNet, ABO, and 3D-FUTURE)__ and __evaluation (on Objaverse-LVIS, ModelNet40, and ScanObjectNN)__:

- ```gpt4_filtering.json```: Filtering results of Objaverse raw texts, __generated with GPT4__.
- ```point_feat_knn.npy```: KNN indices calculated using shape features, used for hard mining during training.

- ```modelnet40/```
    - ```test_split.json```: List of ModelNet40 test shapes.
    - ```test_pc.npy```: Point clouds of ModelNet40 test shapes, 10000 x 3.
    - ```cat_name_pt_feat.npy```: Text features of ModelNet40 category names, __prompt engineering used__.

- ```lvis_cat_name_pt_feat.npy```: Text features of Objaverse-LVIS category names, __prompt engineering used__.

- ```scanobjectnn/```
    - ```xyz_label.npy```: Point clouds and labels of ScanObjectNN test shapes.
    - ```cat_name_pt_feat.npy```: Text features of ScanObjectNN category names, __prompt engineering used__.
- All text features are extracted using OpenCLIP (ViT-bigG-14, laion2b_s39b_b160k).

In [5]:
# List the contents of the meta_data directory.
!ls "/root/hdd2/OpenShape/meta_data"

gpt4_filtering.json	   modelnet40	       scanobjectnn
lvis_cat_name_pt_feat.npy  point_feat_knn.npy  split


In [6]:
from dataset import dataset

In [9]:
# NOTE(review): this cell's execution count (In [9]) is lower than that of the cell
# which assigns `modelnet40_config` (In [12]), and the saved output is a YAML file
# path rather than the parsed dict. In the cells shown here nothing defines the name
# before this point, so a fresh top-to-bottom run would fail on this line -- confirm
# whether this cell is still needed.
modelnet40_config

'./dataset/openshape_train.yaml'

In [12]:
import yaml
from jjuke.net_utils import instantiate_from_config

# ModelNet40 test data, configured through an inline YAML document.
# NOTE: dataset.py has to be adapted so that `target` can be built by
# instantiate_from_config.
modelnet40_config = yaml.safe_load(
    """
    target: dataset.dataset.make_modelnet40test
    params:
        config:
            modelnet40:
                test_split: /root/hdd2/OpenShape/meta_data/modelnet40/test_split.json
                test_pc: /root/hdd2/OpenShape/meta_data/modelnet40/test_pc.npy
                num_points: 10000 
                num_workers: 0
                test_batch_size: 100
                clip_feat_path: /root/hdd2/OpenShape/meta_data/modelnet40/cat_name_pt_feat.npy
                y_up: True
            dataset:
                use_color: True
"""
)
modelnet40_dl = instantiate_from_config(modelnet40_config)

In [14]:
# Pull only the first item from modelnet40_dl so its structure can be inspected.
modelnet_data = next(iter(modelnet40_dl))

In [15]:
# Show the keys of modelnet_data.
modelnet_data.keys()

dict_keys(['xyz', 'features', 'xyz_dense', 'features_dense', 'name', 'category'])

In [21]:
# Objaverse-LVIS data, configured through an inline YAML document.
# NOTE: dataset.py has to be adapted so that `target` can be built by
# instantiate_from_config.
objaverselvis_config = yaml.safe_load(
    """
    target: dataset.dataset.make_objaverse_lvis
    params:
        config:
            objaverse_lvis:
                split: /root/hdd2/OpenShape/meta_data/split/lvis.json
                clip_feat_path: /root/hdd2/OpenShape/meta_data/lvis_cat_name_pt_feat.npy
                num_points: 10000 
                num_workers: 6
                batch_size: 100
                y_up: True
                normalize: True
                use_color: True
"""
)
objaverselvis_dl = instantiate_from_config(objaverselvis_config)

In [22]:
# TODO: store only the data name in the split json and resolve it against a
# configurable data_dir.
# As saved, this fetch fails with FileNotFoundError: the traceback shows dataset.py
# passing self.split[index]['data_path'] straight to np.load, and that path lives
# under /mnt/data/objaverse-processed/..., which is not found on this machine.
objaverselvis_data = next(iter(objaverselvis_dl))
objaverselvis_data.keys()

FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/envs/sgtd/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/envs/sgtd/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/envs/sgtd/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/root/dev/hf/diffusers/examples/triplane_diffusion/dataset/dataset.py", line 502, in __getitem__
    data = np.load(self.split[index]['data_path'], allow_pickle=True).item()
  File "/opt/conda/envs/sgtd/lib/python3.9/site-packages/numpy/lib/npyio.py", line 427, in load
    fid = stack.enter_context(open(os_fspath(file), "rb"))
FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/objaverse-processed/merged_for_training_final/Objaverse/000-090/d4c9180a46cf401fa24fa3afe9237a43.npy'


In [26]:
# ScanObjectNN test data, configured through an inline YAML document.
scanobjectnn_config = yaml.safe_load(
    """
    target: dataset.dataset.make_scanobjectnntest
    params:
        config:
            scanobjectnn:
                data_path: /root/hdd2/OpenShape/meta_data/scanobjectnn/xyz_label.npy 
                num_points: 10000 
                num_workers: 0
                test_batch_size: 100
                clip_feat_path: /root/hdd2/OpenShape/meta_data/scanobjectnn/cat_name_pt_feat.npy
                y_up: True
            dataset:
                use_color: True
"""
)
scanobjectnn_dl = instantiate_from_config(scanobjectnn_config)

In [27]:
# Pull only the first item from scanobjectnn_dl and show which keys it carries.
scanobjectnn_data = next(iter(scanobjectnn_dl))
scanobjectnn_data.keys()

dict_keys(['xyz', 'features', 'xyz_dense', 'features_dense', 'name', 'category'])

- ```split/```: List of training shapes.
    - ```train_all.json```: Training with __four datasets (Objaverse, ShapeNet, ABO, and 3D-FUTURE)__.
    - ```train_no_lvis.json```: Training with four datasets but __Objaverse-LVIS shapes excluded__.
    - ```ablation/train_shapenet_only.json```: Training with __ShapeNet shapes only__.

In [9]:
# List the split files available under meta_data/split.
!ls "/root/hdd2/OpenShape/meta_data/split"

ablation  lvis.json  train_all.json  train_no_lvis.json


In [44]:
# Training dataset (dataset.dataset.Four), configured through an inline YAML document.
# `train_split` selects the list of training shapes. This cell uses the ShapeNet-only
# ablation split; the other split files are:
#   /root/hdd2/OpenShape/meta_data/split/train_all.json
#   /root/hdd2/OpenShape/meta_data/split/train_no_lvis.json
# NOTE: dataset.py has to be adapted so that `target` can be built by
# instantiate_from_config.
train_config = yaml.safe_load(
    """
    target: dataset.dataset.Four
    params:
        config:
            dataset:
                name: Four
                train_split: /root/hdd2/OpenShape/meta_data/split/ablation/train_shapenet_only.json
                train_partial: -1
                num_points: 10000 
                num_workers: 6
                train_batch_size: 200
                use_knn_negative_sample: False
                negative_sample_num: 1
                knn_path: /root/hdd2/OpenShape/meta_data/point_feat_knn.npy
                y_up: True
                normalize: True
                random_z_rotate: True
                use_color: True
                rgb_random_drop_prob: 0.5
                augment: True
                text_source: [text, caption, retrieval_text] 
                use_text_filtering: True
                use_prompt_engineering: True
                gpt4_filtering_path: /root/hdd2/OpenShape/meta_data/gpt4_filtering.json
        phase: "train"
"""
)
train_ds = instantiate_from_config(train_config)

In [45]:
from torch.utils.data import DataLoader
from dataset.dataset import minkowski_collate_fn

# Reuse the worker count and batch size from the config the dataset was built with.
loader_cfg = train_config["params"]["config"]["dataset"]
train_dl = DataLoader(
    train_ds,
    batch_size=loader_cfg["train_batch_size"],
    shuffle=False,
    num_workers=loader_cfg["num_workers"],
    collate_fn=minkowski_collate_fn,
)

In [46]:
# TODO: store only the data name in the split json and resolve it against a
# configurable data_dir.
# As saved, this fetch fails with FileNotFoundError: the traceback shows dataset.py
# passing meta['data_path'] straight to np.load, and that path lives under
# /mnt/data/objaverse-processed/..., which is not found on this machine.
train_data = next(iter(train_dl))
# Fixed the misspelled name `trian_data`, which would raise NameError once the
# fetch above succeeds.
train_data.keys()

FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/envs/sgtd/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/envs/sgtd/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/envs/sgtd/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/root/dev/hf/diffusers/examples/triplane_diffusion/dataset/dataset.py", line 353, in __getitem__
    return self.get_others(self.split[index])
  File "/root/dev/hf/diffusers/examples/triplane_diffusion/dataset/dataset.py", line 276, in get_others
    data = np.load(meta['data_path'], allow_pickle=True).item()
  File "/opt/conda/envs/sgtd/lib/python3.9/site-packages/numpy/lib/npyio.py", line 427, in load
    fid = stack.enter_context(open(os_fspath(file), "rb"))
FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/objaverse-processed/merged_for_training_final/ShapeNet/04330267/2c4eb370f10b4667e6a1cd9763fc2f3f.npy'
