# Notebook for step 4 of AML Project
This notebook was created for fine-tuning the OpenShape model. We have used code from the following repositories:

OpenShape:
@misc{liu2023openshape,
      title={OpenShape: Scaling Up 3D Shape Representation Towards Open-World Understanding},
      author={Minghua Liu and Ruoxi Shi and Kaiming Kuang and Yinhao Zhu and Xuanlin Li and Shizhong Han and Hong Cai and Fatih Porikli and Hao Su},
      year={2023},
      eprint={2305.10764},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}

3DOS:
@inproceedings{
alliegro2022towards,
title={Towards Open Set 3D Learning: Benchmarking and Understanding Semantic Novelty Detection on Pointclouds},
author={Antonio Alliegro and Francesco Cappio Borlino and Tatiana Tommasi},
booktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
year={2022},
url={https://openreview.net/forum?id=X2dHozbd1at}
}

# Clone git repos to environment

In [None]:

!git clone https://github.com/Colin97/OpenShape_code.git
# Make sure you have git-lfs installed (https://git-lfs.com)
!git lfs install
!git clone https://huggingface.co/spaces/OpenShape/openshape-demo

# if you want to clone without large files – just their pointers
# prepend your git clone with the following env var:
!GIT_LFS_SKIP_SMUDGE=1
#!pip install -e .
!git clone https://huggingface.co/OpenShape/openshape-demo-support


fatal: destination path 'OpenShape_code' already exists and is not an empty directory.
Git LFS initialized.
fatal: destination path 'openshape-demo' already exists and is not an empty directory.
fatal: destination path 'openshape-demo-support' already exists and is not an empty directory.


# Install dependencies
Most dependencies are already satisfied by default in Google Colab.

In [None]:
pip install huggingface_hub wandb omegaconf torch_redstone einops tqdm open3d dgl timm
pip install --upgrade https://github.com/unlimblue/KNN_CUDA/releases/download/0.2/KNN_CUDA-0.2-py3-none-any.whl


SyntaxError: invalid syntax (<ipython-input-54-36226d94238f>, line 1)

# Log in to Hugging Face

In [None]:
# Turn on Colab's custom widget manager.
# NOTE(review): presumably required so the `login()` widget in the next cell renders — confirm.
from google.colab import output
output.enable_custom_widget_manager()

In [None]:
# Authenticate with the Hugging Face Hub. `login()` is called without arguments, so no
# access token is hard-coded in the notebook.
from huggingface_hub import login
login()

# Get the OpenShape model from Hugging Face

In [None]:
# Install the cloned openshape-demo-support checkout in editable mode, then switch back
# to /content, the working directory used by the following cells.
%cd /content/openshape-demo-support
!pip install -e .
%cd /content

In [None]:
import openshape
# Load the point-cloud encoder by name; `model` is reused by every later inference cell.
model = openshape.load_pc_encoder('openshape-pointbert-vitg14-rgb')


# Put the module in evaluation mode.
model.eval()

# Function for fitting point clouds to the input dimensions for OpenShape

In [None]:
import random
import sys

import numpy as np
import open3d as o3d
import torch

from OpenShape_code.src.utils.data import normalize_pc

def load_ply(file_name, num_points=10000, y_up=True):
    """Read a point-cloud file with Open3D and convert it into OpenShape encoder inputs.

    Thin wrapper around :func:`filter_for_OpenShape`; see that function for details.

    Args:
        file_name: Path handed to ``o3d.io.read_point_cloud``.
        num_points: Maximum number of points to keep (random subsample when the
            file contains more).
        y_up: If True, swap the y and z columns of the coordinates.

    Returns:
        Tuple ``(xyz_tensor, features_tensor)`` as returned by
        :func:`filter_for_OpenShape`.
    """
    pcd = o3d.io.read_point_cloud(file_name)  # Read the point cloud
    return filter_for_OpenShape(pcd, num_points=num_points, y_up=y_up)


def filter_for_OpenShape(pointcloud, num_points=10000, y_up=True):
    """Convert a point cloud into the tensors expected by the OpenShape encoder.

    Args:
        pointcloud: Object exposing ``points`` (N x 3 coordinates) and ``colors``
            (N x 3 colours, or empty when the cloud has no colours), e.g. an
            ``o3d.geometry.PointCloud``. The object is not modified.
        num_points: Maximum number of points to keep. A random subset is drawn with
            ``random.sample`` when the cloud has more; when it has fewer, a warning
            is written to stderr and all points are used.
        y_up: If True, swap the y and z columns of the coordinates.

    Returns:
        Tuple ``(xyz_tensor, features_tensor)`` of float32 tensors:
        ``xyz_tensor`` has shape [M, 3] (normalized coordinates) and
        ``features_tensor`` has shape [1, 6, M] (xyz followed by rgb, with a
        leading batch dimension), where ``M = min(N, num_points)``.

    Raises:
        ValueError: If the point cloud contains no points.
    """
    xyz = np.asarray(pointcloud.points)  # Get xyz coordinates
    rgb = np.asarray(pointcloud.colors)  # Get rgb colors (empty if the cloud has none)
    n = xyz.shape[0]
    if n == 0:
        raise ValueError("Point cloud contains no points.")

    # Decide this before sampling: indexing an empty colour array would raise IndexError.
    has_colors = rgb.size > 0

    # Sample num_points points if necessary
    if n > num_points:
        idx = random.sample(range(n), num_points)
        xyz = xyz[idx]
        if has_colors:
            rgb = rgb[idx]
    elif n < num_points:
        print(f"Warning: requested {num_points} points, but file has only {n} points.", file=sys.stderr)

    # Adjust orientation by swapping y and z if requested. Column re-indexing builds a
    # new array, so the caller's point cloud is never modified in place.
    if y_up:
        xyz = xyz[:, [0, 2, 1]]

    # Normalize the point cloud coordinates
    xyz_normalized = normalize_pc(xyz)

    # Fall back to a constant colour when the cloud carries no colours
    if not has_colors:
        rgb = np.ones_like(xyz_normalized) * 0.4

    # Concatenate xyz with rgb -> [M, 6]
    features = np.concatenate([xyz_normalized, rgb], axis=1)

    # Convert to PyTorch tensors
    xyz_tensor = torch.from_numpy(xyz_normalized).float()
    features_tensor = torch.from_numpy(features).float()

    # Add a batch dimension and move channels first: [M, 6] -> [1, M, 6] -> [1, 6, M],
    # matching the [B, C, N] layout the model is fed with.
    features_tensor = features_tensor.unsqueeze(0).transpose(1, 2)

    return xyz_tensor, features_tensor


# Test that installation and environment works

If a feature vector is printed, the installation and environment are working.

In [None]:
xyz, feat = load_ply("/content/OpenShape_code/demo/owl.ply")

device = torch.device("cpu")
model = model.to(device)
feat = feat.to(device)

# Inference only: disable autograd so no graph is kept, and call the module itself
# (rather than .forward) so that registered hooks are honoured.
# NOTE: this rebinds `output`, which an earlier cell imported from google.colab.
with torch.no_grad():
    output = model(feat)

print(output)


# Clone 3DOS repo

In [None]:
# Clone 3D_OS repo from github
!git clone https://github.com/antoalli/3D_OS.git
# Make the download script that ships with the repo executable and run it from inside the repo.
!cd 3D_OS && chmod +x download_data.sh && ./download_data.sh
# Extra Python packages. NOTE(review): presumably required by the 3D_OS code — confirm.
!pip install h5py protobuf lmdb msgpack-numpy ninja scikit-learn


# Using SR1 as ID, create dataloaders for train, ID, OOD1 and OOD2

The datasets we use are the same as the datasets used in 3DOS. We also use the code given in the 3DOS paper to create the dataloaders.


Citation:
\@inproceedings{
alliegro2022towards,
title={Towards Open Set 3D Learning: Benchmarking and Understanding Semantic Novelty Detection on Pointclouds},
author={Antonio Alliegro and Francesco Cappio Borlino and Tatiana Tommasi},
booktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
year={2022},
url={https://openreview.net/forum?id=X2dHozbd1at}
}

In [1]:
# Change folder to 3D_OS to get the same dataloaders
%cd /content/3D_OS

import sys
import os
import warnings
import numpy as np

sys.path.append(os.getcwd())
import os.path as osp
import time
from torch.cuda.amp import GradScaler, autocast
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import DataLoader
from torchvision import transforms
from utils.utils import *
from utils.dist import *
# noinspection PyUnresolvedReferences
from utils.data_utils import H5_Dataset
#from datasets.modelnet import *
from datasets.scanobject import *
from models.classifiers import Classifier
from utils.ood_utils import get_confidence, eval_ood_sncore, iterate_data_odin, \
    iterate_data_energy, iterate_data_gradnorm, iterate_data_react, estimate_react_thres, print_ood_output, \
    get_penultimate_feats, get_network_output
import wandb
from base_args import add_base_args
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from models.common import convert_model_state, logits_entropy_loss
from models.ARPL_utils import Generator, Discriminator
from classifiers.common import train_epoch_cla, train_epoch_rsmix_exposure, train_epoch_cs
from classifiers.trainer_ddp_cla_md import get_args, load_yaml, get_md_eval_loaders

# Code from 3DOS repo classifiers/trainer_ddp_cla_md.py line 192-478

#  python -m torch.distributed.launch --nproc_per_node=1 Failure_Analysis/custom\
# tests/evaluating_PointNet_cosine_MLS.py --config cfgs/pn2-msg.yaml
# --exp_name PN2_cosine_SR1 --src SR1 --loss cosine -mode eval
# --ckpt_path outputs/PN2_cosine_SR1/models/model_best.pth


# Code from 3DOS repo classifiers/trainer_ddp_cla_md.py line 468-478
def eval_ood_md2sonn(opt, config):
    """Create the train, ID, OOD1 and OOD2 data loaders for the OOD evaluation.

    Args:
        opt: Options object. Reads ``seed``, ``batch_size``, ``num_workers``,
            ``data_root``, ``sonn_split``, ``sonn_h5_name``, ``num_points_test``
            and ``src``; it is also forwarded to ``get_md_eval_loaders``.
        config: Not read by this function; kept so the signature stays unchanged.

    Returns:
        Tuple ``(train_loader, id_loader, ood1_loader, ood2_loader)``.

    Raises:
        ValueError: If ``opt.src`` is neither ``'SR1'`` nor ``'SR2'``.
    """
    print(f"Arguments: {opt}")
    set_random_seed(opt.seed)

    # DataLoader settings shared by all ScanObject loaders: fixed order, no dropped samples.
    loader_kwargs = dict(
        batch_size=opt.batch_size,
        drop_last=False,
        shuffle=False,
        num_workers=opt.num_workers,
        sampler=None,
        worker_init_fn=init_np_seed,
    )

    # Dataset settings shared by all ScanObject subsets (the whole evaluation uses ScanObject data).
    scanobject_kwargs = dict(
        data_root=opt.data_root,
        sonn_split=opt.sonn_split,
        h5_file=opt.sonn_h5_name,
        split='all',  # both training (unused) and test samples are used during evaluation
        num_points=opt.num_points_test,
        transforms=None,  # no augmentation at inference time
    )

    def make_loader(class_choice):
        # One ScanObject subset wrapped in a DataLoader with the shared settings.
        return DataLoader(ScanObject(class_choice=class_choice, **scanobject_kwargs), **loader_kwargs)

    train_loader, _ = get_md_eval_loaders(opt)

    # source set -> (log message, ID class set, OOD1 class set)
    per_source = {
        'SR1': ("Src is SR1\n", "sonn_2_mdSet1", "sonn_2_mdSet2"),
        'SR2': ("Src is SR2\n", "sonn_2_mdSet2", "sonn_2_mdSet1"),
    }
    if opt.src not in per_source:
        raise ValueError(f"OOD evaluation - wrong src: {opt.src}")

    message, id_choice, ood1_choice = per_source[opt.src]
    print(message)
    id_loader = make_loader(id_choice)
    ood1_loader = make_loader(ood1_choice)
    ood2_loader = make_loader("sonn_ood_common")

    return train_loader, id_loader, ood1_loader, ood2_loader

class Options:
    """Attribute-style view of an options dictionary.

    Every key of the dictionary becomes an attribute of the instance, so the
    values can be read as ``opt.seed``, ``opt.src`` and so on.

    Args:
        dictionary: Mapping of option name to value.
    """

    def __init__(self, dictionary):
        for key, value in dictionary.items():
            setattr(self, key, value)

    def __repr__(self):
        # Show the option values instead of the default "<... object at 0x...>",
        # so that print(opt) and f"Arguments: {opt}" are actually informative.
        fields = ", ".join(f"{key}={value!r}" for key, value in vars(self).items())
        return f"{type(self).__name__}({fields})"

# Option values for the 3DOS helpers. They are wrapped in `Options` below so they can be
# read as attributes (opt.seed, opt.src, ...), the way eval_ood_md2sonn accesses them.
opt_dict = {
    "apply_fix_cellphone": True,
    "augm_set": 'rw',
    # Used as the DataLoader batch_size in eval_ood_md2sonn.
    "batch_size": 1,
    "checkpoints_dir": 'outputs',
    "ckpt_path": 'outputs/PN2_cosine_SR1/models/model_best.pth',
    "config": 'cfgs/pn2-msg.yaml',
    "corruption": None,
    "cs": False,
    "cs_beta": 0.1,
    "cs_gan_lr": 0.0002,
    "data_root": "/content/3D_OS/3D_OS_release_data",
    "epochs": 250,
    "eval_step": 1,
    "exp_name": 'PN2_cosine_SR1',
    "grad_norm_clip": -1,
    "local_rank": 0,
    "loss": 'cosine',
    "num_points": 10000,
    "num_points_test": 10000,
    # NOTE(review): a run recorded further down in this notebook warns that the suggested
    # maximum number of workers on that system is 2 — consider lowering this value.
    "num_workers": 6,
    "resume": None,
    "save_feats": None,
    "save_step": 10,
    "script_mode": 'eval',
    "seed": 1,
    "sonn_h5_name": 'objectdataset.h5',
    "sonn_split": 'main_split',
    # Source set; eval_ood_md2sonn accepts 'SR1' or 'SR2' and picks the ID/OOD1 class sets from it.
    "src": 'SR1',
    "tar1": 'none',
    "tar2": 'none',
    "use_amp": False,
    "use_sync_bn": False,
    "wandb_group": 'md-2-sonn-augmCorr',
    "wandb_name": None,
    "wandb_proj": 'benchmark-3d-ood-cla'
}

opt = Options(opt_dict)

# NOTE(review): `config` is only passed to eval_ood_md2sonn, which does not read it.
config = {'optimizer': {'type': 'adam',
                        'skip_wd': [],
                        'weight_decay': 0.0001,
                        'kwargs': {'lr': 0.001}
                        },
          'scheduler': {'type': 'CosLR',
                        'kwargs': {'t_initial': 250,
                                   'cycle_limit': 1,
                                   'lr_min': 1e-05
                                   }
                        },
          'model': {'ENCO_NAME': 'pn2-msg',
                    'dropout': 0.5,
                    'cla_input_dim': 1024,
                    'act': 'relu'
                    }
          }

print(opt)
print(config)

# Returned in the order (train, ID, OOD1, OOD2): src_loader is the ID set, tar1/tar2 the OOD sets.
train_loader, src_loader, tar1_loader, tar2_loader = eval_ood_md2sonn(opt, config)

# Go back to /content, the working directory used by the following cells.
%cd /content


[Errno 2] No such file or directory: '/content/3D_OS'
/content


ModuleNotFoundError: No module named 'utils'

# Test if dataloaders work

In [None]:
# Test if the data loaders work
import plotly.graph_objects as go

# Only the first batch is needed, so fetch it directly instead of looping and breaking.
data, label = next(iter(src_loader))

# Flatten to an [N, 3] NumPy array of xyz coordinates for plotting.
points = np.asarray(data).reshape(-1, 3)

fig = go.Figure(
    data=[
        go.Scatter3d(
            x=points[:, 0],
            y=points[:, 1],
            z=points[:, 2],
            mode='markers',
        )
    ],
    layout=dict(
        scene=dict(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            zaxis=dict(visible=False),
        )
    ),
)
fig.show()


# Run data through the OpenShape model

In [None]:
# Run data through OpenShape
from tqdm import tqdm
device = torch.device("cpu")
model = model.to(device)

%cd /content
save_dir = "OpenShape_outputs/SR1"
if not os.path.exists(save_dir):
    print(f"Creating directory: {save_dir}")
    os.makedirs(save_dir)

print("Running training data through the OpenShape model")
train_feats = []
for i, (data, label) in tqdm(enumerate(train_loader)):
    points = data
    points = points.reshape(-1, 3)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)

    xyz, feat = filter_for_OpenShape(pcd)
    feat = feat.to(device)

    output_feat = model.forward(feat)

    train_feats.append(output_feat.detach().numpy())

outfile = save_dir + "/train_feats.npy"
np.save(outfile, np.array(train_feats))
print("Saved data to " + outfile)

print("Running ID data through the OpenShape model")
src_feats = np.array([])
for i, (data, label) in tqdm(enumerate(src_loader)):
    points = data
    points = points.reshape(-1, 3)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)

    xyz, feat = filter_for_OpenShape(pcd)
    feat = feat.to(device)

    output_feat = model.forward(feat)

    src_feats.append(output_feat.detach().numpy())

outfile = save_dir + "/src_feats"
np.save(outfile, np.array(src_feats))
print("Saved data to " + outfile)

print("Rinning OOD1 data throug the OpenShape model")
tar1_feats = np.array([])
for i, (data, label) in tqdm(enumerate(tar1_loader)):
    points = data
    points = points.reshape(-1, 3)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)

    xyz, feat = filter_for_OpenShape(pcd)
    feat = feat.to(device)

    output_feat = model.forward(feat)

    tar1_feats.append(output_feat.detach().numpy())

outfile = save_dir + "/tar1_feats"
np.save(outfile, np.array(tar1_feats))
print("Saved data to " + outfile)

print("Rinning OOD2 data throug the OpenShape model")
tar2_feats = np.array([])
for i, (data, label) in tqdm(enumerate(tar2_loader)):
    points = data
    points = points.reshape(-1, 3)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)

    xyz, feat = filter_for_OpenShape(pcd)
    feat = feat.to(device)

    output_feat = model.forward(feat)

    tar2_feats.append(output_feat.detach().numpy())

outfile = save_dir + "/tar2_feats"
np.save(outfile, np.array(tar2_feats))
print("Saved data to " + outfile)

2378it [34:08,  1.16it/s]


In [None]:
# Keep references to the in-memory features under `old_*` names, because the next cell
# rebinds train_feats / src_feats / tar1_feats / tar2_feats to the arrays loaded from disk.
old_train_feats = train_feats
old_src_feats = src_feats
old_tar1_feats = tar1_feats
old_tar2_feats = tar2_feats

In [None]:
# Load np arrays and convert to torch tensors

# Must match `save_dir` of the SR1 extraction cell, which writes into OpenShape_outputs/SR1.
feature_dir = "OpenShape_outputs/SR1"

train_feats = np.load(f"{feature_dir}/train_feats.npy")
src_feats = np.load(f"{feature_dir}/src_feats.npy")
tar1_feats = np.load(f"{feature_dir}/tar1_feats.npy")
tar2_feats = np.load(f"{feature_dir}/tar2_feats.npy")

# The features are saved as one [1, feat_dim] array per sample; flatten everything after
# the first axis so each array becomes [num_samples, feat_dim] (a no-op if already 2-D).
train_feats = train_feats.reshape(len(train_feats), -1)
src_feats = src_feats.reshape(len(src_feats), -1)
tar1_feats = tar1_feats.reshape(len(tar1_feats), -1)
tar2_feats = tar2_feats.reshape(len(tar2_feats), -1)

train_feats = torch.from_numpy(train_feats)
src_feats = torch.from_numpy(src_feats)
tar1_feats = torch.from_numpy(tar1_feats)
tar2_feats = torch.from_numpy(tar2_feats)

In [None]:
# Arrange the in-memory OOD2 features as one [num_samples, feat_dim] tensor.
x_elements = np.asarray(old_tar2_feats)
y_tensors = 847  # number of samples in the OOD2 split, i.e. rows of the result

# The values can only form y_tensors equally long rows if they divide evenly.
if x_elements.size % y_tensors != 0:
    raise ValueError("The number of elements in the array must be divisible by y_tensors")

# One row per sample. A single reshape handles a flat array as well as a list of
# per-sample arrays, and replaces the split-into-parts-then-stack round trip.
final_tar2_feats = torch.tensor(x_elements.reshape(y_tensors, -1))

print(final_tar2_feats)

tensor([[-44.6219, -11.1272, -29.5261,  ..., -12.1809, -21.1658,  -6.4345],
        [-21.0046, -17.1524, -33.3568,  ..., -12.6945,  -5.2376,  -4.8490],
        [-37.6113,   4.8579, -13.5774,  ...,  -1.6024, -29.2849, -25.7459],
        ...,
        [-17.1690,  -4.1219, -31.9059,  ...,   2.7161,  -2.6888,  14.1641],
        [-32.7395,  -8.1993, -34.6582,  ..., -27.9810,  18.0565,   1.0996],
        [-20.5269, -13.4782, -68.9780,  ..., -35.9190, -10.1037,  35.2361]],
       dtype=torch.float64)


In [None]:
# Rebind the working names to the reshaped `final_*` tensors for the kNN evaluation below.
# NOTE(review): only `final_tar2_feats` is assigned in the visible notebook. The other
# three names presumably came from re-running the previous cell with the other `old_*`
# inputs — confirm, otherwise this cell raises NameError on a fresh kernel.
train_feats = final_train_feats
src_feats = final_src_feats
tar1_feats = final_tar1_feats
tar2_feats = final_tar2_feats

In [None]:

from knn_cuda import KNN
knn = KNN(k=1, transpose_mode=True)

# Move every feature set to the GPU.
device = torch.device("cuda")
train_feats, src_feats, tar1_feats, tar2_feats = (
    feats.to(device) for feats in (train_feats, src_feats, tar1_feats, tar2_feats)
)

################################################
print("Euclidean distances in a non-normalized space:")
# The training features are the reference set for every nearest-neighbour query.
reference = train_feats.unsqueeze(0)

# ID samples: distance to, and index of, the nearest training sample
src_dist, src_ids = (result.squeeze().cpu() for result in knn(reference, src_feats.unsqueeze(0)))
src_scores = src_dist.reciprocal()  # small distance -> high confidence
#src_pred = np.asarray([train_labels[i] for i in src_ids])  # pred is label of nearest training sample

# OOD tar1: only the distance is needed
tar1_dist = knn(reference, tar1_feats.unsqueeze(0))[0].squeeze().cpu()
tar1_scores = tar1_dist.reciprocal()

# OOD tar2: only the distance is needed
tar2_dist = knn(reference, tar2_feats.unsqueeze(0))[0].squeeze().cpu()
tar2_scores = tar2_dist.reciprocal()

eval_ood_sncore(
    scores_list=[src_scores, tar1_scores, tar2_scores],
    preds_list=[None] * 3,  # [src_pred, None, None],
    labels_list=[None] * 3,  # [src_labels, None, None],
    src_label=1,  # confidence should be higher for ID samples
)


Euclidean distances in a non-normalized space:
AUROC - Src label: 1, Tar label: 0
SRC->TAR1:      AUROC: 0.7093, FPR95: 0.8883, AUPR_IN: 0.8188, AUPR_OUT: 0.5500
SRC->TAR2:      AUROC: 0.7706, FPR95: 0.8383, AUPR_IN: 0.8525, AUPR_OUT: 0.6355
SRC->TAR1+TAR2: AUROC: 0.7410, FPR95: 0.8624, AUPR_IN: 0.7369, AUPR_OUT: 0.7438
to spreadsheet: 0.7092644649827088,0.8883248730964467,0.8188178037655336,0.549951277846648,0.7706087103769103,0.8382526564344747,0.8524633410968205,0.6354814382874194,0.7410434104560352,0.8623853211009175,0.7368651347487273,0.7437531223314934


(-1,
 -1,
 {'fpr_at_95_tpr': 0.8883248730964467,
  'detection_error': 0.5638243436430416,
  'auroc': 0.7092644649827088,
  'aupr_in': 0.8188178037655336,
  'aupr_out': 0.549951277846648},
 {'fpr_at_95_tpr': 0.8382526564344747,
  'detection_error': 0.5197436287962169,
  'auroc': 0.7706087103769103,
  'aupr_in': 0.8524633410968205,
  'aupr_out': 0.6354814382874194},
 {'fpr_at_95_tpr': 0.8623853211009175,
  'detection_error': 0.4015435670100146,
  'auroc': 0.7410434104560352,
  'aupr_in': 0.7368651347487273,
  'aupr_out': 0.7437531223314934})

# Using SR2 as ID, create dataloaders for train, ID, OOD1 and OOD2

The datasets we use are the same as the datasets used in 3DOS. We also use the code given in the 3DOS paper to create the dataloaders.


Citation:
\@inproceedings{
alliegro2022towards,
title={Towards Open Set 3D Learning: Benchmarking and Understanding Semantic Novelty Detection on Pointclouds},
author={Antonio Alliegro and Francesco Cappio Borlino and Tatiana Tommasi},
booktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
year={2022},
url={https://openreview.net/forum?id=X2dHozbd1at}
}

In [None]:
# Clone 3D_OS repo from github (same setup commands as in the SR1 section)
!git clone https://github.com/antoalli/3D_OS.git
# Make the download script that ships with the repo executable and run it from inside the repo.
!cd 3D_OS && chmod +x download_data.sh && ./download_data.sh
# Extra Python packages. NOTE(review): presumably required by the 3D_OS code — confirm.
!pip install h5py protobuf lmdb msgpack-numpy ninja scikit-learn

In [None]:
# Change folder to 3D_OS to get the same dataloaders
%cd /content/3D_OS

import sys
import os
import warnings
import numpy as np

sys.path.append(os.getcwd())
import os.path as osp
import time
from torch.cuda.amp import GradScaler, autocast
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import DataLoader
from torchvision import transforms
from utils.utils import *
from utils.dist import *
# noinspection PyUnresolvedReferences
from utils.data_utils import H5_Dataset
#from datasets.modelnet import *
from datasets.scanobject import *
from models.classifiers import Classifier
from utils.ood_utils import get_confidence, eval_ood_sncore, iterate_data_odin, \
    iterate_data_energy, iterate_data_gradnorm, iterate_data_react, estimate_react_thres, print_ood_output, \
    get_penultimate_feats, get_network_output
import wandb
from base_args import add_base_args
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from models.common import convert_model_state, logits_entropy_loss
from models.ARPL_utils import Generator, Discriminator
from classifiers.common import train_epoch_cla, train_epoch_rsmix_exposure, train_epoch_cs
from classifiers.trainer_ddp_cla_md import get_args, load_yaml, get_md_eval_loaders

# Code from 3DOS repo classifiers/trainer_ddp_cla_md.py line 192-478

#  python -m torch.distributed.launch --nproc_per_node=1 Failure_Analysis/custom\
# tests/evaluating_PointNet_cosine_MLS.py --config cfgs/pn2-msg.yaml
# --exp_name PN2_cosine_SR1 --src SR1 --loss cosine -mode eval
# --ckpt_path outputs/PN2_cosine_SR1/models/model_best.pth


# Code from 3DOS repo classifiers/trainer_ddp_cla_md.py line 468-478
def eval_ood_md2sonn(opt, config):
    """Create the train, ID, OOD1 and OOD2 data loaders for the OOD evaluation.

    Args:
        opt: Options object. Reads ``seed``, ``batch_size``, ``num_workers``,
            ``data_root``, ``sonn_split``, ``sonn_h5_name``, ``num_points_test``
            and ``src``; it is also forwarded to ``get_md_eval_loaders``.
        config: Not read by this function; kept so the signature stays unchanged.

    Returns:
        Tuple ``(train_loader, id_loader, ood1_loader, ood2_loader)``.

    Raises:
        ValueError: If ``opt.src`` is neither ``'SR1'`` nor ``'SR2'``.
    """
    print(f"Arguments: {opt}")
    set_random_seed(opt.seed)

    # DataLoader settings shared by all ScanObject loaders: fixed order, no dropped samples.
    loader_kwargs = dict(
        batch_size=opt.batch_size,
        drop_last=False,
        shuffle=False,
        num_workers=opt.num_workers,
        sampler=None,
        worker_init_fn=init_np_seed,
    )

    # Dataset settings shared by all ScanObject subsets (the whole evaluation uses ScanObject data).
    scanobject_kwargs = dict(
        data_root=opt.data_root,
        sonn_split=opt.sonn_split,
        h5_file=opt.sonn_h5_name,
        split='all',  # both training (unused) and test samples are used during evaluation
        num_points=opt.num_points_test,
        transforms=None,  # no augmentation at inference time
    )

    def make_loader(class_choice):
        # One ScanObject subset wrapped in a DataLoader with the shared settings.
        return DataLoader(ScanObject(class_choice=class_choice, **scanobject_kwargs), **loader_kwargs)

    train_loader, _ = get_md_eval_loaders(opt)

    # source set -> (log message, ID class set, OOD1 class set)
    per_source = {
        'SR1': ("Src is SR1\n", "sonn_2_mdSet1", "sonn_2_mdSet2"),
        'SR2': ("Src is SR2\n", "sonn_2_mdSet2", "sonn_2_mdSet1"),
    }
    if opt.src not in per_source:
        raise ValueError(f"OOD evaluation - wrong src: {opt.src}")

    message, id_choice, ood1_choice = per_source[opt.src]
    print(message)
    id_loader = make_loader(id_choice)
    ood1_loader = make_loader(ood1_choice)
    ood2_loader = make_loader("sonn_ood_common")

    return train_loader, id_loader, ood1_loader, ood2_loader

class Options:
    """Attribute-style view of an options dictionary.

    Every key of the dictionary becomes an attribute of the instance, so the
    values can be read as ``opt.seed``, ``opt.src`` and so on.

    Args:
        dictionary: Mapping of option name to value.
    """

    def __init__(self, dictionary):
        for key, value in dictionary.items():
            setattr(self, key, value)

    def __repr__(self):
        # Show the option values instead of the default "<... object at 0x...>",
        # so that print(opt) and f"Arguments: {opt}" are actually informative.
        fields = ", ".join(f"{key}={value!r}" for key, value in vars(self).items())
        return f"{type(self).__name__}({fields})"

# Option values for the 3DOS helpers. They are wrapped in `Options` below so they can be
# read as attributes (opt.seed, opt.src, ...), the way eval_ood_md2sonn accesses them.
opt_dict = {
    "apply_fix_cellphone": True,
    "augm_set": 'rw',
    # Used as the DataLoader batch_size in eval_ood_md2sonn.
    "batch_size": 1,
    "checkpoints_dir": 'outputs',
    # NOTE(review): ckpt_path and exp_name still mention SR1 although src is 'SR2' here;
    # eval_ood_md2sonn itself does not read them — confirm nothing else depends on them.
    "ckpt_path": 'outputs/PN2_cosine_SR1/models/model_best.pth',
    "config": 'cfgs/pn2-msg.yaml',
    "corruption": None,
    "cs": False,
    "cs_beta": 0.1,
    "cs_gan_lr": 0.0002,
    "data_root": "/content/3D_OS/3D_OS_release_data",
    "epochs": 250,
    "eval_step": 1,
    "exp_name": 'PN2_cosine_SR1',
    "grad_norm_clip": -1,
    "local_rank": 0,
    "loss": 'cosine',
    "num_points": 10000,
    "num_points_test": 10000,
    # NOTE(review): the recorded output of this cell warns that the suggested maximum
    # number of workers on that system is 2 — consider lowering this value.
    "num_workers": 6,
    "resume": None,
    "save_feats": None,
    "save_step": 10,
    "script_mode": 'eval',
    "seed": 1,
    "sonn_h5_name": 'objectdataset.h5',
    "sonn_split": 'main_split',
    # Source set; eval_ood_md2sonn accepts 'SR1' or 'SR2' and picks the ID/OOD1 class sets from it.
    "src": 'SR2',
    "tar1": 'none',
    "tar2": 'none',
    "use_amp": False,
    "use_sync_bn": False,
    "wandb_group": 'md-2-sonn-augmCorr',
    "wandb_name": None,
    "wandb_proj": 'benchmark-3d-ood-cla'
}

opt = Options(opt_dict)

# NOTE(review): `config` is only passed to eval_ood_md2sonn, which does not read it.
config = {'optimizer': {'type': 'adam',
                        'skip_wd': [],
                        'weight_decay': 0.0001,
                        'kwargs': {'lr': 0.001}
                        },
          'scheduler': {'type': 'CosLR',
                        'kwargs': {'t_initial': 250,
                                   'cycle_limit': 1,
                                   'lr_min': 1e-05
                                   }
                        },
          'model': {'ENCO_NAME': 'pn2-msg',
                    'dropout': 0.5,
                    'cla_input_dim': 1024,
                    'act': 'relu'
                    }
          }

print(opt)
print(config)

# Returned in the order (train, ID, OOD1, OOD2): src_loader is the ID set, tar1/tar2 the OOD sets.
train_loader, src_loader, tar1_loader, tar2_loader = eval_ood_md2sonn(opt, config)

# Go back to /content, the working directory used by the following cells.
%cd /content


/content/3D_OS
<__main__.Options object at 0x7d88f38ddd50>
{'optimizer': {'type': 'adam', 'skip_wd': [], 'weight_decay': 0.0001, 'kwargs': {'lr': 0.001}}, 'scheduler': {'type': 'CosLR', 'kwargs': {'t_initial': 250, 'cycle_limit': 1, 'lr_min': 1e-05}}, 'model': {'ENCO_NAME': 'pn2-msg', 'dropout': 0.5, 'cla_input_dim': 1024, 'act': 'relu'}}
Arguments: <__main__.Options object at 0x7d88f38ddd50>
ModelNet40_OOD - Reading data from h5py file: /content/3D_OS/3D_OS_release_data/modelnet40_normal_resampled/ood_sets_cache/SR2_train.h5
ModelNet40_OOD - split: train, categories: {'bed': 0, 'toilet': 1, 'desk': 2, 'monitor': 3, 'table': 2}
SR2 train data len: 1916
ModelNet40_OOD - Reading data from h5py file: /content/3D_OS/3D_OS_release_data/modelnet40_normal_resampled/ood_sets_cache/SR2_test.h5
ModelNet40_OOD - split: test, categories: {'bed': 0, 'toilet': 1, 'desk': 2, 'monitor': 3, 'table': 2}
Src is SR2




This DataLoader will create 6 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.



ScanObject - num_points: 10000, sonn_split: main_split, h5_suffix: objectdataset.h5, split: all, class_choice: {10: 0, 14: 1, 5: 2, 6: 3, 9: 2}, num samples: 788
ScanObject - num_points: 10000, sonn_split: main_split, h5_suffix: objectdataset.h5, split: all, class_choice: {4: 0, 8: 1, 7: 2, 12: 3, 13: 4}, num samples: 1255
ScanObject - num_points: 10000, sonn_split: main_split, h5_suffix: objectdataset.h5, split: all, class_choice: {0: 404, 1: 404, 2: 404, 3: 404, 11: 404}, num samples: 847
/content


# Run data through the OpenShape model

In [None]:
# Run data through OpenShape
from tqdm import tqdm
device = torch.device("cpu")
model = model.to(device)

%cd /content
save_dir = "OpenShape_outputs/SR2"
if not os.path.exists(save_dir):
    print(f"Creating directory: {save_dir}")
    os.makedirs(save_dir)

print("Running training data through the OpenShape model")
train_feats = []
for i, (data, label) in tqdm(enumerate(train_loader)):
    points = data
    points = points.reshape(-1, 3)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)

    xyz, feat = filter_for_OpenShape(pcd)
    feat = feat.to(device)

    output_feat = model.forward(feat)

    train_feats.append(output_feat.detach().numpy())

outfile = save_dir + "/train_feats.npy"
np.save(outfile, np.array(train_feats))
print("Saved data to " + outfile)

In [None]:


# (description, loader, output file stem) for the ID and the two OOD splits.
eval_splits = [
    ("ID", src_loader, "src_feats"),
    ("OOD1", tar1_loader, "tar1_feats"),
    ("OOD2", tar2_loader, "tar2_feats"),
]

extracted = {}
for split_name, loader, file_stem in eval_splits:
    print(f"Running {split_name} data through the OpenShape model")

    split_feats = []
    with torch.no_grad():  # inference only, do not build an autograd graph
        for data, label in tqdm(loader, desc=split_name):
            # Flatten the batch to [N, 3]; this assumes one point cloud per batch (batch_size=1).
            points = data.reshape(-1, 3)
            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(points)

            xyz, feat = filter_for_OpenShape(pcd)
            feat = feat.to(device)

            output_feat = model(feat)
            split_feats.append(output_feat.detach().cpu().numpy())

    # Spell out the ".npy" suffix so the printed path is the file np.save really writes
    # (and the one the loading cell reads).
    outfile = f"{save_dir}/{file_stem}.npy"
    np.save(outfile, np.array(split_feats))
    print("Saved data to " + outfile)
    extracted[file_stem] = split_feats

# Keep the per-split lists available under the names used elsewhere in the notebook.
src_feats = extracted["src_feats"]
tar1_feats = extracted["tar1_feats"]
tar2_feats = extracted["tar2_feats"]

Running ID data through the OpenShape model



This DataLoader will create 6 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.

788it [11:18,  1.16it/s]

Saved data to OpenShape_outputs/SR2/src_feats
Rinning OOD1 data throug the OpenShape model



1255it [18:35,  1.12it/s]

Saved data to OpenShape_outputs/SR2/tar1_feats
Rinning OOD2 data throug the OpenShape model



847it [12:20,  1.14it/s]

Saved data to OpenShape_outputs/SR2/tar2_feats





In [None]:
# Read the four saved SR2 feature files, drop the singleton axis 1 of each array and
# wrap the result as a torch tensor.
sr2_feature_files = [
    "OpenShape_outputs/SR2/train_feats.npy",
    "OpenShape_outputs/SR2/src_feats.npy",
    "OpenShape_outputs/SR2/tar1_feats.npy",
    "OpenShape_outputs/SR2/tar2_feats.npy",
]

train_feats, src_feats, tar1_feats, tar2_feats = [
    torch.from_numpy(np.squeeze(np.load(feature_file), axis=1))
    for feature_file in sr2_feature_files
]

In [None]:

from knn_cuda import KNN
knn = KNN(k=1, transpose_mode=True)

# Move every feature set to the GPU.
device = torch.device("cuda")
train_feats, src_feats, tar1_feats, tar2_feats = (
    feats.to(device) for feats in (train_feats, src_feats, tar1_feats, tar2_feats)
)

################################################
print("Euclidean distances in a non-normalized space:")
# The training features are the reference set for every nearest-neighbour query.
reference = train_feats.unsqueeze(0)

# ID samples: distance to, and index of, the nearest training sample
src_dist, src_ids = (result.squeeze().cpu() for result in knn(reference, src_feats.unsqueeze(0)))
src_scores = src_dist.reciprocal()  # small distance -> high confidence
#src_pred = np.asarray([train_labels[i] for i in src_ids])  # pred is label of nearest training sample

# OOD tar1: only the distance is needed
tar1_dist = knn(reference, tar1_feats.unsqueeze(0))[0].squeeze().cpu()
tar1_scores = tar1_dist.reciprocal()

# OOD tar2: only the distance is needed
tar2_dist = knn(reference, tar2_feats.unsqueeze(0))[0].squeeze().cpu()
tar2_scores = tar2_dist.reciprocal()

eval_ood_sncore(
    scores_list=[src_scores, tar1_scores, tar2_scores],
    preds_list=[None] * 3,  # [src_pred, None, None],
    labels_list=[None] * 3,  # [src_labels, None, None],
    src_label=1,  # confidence should be higher for ID samples
)


Euclidean distances in a non-normalized space:
AUROC - Src label: 1, Tar label: 0
SRC->TAR1:      AUROC: 0.6373, FPR95: 0.8032, AUPR_IN: 0.4992, AUPR_OUT: 0.7571
SRC->TAR2:      AUROC: 0.5183, FPR95: 0.9457, AUPR_IN: 0.5220, AUPR_OUT: 0.5336
SRC->TAR1+TAR2: AUROC: 0.5893, FPR95: 0.8606, AUPR_IN: 0.3466, AUPR_OUT: 0.8031
to spreadsheet: 0.6372873986288349,0.803187250996016,0.49922345612929653,0.7570801190469467,0.5182774078713166,0.9456906729634003,0.5219628685531112,0.5336026283680585,0.5893323738088453,0.8606089438629876,0.3465921559420079,0.8031033611301948


(-1,
 -1,
 {'fpr_at_95_tpr': 0.803187250996016,
  'detection_error': 0.33731978323428935,
  'auroc': 0.6372873986288349,
  'aupr_in': 0.49922345612929653,
  'aupr_out': 0.7570801190469467},
 {'fpr_at_95_tpr': 0.9456906729634003,
  'detection_error': 0.47496332993193746,
  'auroc': 0.5182774078713166,
  'aupr_in': 0.5219628685531112,
  'aupr_out': 0.5336026283680585},
 {'fpr_at_95_tpr': 0.8606089438629876,
  'detection_error': 0.2577222190311235,
  'auroc': 0.5893323738088453,
  'aupr_in': 0.3465921559420079,
  'aupr_out': 0.8031033611301948})

In [None]:
# Run this command to save data to google drive
#!cp -r /content/OpenShape_outputs "/content/drive/MyDrive/Skole/Advanced machine learning project"