<a href="https://colab.research.google.com/github/wandb/examples/blob/master/colabs/pyg/point-cloud-segmentation/00_eda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
<!--- @wandbcode{pyg-dgcnn-train} -->

# 🔥🔥 Explore ShapeNet Dataset using PyTorch Geometric and Weights & Biases 🪄🐝

<!--- @wandbcode{pyg-dgcnn-train} -->

[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wandb/examples/blob/pyg/point-cloud-segmentation/colabs/pyg/point-cloud-segmentation/00_eda.ipynb)

This notebook demonstrates how to fetch and load the ShapeNet dataset for point cloud classification and segmentation tasks using [PyTorch Geometric](https://www.pyg.org/) and explore the dataset using [Weights & Biases](https://wandb.ai/site).

If you wish to know how to train and evaluate a DGCNN model on the ShapeNetCore dataset using Weights & Biases, you can check out the following notebooks:

**Train DGCNN:** [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wandb/examples/blob/pyg/point-cloud-segmentation/colabs/pyg/point-cloud-segmentation/01_dgcnn_train.ipynb)

**Evaluate DGCNN:** [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wandb/examples/blob/pyg/point-cloud-segmentation/colabs/pyg/point-cloud-segmentation/02_dgcnn_evaluate.ipynb)

# Install Required Packages

In [None]:
import os
import torch

# Expose the installed PyTorch version to shell commands through the TORCH
# environment variable, and echo it for reference.
torch_version = torch.__version__
os.environ['TORCH'] = torch_version
print(torch_version)

In [None]:
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-cluster -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!pip install -q wandb

## Import Libraries

In [None]:
import os

import wandb
import numpy as np
from tqdm.auto import tqdm

import torch
import torch.nn.functional as F
from torch_scatter import scatter
from torchmetrics.functional import jaccard_index

import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet
from torch_geometric.loader import DataLoader
from torch_geometric.nn import MLP, DynamicEdgeConv

In [None]:
# Weights & Biases project and run name under which everything in this
# notebook is logged.
wandb_project = "pyg-point-cloud" #@param {"type": "string"}
wandb_run_name = "evaluate-dgcnn" #@param {"type": "string"}

wandb.init(project=wandb_project, name=wandb_run_name, job_type="visualize")

# The ShapeNet category to explore is stored on the run config so that it is
# recorded together with the run. The dropdown lists every option, including
# the default "Airplane".
config = wandb.config
config.category = 'Airplane' #@param ["Airplane", "Bag", "Cap", "Car", "Chair", "Earphone", "Guitar", "Knife", "Lamp", "Laptop", "Motorbike", "Mug", "Pistol", "Rocket", "Skateboard", "Table"] {type:"raw"}

# Load the train-val and test splits of the selected category, with
# NormalizeScale applied as a pre-transform.
path = os.path.join('ShapeNet', config.category)
pre_transform = T.NormalizeScale()
train_dataset = ShapeNet(path, config.category, split='trainval', pre_transform=pre_transform)
test_dataset = ShapeNet(path, config.category, split='test', pre_transform=pre_transform)

## Visualize Train-Val Dataset

In [None]:
# Count, over the whole train-val split, how many points carry each
# segmentation label. Counts are accumulated across samples; assigning the
# per-sample count directly would keep only the last sample seen per label.
segmentation_class_frequency = {}
for idx in tqdm(range(len(train_dataset))):
    segmentation_label = train_dataset[idx].y.numpy().tolist()
    for label in set(segmentation_label):
        segmentation_class_frequency[label] = (
            segmentation_class_frequency.get(label, 0)
            + segmentation_label.count(label)
        )

# Smallest label id in this category; used later to shift labels so that the
# per-point value appended to each point starts at 1.
class_offset = min(segmentation_class_frequency)

In [None]:
# One table row per train-val sample: the point cloud (with a per-point
# label value appended), the per-sample label counts, the category and the
# split name.
table = wandb.Table(columns=[
    "Point-Cloud", "Segmentation-Class-Frequency", "Model-Category", "Split"
])
for idx in tqdm(range(len(train_dataset))):
    sample = train_dataset[idx]
    segmentation_label = sample.y.numpy().tolist()

    # Start every known label at zero, then fill in the labels that occur
    # in this sample.
    frequency_dict = dict.fromkeys(segmentation_class_frequency, 0)
    for label in set(segmentation_label):
        frequency_dict[label] = segmentation_label.count(label)

    # Append the shifted label (1-based relative to class_offset) to each
    # point's coordinates.
    pc_viz = [
        point + [label + 1 - class_offset]
        for point, label in zip(sample.pos.numpy().tolist(), segmentation_label)
    ]

    table.add_data(
        wandb.Object3D(np.array(pc_viz)), frequency_dict, config.category, "Train-Val"
    )

In [None]:
# Log the train-val label counts as a bar chart; the same text is used as
# the log key and as the chart title.
data = [[label, count] for label, count in segmentation_class_frequency.items()]
train_val_chart_title = f"ShapeNet Class-Frequency Distribution for {config.category} Train-Val Set"
train_val_chart = wandb.plot.bar(
    wandb.Table(data=data, columns=["Class", "Frequency"]),
    "Class",
    "Frequency",
    title=train_val_chart_title,
)
wandb.log({train_val_chart_title: train_val_chart})

## Visualize Test Dataset

In [None]:
# Count, over the whole test split, how many points carry each segmentation
# label. Samples are read from test_dataset (not train_dataset) and counts
# are accumulated across samples rather than overwritten.
segmentation_class_frequency = {}
for idx in tqdm(range(len(test_dataset))):
    segmentation_label = test_dataset[idx].y.numpy().tolist()
    for label in set(segmentation_label):
        segmentation_class_frequency[label] = (
            segmentation_class_frequency.get(label, 0)
            + segmentation_label.count(label)
        )

In [None]:
# Append one row per test sample to the existing table, then log the table.
# Samples are read from test_dataset so that rows tagged "Test" really hold
# test data.
for idx in tqdm(range(len(test_dataset))):
    sample = test_dataset[idx]
    pc_viz = sample.pos.numpy().tolist()
    segmentation_label = sample.y.numpy().tolist()

    # Start every known label at zero, then fill in the labels that occur
    # in this sample.
    frequency_dict = {key: 0 for key in segmentation_class_frequency}
    for label in set(segmentation_label):
        frequency_dict[label] = segmentation_label.count(label)

    # Append the shifted label (1-based relative to class_offset) to each
    # point's coordinates.
    for j in range(len(pc_viz)):
        pc_viz[j] += [segmentation_label[j] + 1 - class_offset]

    table.add_data(
        wandb.Object3D(np.array(pc_viz)), frequency_dict, config.category, "Test"
    )

wandb.log({"ShapeNet-Dataset": table})

In [None]:
# Log the test-split label counts as a bar chart. The key and title include
# the category, matching the train-val chart; the original f-strings had no
# placeholder.
data = [[key, segmentation_class_frequency[key]] for key in segmentation_class_frequency]
test_chart_title = f"ShapeNet Class-Frequency Distribution for {config.category} Test Set"
wandb.log({
    test_chart_title: wandb.plot.bar(
        wandb.Table(data=data, columns=["Class", "Frequency"]),
        "Class", "Frequency",
        title=test_chart_title
    )
})

In [None]:
# Close the Weights & Biases run started by wandb.init above.
wandb.finish()