# 🔥🔥 Explore ModelNet Datasets using PyTorch Geometric and Weights & Biases 🪄🐝

<!--- @wandbcode{pyg-modelnet-eda} -->

## Install Required Libraries

In [None]:
import os
import torch
# Export the installed PyTorch version as the TORCH environment variable so
# the `pip install` shell commands below can reference it as ${TORCH}.
torch_version = torch.__version__
os.environ['TORCH'] = torch_version
print(torch_version)

We now install PyTorch Geometric according to our PyTorch version. We also install Weights & Biases.

In [None]:
# Install the PyG extension packages from the wheel index whose URL is built
# from the TORCH environment variable (the PyTorch version exported above).
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-cluster -f https://data.pyg.org/whl/torch-${TORCH}.html
# Install PyTorch Geometric itself directly from its GitHub repository.
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
# Install the Weights & Biases client.
# NOTE(review): `pyvis` is imported later in this notebook but is not installed
# here -- confirm the runtime already provides it.
!pip install -q wandb

### Import Libraries

In [None]:
from glob import glob
from PIL import Image
from tqdm.auto import tqdm

import wandb

import torch
import torch.nn.functional as F

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network
from mpl_toolkits.mplot3d import Axes3D

import torch_geometric.transforms as T
from torch_geometric.datasets import ModelNet
from torch_geometric.loader import DataLoader
from torch_geometric.utils import to_networkx
from torch_geometric.nn import knn_graph, radius_graph

## Initialize Weights & Biases

We need to call [`wandb.init()`](https://docs.wandb.ai/ref/python/init) once at the beginning of our program to initialize a new job. This creates a new run in W&B and launches a background process to sync data.

In [None]:
wandb_project = "pyg-point-cloud" #@param {"type": "string"}
wandb_run_name = "modelnet10/train/sampling-comparison" #@param {"type": "string"}

# Start a new W&B run for this exploratory-data-analysis job.
wandb.init(job_type="eda", name=wandb_run_name, project=wandb_project)

# Values stored on the run config are synced with wandb.
config = wandb.config
config.display_sample = 2048  #@param {type:"slider", min:256, max:4096, step:16}
config.modelnet_dataset_alias = "ModelNet10" #@param ["ModelNet10", "ModelNet40"] {type:"raw"}

# Class names for ModelNet10 / ModelNet40 are the names of the sub-directories
# of `<dataset alias>/raw`, sorted alphabetically.
# NOTE: the glob matches nothing until that directory exists on disk, so this
# list is empty if the dataset has not been downloaded yet.
raw_class_dirs = glob(os.path.join(config.modelnet_dataset_alias, "raw", "*", ""))
categories = sorted(path.split(os.sep)[-2] for path in raw_class_dirs)

config.categories = categories

## Load ModelNet Dataset using PyTorch Geometric

In [None]:
# `pre_transform` and `transform` are both handed to the dataset below;
# `transform` samples `config.display_sample` points from each model.
pre_transform = T.NormalizeScale()
transform = T.SamplePoints(config.display_sample)

# Both splits share every argument except the `train` flag. The dataset name
# is the trailing two characters of the alias ("10" or "40").
dataset_kwargs = dict(
    root=config.modelnet_dataset_alias,
    name=config.modelnet_dataset_alias[-2:],
    transform=transform,
    pre_transform=pre_transform,
)
train_dataset = ModelNet(train=True, **dataset_kwargs)
val_dataset = ModelNet(train=False, **dataset_kwargs)

# The class names are read from the sub-directories of `<root>/raw`. If they
# were collected before the raw data existed on disk (e.g. on a fresh
# runtime), the list is empty and every later `config.categories[label]`
# lookup would raise IndexError, so rebuild it now that the data is present.
if not config.categories:
    categories = sorted(
        path.split(os.sep)[-2]
        for path in glob(os.path.join(config.modelnet_dataset_alias, "raw", "*", ""))
    )
    # `allow_val_change` is required because the key was already set on the run config.
    config.update({"categories": categories}, allow_val_change=True)

## Log Data to [`wandb.Table`](https://docs.wandb.ai/ref/python/data-types/table)

We now log the dataset using a [Weights & Biases Table](https://docs.wandb.ai/guides/data-vis), which lets us visualize the individual point clouds in W&B's interactive 3D visualization format [`wandb.Object3D`](https://docs.wandb.ai/ref/python/data-types/object3d). We also log the frequency distribution of the classes in the dataset using [`wandb.plot`](https://docs.wandb.ai/guides/track/log/plots).

In [None]:
# Number of leading training samples to log.
num_train_samples = 20

# One row per sample: interactive 3D point cloud, class name, and split.
table = wandb.Table(columns=["Model", "Class", "Split"])
# Start every class at zero so classes absent from the subset still appear.
category_dict = {key: 0 for key in config.categories}
for idx in tqdm(range(min(num_train_samples, len(train_dataset)))):
    # Index the dataset once per sample: each access applies the dataset's
    # `transform` (random point sampling) again.
    sample = train_dataset[idx]
    category = config.categories[int(sample.y.item())]
    category_dict[category] += 1
    table.add_data(
        wandb.Object3D(sample.pos.numpy()),
        category,
        "Train"
    )
# Log the training table under its own key so it is not lost when `table`
# is rebuilt for the other split.
wandb.log({f"{config.modelnet_dataset_alias} Train": table})

# Bar chart of per-class counts, keyed separately from the test-split chart
# so the two do not overwrite each other.
chart_title = f"{config.modelnet_dataset_alias} Train Class-Frequency Distribution"
data = [[key, category_dict[key]] for key in config.categories]
wandb.log({
    chart_title: wandb.plot.bar(
        wandb.Table(data=data, columns=["Class", "Frequency"]),
        "Class", "Frequency",
        title=chart_title
    )
})

In [None]:
# Number of leading test-split samples to log.
num_val_samples = 100

# One row per sample: interactive 3D point cloud, class name, and split.
table = wandb.Table(columns=["Model", "Class", "Split"])
# Start every class at zero so classes absent from the subset still appear.
category_dict = {key: 0 for key in config.categories}
for idx in tqdm(range(min(num_val_samples, len(val_dataset)))):
    # Index the dataset once per sample: each access applies the dataset's
    # `transform` (random point sampling) again.
    sample = val_dataset[idx]
    category = config.categories[int(sample.y.item())]
    category_dict[category] += 1
    table.add_data(
        wandb.Object3D(sample.pos.numpy()),
        category,
        "Test"
    )
wandb.log({config.modelnet_dataset_alias: table})

# Bar chart of per-class counts over the logged subset.
chart_title = f"{config.modelnet_dataset_alias} Class-Frequency Distribution"
data = [[key, category_dict[key]] for key in config.categories]
wandb.log({
    chart_title: wandb.plot.bar(
        wandb.Table(data=data, columns=["Class", "Frequency"]),
        "Class", "Frequency",
        title=chart_title
    )
})

In [None]:
# End the W&B run that was started with wandb.init() above.
wandb.finish()

Next, you can check out the following notebook to learn how to compare different sampling strategies in PyTorch Geometric using Weights & Biases:

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/pyg-sampling)