In [1]:
# Following https://github.com/rusty1s/pytorch_scatter/issues/7#issuecomment-522295393 and
# and item 2 in Frequently Asked Questions in https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html
!pip uninstall torch-scatter torch-sparse
!pip uninstall torch-scatter torch-sparse
!pip uninstall torch-scatter torch-sparse

#!pip install torch-scatter==latest+cu101 torch-sparse==latest+cu101 -f https://s3.eu-central-1.amazonaws.com/pytorch-geometric.com/whl/torch-1.6.0.html
#!pip install torch-geometric==1.6.1

!pip install --no-cache-dir torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html 
!pip install --no-cache-dir torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install --no-cache-dir torch-cluster -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install --no-cache-dir torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install --no-cache-dir torch-geometric

Uninstalling torch-scatter-2.0.7:
  Would remove:
    /usr/local/lib/python3.7/dist-packages/test/*
    /usr/local/lib/python3.7/dist-packages/torch_scatter-2.0.7.dist-info/*
    /usr/local/lib/python3.7/dist-packages/torch_scatter/*
  Would not remove (might be manually added):
    /usr/local/lib/python3.7/dist-packages/test/test_basis.py
    /usr/local/lib/python3.7/dist-packages/test/test_cat.py
    /usr/local/lib/python3.7/dist-packages/test/test_coalesce.py
    /usr/local/lib/python3.7/dist-packages/test/test_conv.py
    /usr/local/lib/python3.7/dist-packages/test/test_convert.py
    /usr/local/lib/python3.7/dist-packages/test/test_diag.py
    /usr/local/lib/python3.7/dist-packages/test/test_eye.py
    /usr/local/lib/python3.7/dist-packages/test/test_fps.py
    /usr/local/lib/python3.7/dist-packages/test/test_graclus.py
    /usr/local/lib/python3.7/dist-packages/test/test_grid.py
    /usr/local/lib/python3.7/dist-packages/test/test_knn.py
    /usr/local/lib/python3.7/dist-packages

# Point Clouds

Point clouds are simply a collection of points in 2d or 3d space. They are different from graphs in that they do not need to have edges. Nonetheless, edges can be constructed by connecting each point in the point cloud to its K nearest neighbors.

Here, we explore some of these datasets. A point cloud can have a label associated with it. For example, the label for a collection of points that represents a chair will be "chair." Graph Neural Networks can be trained to identify those categories (i.e., graph classification).

The dataset loaders included in PyG also give access to point cloud datasets.

## Exercise 1: visualizing ShapeNet dataset

The ShapeNet dataset consists of 14007 point clouds. Each point cloud has a category associated with it (e.g. Airplane, Guitar, Table, etc.). For more information about the dataset, refer to https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/datasets/shapenet.html




In [2]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from torch_geometric.datasets import ShapeNet
import itertools
import numpy as np
# Build the ShapeNet dataset object rooted at /tmp/ShapeNet.
# Note this can take 5 minutes to complete.
shapenet_root = '/tmp/ShapeNet'
dataset_shapenet = ShapeNet(root=shapenet_root)


Let's first explore the properties of the ShapeNet dataset

In [3]:
# Dataset-level summary.
print(dataset_shapenet)
print("Number of Graphs: ", dataset_shapenet.len())

# Pick one sample by index and look it up once.
tmp = 5
sample = dataset_shapenet[tmp]

# Summary of the sample (printed twice).
print(sample)
print(sample)

# First row of the position matrix and of the feature matrix.
print(sample.pos[0, :])
print(sample.x[0, :])

# Labels of points 100..299, followed by how many distinct labels occur there.
part_labels = sample.y[100:300]
print(part_labels)
print(len(set(part_labels.numpy())))

# Category entry stored on the sample.
print(sample.category)

ShapeNet(14007, categories=['Airplane', 'Bag', 'Cap', 'Car', 'Chair', 'Earphone', 'Guitar', 'Knife', 'Lamp', 'Laptop', 'Motorbike', 'Mug', 'Pistol', 'Rocket', 'Skateboard', 'Table'])
Number of Graphs:  14007
Data(category=[1], pos=[2500, 3], x=[2500, 3], y=[2500])
Data(category=[1], pos=[2500, 3], x=[2500, 3], y=[2500])
tensor([ 0.1766, -0.0722,  0.1913])
tensor([ 0.0993,  0.0000, -0.9951])
tensor([14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
        13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
        13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 12, 12, 12, 12, 12,
        12, 12, 12, 14, 14, 14, 14, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13,
        13, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14,
        13, 13, 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 14, 14, 14, 14, 14, 13,
        13, 13, 13, 13, 13, 14, 14, 14, 14, 14

You can see that the dataset consists of about 14000 graphs. Each graph is a point cloud of one out of 16 shape categories. Each point in the graph is labeled by the part of the object it belongs to. For example, for a bag, these labels can be either "handle" or "body."

To understand what pos, x, and y are in a PyG dataset, see 

Section "Data Handling of Graphs" of this link https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html

and Section 14 of http://pages.di.unipi.it/citraro/files/slides/Landolfi_tutorial.pdf

Let's visualize 16 randomly selected point clouds in the ShapeNet dataset using plotly.

In [4]:
def plot_samples_shapenet(dataset, rows=4, cols=4, aspectmode=None):
    """Plot a grid of randomly chosen point clouds from a ShapeNet-style dataset.

    Each subplot is a 3D scatter of one sample. Points are coloured by the
    sample's per-point labels (``data.y``) and the subplot title is the
    sample's category name.

    Args:
        dataset: Indexable dataset exposing ``categories`` (a sequence of
            category names) and yielding samples with ``pos``, ``y`` and
            ``category`` tensors.
        rows: Number of subplot rows (default 4).
        cols: Number of subplot columns (default 4).
        aspectmode: Optional plotly scene aspect mode (e.g. ``"data"``)
            applied to every subplot. ``None`` (default) leaves plotly's
            default aspect handling untouched.

    Raises:
        ValueError: If ``dataset`` is empty.
    """
    n_samples = len(dataset)
    if n_samples == 0:
        raise ValueError("Cannot plot samples from an empty dataset")

    n_plots = rows * cols

    # Select random indices to plot. They are drawn with replacement, so the
    # same sample may appear more than once in the grid.
    ids = np.random.randint(low=0, high=n_samples, size=n_plots)

    # Load every selected sample exactly once; the same objects are used for
    # the subplot titles and for the traces.
    samples = [dataset[int(ind)] for ind in ids]

    # Category name of each selected sample (category holds a single index).
    labels = [dataset.categories[int(sample.category)] for sample in samples]

    # One independent spec dict per cell (list multiplication would alias them).
    specs = [[{'type': 'scatter3d'} for _ in range(cols)] for _ in range(rows)]

    fig = make_subplots(rows=rows, cols=cols,
                        specs=specs,
                        subplot_titles=labels,
                        vertical_spacing=0.04,
                        horizontal_spacing=0.04)

    # Fill subplots in row-major order, matching the order of `labels`.
    grid_cells = itertools.product(range(rows), range(cols))
    for (row, col), sample in zip(grid_cells, samples):
        # Hand plotly plain NumPy arrays rather than torch tensors.
        pos = sample.pos.cpu().numpy()
        color = sample.y.cpu().numpy()

        # The second and third coordinate columns are swapped for display, so
        # the stored third column is drawn on plotly's y axis and the stored
        # second column on its z axis.
        xs, ys, zs = pos[:, 0], pos[:, 2], pos[:, 1]

        fig.add_trace(
            go.Scatter3d(x=xs, y=ys, z=zs, mode="markers",
                         marker=dict(size=2, color=color)),
            row=row + 1, col=col + 1  # plotly subplot indices are 1-based
        )

    if aspectmode is not None:
        # Scenes are named "scene", "scene2", "scene3", ... in the layout.
        fig.update_layout(**{"scene{}_aspectmode".format(i if i > 1 else ""): aspectmode
                             for i in range(1, n_plots + 1)})

    fig.update_layout(height=1500, width=1500, title_text="ShapeNet Samples")
    fig.show()

plot_samples_shapenet(dataset_shapenet)

## Exercise 2: ModelNet dataset

The second dataset that we explore here is the ModelNet dataset. It consists of 3D CAD models from the 10 or 40 most common object categories in the world (ModelNet10 and ModelNet40, respectively). When loading the dataset, if you do not specify whether you want the 10- or the 40-category version, the 10-category version will be downloaded.

For more information, see 

https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/datasets/modelnet.html

and

https://modelnet.cs.princeton.edu/


In [6]:
from torch_geometric.datasets import ModelNet
import itertools
import numpy as np
# Build the ModelNet dataset object rooted at /tmp/ModelNet.
# Note this can take 5 minutes to complete.
modelnet_root = '/tmp/ModelNet'
dataset_modelnet = ModelNet(root=modelnet_root)

In [7]:
# Show the dataset summary and, on the next line, the summary of its first sample.
print(dataset_modelnet, dataset_modelnet[0], sep="\n")

ModelNet10(3991)
Data(face=[3, 6273], pos=[4996, 3], y=[1])


In [8]:
def plot_samples_modelnet(dataset, rows=4, cols=4, aspectmode="data"):
    """Plot a grid of randomly chosen point clouds from a ModelNet-style dataset.

    Each subplot is a 3D scatter of the vertex positions of one sample. A
    sample carries a single class label for the whole model (``data.y`` has
    one element), so that label is shown in the subplot title and selects
    one uniform marker colour for the subplot, instead of being passed as a
    per-point colour array.

    Args:
        dataset: Indexable dataset yielding samples with a ``pos`` tensor and
            a single-element integer ``y`` tensor.
        rows: Number of subplot rows (default 4).
        cols: Number of subplot columns (default 4).
        aspectmode: Plotly scene aspect mode applied to every subplot
            (default ``"data"``). Pass ``None`` to keep plotly's default.

    Raises:
        ValueError: If ``dataset`` is empty.
    """
    n_samples = len(dataset)
    if n_samples == 0:
        raise ValueError("Cannot plot samples from an empty dataset")

    n_plots = rows * cols

    # Select random indices to plot. They are drawn with replacement, so the
    # same sample may appear more than once in the grid.
    ids = np.random.randint(low=0, high=n_samples, size=n_plots)

    # Load every selected sample exactly once.
    samples = [dataset[int(ind)] for ind in ids]

    # One class id per sample; only the numeric id is used because class
    # names are not read from the dataset here.
    class_ids = [int(sample.y) for sample in samples]
    titles = ["class {}".format(class_id) for class_id in class_ids]

    # Qualitative palette; ids wrap around if there are more classes than colours.
    palette = px.colors.qualitative.Plotly

    # One independent spec dict per cell (list multiplication would alias them).
    specs = [[{'type': 'scatter3d'} for _ in range(cols)] for _ in range(rows)]

    fig = make_subplots(rows=rows, cols=cols,
                        specs=specs,
                        subplot_titles=titles,
                        vertical_spacing=0.04,
                        horizontal_spacing=0.04)

    # Fill subplots in row-major order, matching the order of `titles`.
    grid_cells = itertools.product(range(rows), range(cols))
    for (row, col), sample, class_id in zip(grid_cells, samples, class_ids):
        # Hand plotly plain NumPy arrays rather than torch tensors.
        pos = sample.pos.cpu().numpy()

        # The second and third coordinate columns are swapped for display, so
        # the stored third column is drawn on plotly's y axis and the stored
        # second column on its z axis.
        xs, ys, zs = pos[:, 0], pos[:, 2], pos[:, 1]

        fig.add_trace(
            go.Scatter3d(x=xs, y=ys, z=zs, mode="markers",
                         marker=dict(size=2,
                                     color=palette[class_id % len(palette)])),
            row=row + 1, col=col + 1  # plotly subplot indices are 1-based
        )

    if aspectmode is not None:
        # Scenes are named "scene", "scene2", "scene3", ... in the layout.
        fig.update_layout(**{"scene{}_aspectmode".format(i if i > 1 else ""): aspectmode
                             for i in range(1, n_plots + 1)})

    fig.update_layout(height=1500, width=1500, title_text="ModelNet Samples")
    fig.show()

plot_samples_modelnet(dataset_modelnet)