# ModelNet40

> A Dataset structure for the ModelNet40 dataset.

In [None]:
#| default_exp datasets/modelnet

The code to load the ModelNet40 dataset comes from the DGCNN repo. 
There are some alterations, so that it can store and load the data from a custom path. 
Also there is an option to load only a specific class. 

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import os
import glob
import h5py
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from pclab.transforms import *

In [None]:
#| export

def download(path:str = None): # if path is None, the data is stored in the `data` subfolder next to this module's file.
    """A function that downloads the ModelNet40 data, if not already downloaded, in the specified path

    Nothing happens when `<path>/modelnet40_ply_hdf5_2048` already exists.
    Otherwise the archive is fetched with `wget` into the current working
    directory, extracted there with `unzip`, the extracted folder is moved
    inside the target directory, and the archive is deleted.

    Raises `subprocess.CalledProcessError` if `wget` or `unzip` exits with an
    error, and `FileNotFoundError` if one of these tools is not installed.
    """
    # Local imports keep this cell self-contained.
    import shutil
    import subprocess

    # adding the ability to use custom path
    if path is None:
        BASE_DIR = os.path.dirname(os.path.abspath(__file__))
        DATA_DIR = os.path.join(BASE_DIR, 'data')
    else:
        DATA_DIR = path

    # Already downloaded: nothing to do.
    if os.path.exists(os.path.join(DATA_DIR, 'modelnet40_ply_hdf5_2048')):
        return

    # The target directory must exist before moving the dataset into it,
    # otherwise the move would rename the dataset folder to DATA_DIR itself.
    os.makedirs(DATA_DIR, exist_ok=True)

    www = 'https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip'
    archive = os.path.basename(www)
    # Argument lists (no shell) so that paths with spaces or shell
    # metacharacters are handled safely; check=True surfaces failures early.
    subprocess.run(['wget', www], check=True)
    subprocess.run(['unzip', archive], check=True)
    # archive[:-4] strips the '.zip' suffix -> name of the extracted folder.
    shutil.move(archive[:-4], DATA_DIR)
    os.remove(archive)

In [None]:
show_doc(download)

---

[source](https://github.com/JohnRomanelis/pclab/blob/main/pclab/datasets/modelnet.py#L16){target="_blank" style="float:right; font-size:smaller"}

### download

>      download (path:str=None)

A function that downloads the ModelNet40 data, if not already downloaded, in the specified path

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| path | str | None | if path is None, it will download the data into the `data` subfolder next to this module's file. |

In [None]:
#| export

def load_data(partition:str, # `train` or `test` partition
              path:str=None): # folder that holds the data; if None, the `data` subfolder next to this module's file is used
    """Load every sample of a ModelNet40 partition into memory, downloading the data first if needed

    Returns a tuple `(data, label)`: the `data` and `label` entries of all
    matching HDF5 files, concatenated along the first axis and cast to
    `float32` and `int64` respectively.

    Raises `FileNotFoundError` when no file of the requested partition is found.
    """
    download(path)

    if path is None:
        BASE_DIR = os.path.dirname(os.path.abspath(__file__))
        DATA_DIR = os.path.join(BASE_DIR, 'data')
    else:
        DATA_DIR = path

    pattern = os.path.join(DATA_DIR, 'modelnet40_ply_hdf5_2048', 'ply_data_%s*.h5'%partition)
    # glob returns files in a filesystem dependent order; sorting makes the
    # sample order (and therefore dataset indices) reproducible.
    h5_names = sorted(glob.glob(pattern))
    if not h5_names:
        # Fail with a clear message instead of an obscure concatenate error.
        raise FileNotFoundError("No ModelNet40 files found matching '%s'" % pattern)

    all_data = []
    all_label = []
    for h5_name in h5_names:
        # Read-only, and closed even if reading one of the entries fails.
        with h5py.File(h5_name, 'r') as f:
            all_data.append(f['data'][:].astype('float32'))
            all_label.append(f['label'][:].astype('int64'))

    all_data = np.concatenate(all_data, axis=0)
    all_label = np.concatenate(all_label, axis=0)

    return all_data, all_label

In [None]:
show_doc(load_data)

---

[source](https://github.com/JohnRomanelis/pclab/blob/main/pclab/datasets/modelnet.py#L36){target="_blank" style="float:right; font-size:smaller"}

### load_data

>      load_data (partition:str, path:str=None)

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| partition | str |  | `train` or `test` partition |
| path | str | None |  |

In [None]:
#| export 

# Names of the 40 ModelNet40 categories. The position of a name in this list
# is used as the integer label of that category.
m40_odered_labels = (
    'airplane bathtub bed bench bookshelf bottle bowl car chair cone '
    'cup curtain desk door dresser flower_pot glass_box guitar keyboard lamp '
    'laptop mantel monitor night_stand person piano plant radio range_hood sink '
    'sofa stairs stool table tent toilet tv_stand vase wardrobe xbox'
).split(' ')

# Inverse mapping: category name -> integer label.
m40_cat2int = {name: idx for idx, name in enumerate(m40_odered_labels)}


The labels in the downloaded file are represented as integers. Therefore, it is necessary to establish a mapping that associates these integers with the actual labels of the categories.

In [None]:
m40_odered_labels[0], m40_odered_labels[8], m40_odered_labels[24]

('airplane', 'chair', 'person')

In [None]:
m40_cat2int['airplane'], m40_cat2int['chair'], m40_cat2int['person']

(0, 8, 24)

In [None]:
#| export 

class ModelNet40(Dataset):
    "The ModelNet40 class is used for loading and accessing the data; it inherits from the `torch.utils.data.Dataset` class."
    def __init__(self, 
                 path:str,               # path of the dataset
                 num_points:int,         # number of points 
                 partition:str='train',  # which partition to use (`train` or `test`)
                 transforms=[],          # the transforms to apply on each sample
                 category=-1):           # select a specific category of the dataset either by index or by name. By default returns samples from all 40 categories. 
        assert partition in ['train', 'test'], "Partition should be either 'train' or 'test'"
        self.path, self.num_points, self.partition = path, num_points, partition

        # `None` is accepted as "no transforms".
        if transforms is None:
            transforms = []
        # Copy into a fresh list so that neither the shared default list nor a
        # list owned by the caller is aliased by this instance.
        # A single transform is wrapped in a list.
        self.transforms = list(transforms) if isinstance(transforms, (tuple, list)) else [transforms]
        self.data, self.label = load_data(partition, path)

        # A category given by name is translated to its integer label.
        if isinstance(category, str):
            assert category in m40_odered_labels, "Please select a valid category label"
            category = m40_cat2int[category]
        else:
            assert category in [-1] + list(range(40)), "Category index should be either -1 or a number in [0, 39]"

        # -1 means "keep every category".
        if category != -1:
            self._select_category(category)

    def _select_category(self, category:int): # integer label of the category to keep
        "Keep only the samples whose label equals `category`"
        mask = self.label == category
        # Flatten the mask to index the sample axis of `data`. `reshape(-1)`
        # is used instead of `squeeze()` so that a single-sample mask stays
        # 1-D rather than collapsing to a 0-d array.
        # assumes one label per sample, e.g. labels of shape (N, 1) or (N,) — TODO confirm
        self.data = self.data[mask.reshape(-1), ...]
        # Boolean indexing with the full mask returns the kept labels as a 1-D array.
        self.label = self.label[mask]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        # Keep only the first `num_points` points of the selected sample.
        # NOTE(review): this slice looks like a view into `self.data`; a transform
        # that works in place would also change the stored sample — confirm.
        pointcloud = self.data[item][:self.num_points]
        label = self.label[item]
        # Transforms are applied in the order they were given.
        for t in self.transforms:
            pointcloud = t(pointcloud)
        return pointcloud, label

In [None]:
show_doc(ModelNet40)

---

[source](https://github.com/JohnRomanelis/pclab/blob/main/pclab/datasets/modelnet.py#L70){target="_blank" style="float:right; font-size:smaller"}

### ModelNet40

>      ModelNet40 (path:str, num_points:int, partition:str='train',
>                  transforms=[], category=-1)

The ModelNet40 class is used for loading and accessing the data; it inherits from the `torch.utils.data.Dataset` class.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| path | str |  | path of the dataset |
| num_points | int |  | number of points |
| partition | str | train | which partition to use (`train` or `test`) |
| transforms | list | [] | the transforms to apply on each sample |
| category | int | -1 | select a specific category of the dataset either by index or by name. By default returns samples from all 40 categories. |

In [None]:
#| hide 
path = "/home/vvr/Desktop/vlassisgiannis/new_exps/data" #"/home/ioannis/Desktop/programming/data"

**Examples**

Load the full dataset:

In [None]:
#|eval: false
dataset = ModelNet40(path, 1024, 'train')
len(dataset)

9840

Load the airplane category using the category label:

In [None]:
#|eval: false
dataset = ModelNet40(path, 1024, 'train', category = 'airplane')
len(dataset)

625

Load the airplane category using the category index:

In [None]:
#|eval: false
dataset = ModelNet40(path, 1024, 'train', category = 0)
len(dataset)

625

Load a dataset with custom transforms:

In [None]:
#|eval: false
transforms = [RandomPointDropout(), RandomShuffle(), UnitSphereNormalization(), AnisotropicScale(), ToTensor()]
dataset = ModelNet40(path, 1024, 'train', transforms=transforms)
len(dataset)

9840

In [None]:
#|export 

def get_modelnet(path,
                 version='standard',  # which version to return 
                 return_dls=True,     # return dataloaders instead of dataset
                 batch_size=32,       # the batch_size to use if returning a dataloader
                 num_workers=8):      # number of worker processes used by each dataloader
    """Get a version of ModelNet from a predefined set of versions, for faster coding

    Returns `(train, valid)`: a pair of dataloaders when `return_dls` is true,
    otherwise the pair of underlying datasets. The validation dataloader uses
    twice the training batch size and is not shuffled.

    Raises `ValueError` when `version` is not one of the predefined versions.
    """
    # Reject unknown versions up front; otherwise the datasets below would
    # never be defined and the function would fail with an unrelated error.
    if version != 'standard':
        raise ValueError("Unknown ModelNet version '%s'; available versions: 'standard'" % (version,))

    # TODO: Try on train to load the dataset with 2048 points and use RandomPointKeep transform with 1024 points -> Better Augmentation
    train_transforms = [RandomPointDropout(), RandomShuffle(), UnitSphereNormalization(), AnisotropicScale(), ToTensor()]
    valid_transforms = [UnitSphereNormalization(), ToTensor()]

    train_dataset = ModelNet40(path, 1024, 'train', transforms = train_transforms)
    valid_dataset = ModelNet40(path, 1024, 'test' , transforms = valid_transforms)

    if not return_dls:
        return train_dataset, valid_dataset

    # Training drops the last incomplete batch and shuffles.
    # Validation keeps every sample, in order.
    train_loader = DataLoader(train_dataset, batch_size=batch_size,   shuffle=True,  num_workers=num_workers, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=2*batch_size, shuffle=False, num_workers=num_workers, drop_last=False)

    return train_loader, valid_loader

So to get a predefined version of ModelNet:

In [None]:
#|eval: false
train_loader, valid_loader = get_modelnet(path)