In [1]:
import numpy as np
import os

Convert a single D-FAUST point-cloud frame into a .ply file

In [2]:
# Load one point-cloud frame of the `50002_hips` sequence and export its
# points as a PLY file.
# NOTE(review): `o3d` is not imported anywhere in the visible notebook; this
# cell presumably needs `import open3d as o3d` in the import cell to run on a
# fresh kernel — confirm.
file_path = "/usr/stud/srinivaa/code/CaDeX/resource/data/Humans/D-FAUST/50002_hips/pcl_seq/00000003.npz"
data = np.load(file_path)  # the loaded archive is also inspected by the next cell
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(data['points'])
o3d.io.write_point_cloud("data_3.ply", pcd)  # the value shown as cell output is this call's return value

True

List the arrays stored in a pcl_seq file

In [3]:
# Print the name of every array stored in the archive loaded above.
for array_name in data.files:
    print(array_name)

points
loc
scale


List the arrays stored in a points_seq file

In [4]:
# Load the first points_seq frame of the `50002_chicken_wings` sequence and
# print the name of every array it contains.
points_seq = np.load("/usr/stud/srinivaa/code/CaDeX/resource/data/Humans/D-FAUST/50002_chicken_wings/points_seq/00000000.npz")
for array_name in points_seq.files:
    print(array_name)

points
occupancies
loc
scale


Check the total size of the dataset (number of points_seq files)

In [34]:
# Count the points_seq files of every D-FAUST sequence except an excluded set.
# next(os.walk(...)) returns the (dirpath, dirnames, filenames) triple of the
# top-level folder only, so `dirs` holds the sequence folder names.
path, dirs, files = next(os.walk("/usr/stud/srinivaa/code/CaDeX/resource/data/Humans/D-FAUST"))

# Sequences that are left out of this count.
excluded_sequences = {
    '50004_chicken_wings',
    '50020_hips',
    '50025_shake_arms',
    '50020_running_on_spot',
    '50007_light_hopping_loose',
}

# Despite its name this is a number of files, not a size in bytes; the name is
# kept because the next cell displays it.
total_file_size = 0
for sequence_name in dirs:
    if sequence_name in excluded_sequences:
        continue
    # Only the files directly inside <sequence>/points_seq are counted.
    _, _, sequence_files = next(os.walk(os.path.join(path, sequence_name, 'points_seq')))
    total_file_size += len(sequence_files)

In [35]:
# Number of points_seq files counted by the preceding cell.
total_file_size

39920

In [32]:
# Count the points_seq files of every D-FAUST sequence except an excluded set
# (a longer exclusion list than the 5-sequence count).
# next(os.walk(...)) returns the (dirpath, dirnames, filenames) triple of the
# top-level folder only, so `dirs` holds the sequence folder names.
path, dirs, files = next(os.walk("/usr/stud/srinivaa/code/CaDeX/resource/data/Humans/D-FAUST"))

# Sequences that are left out of this count.
excluded_sequences = {
    '50002_light_hopping_loose',
    '50004_punching',
    '50007_shake_shoulders',
    '50009_chicken_wings',
    '50020_chicken_wings',
    '50022_light_hopping_loose',
    '50025_light_hopping_loose',
    '50026_shake_arms',
    '50027_shake_shoulders',
    '50004_chicken_wings',
    '50020_hips',
    '50025_shake_arms',
    '50020_running_on_spot',
    '50007_light_hopping_loose',
}

# Despite its name this is a number of files, not a size in bytes; the name is
# kept because the next cell displays it.
total_file_size = 0
for sequence_name in dirs:
    if sequence_name in excluded_sequences:
        continue
    # Only the files directly inside <sequence>/points_seq are counted.
    _, _, sequence_files = next(os.walk(os.path.join(path, sequence_name, 'points_seq')))
    total_file_size += len(sequence_files)

In [33]:
# Number of points_seq files counted by the preceding cell.
total_file_size

37867

## Custom dataset of CaDeX

In [2]:
import dataset.oflow_dataset as oflow_dataset
from torch.utils import data
from torch.utils.data import DataLoader

Write a custom dataset class

In [3]:
class HumansDataset(data.Dataset):
    """Dataset of fixed-length sub-sequences cut from per-model frame sequences.

    The expected layout is ``dataset_folder/<category>/<model>/<ex_folder_name>/``
    with one file per frame. Every model is expanded into all windows of
    ``length_sequence`` consecutive frames; each window is one dataset item,
    described by an entry of ``self.models`` with the keys ``"category"``,
    ``"model"`` and ``"start_idx"``.
    """

    def __init__(
        self,
        dataset_folder,
        fields,
        split=None,
        categories=None,
        no_except=True,
        transform=None,
        length_sequence=17,
        n_files_per_sequence=-1,
        offset_sequence=0,
        ex_folder_name="pcl_seq",
        **kwargs
    ):
        """Index all sub-sequences that the dataset can serve.

        Args:
            dataset_folder (str): root folder containing one sub-folder per
                category.
            fields (dict): maps a field name to an object exposing
                ``load(model_path, idx, c_idx, start_idx)``.
            split (str or None): name of the split; the model names are read
                from ``<category>/<split>.lst`` (one name per line). With
                ``None`` every sub-directory of the category is used.
            categories (list or None): category folder names. With ``None``
                every sub-directory of ``dataset_folder`` is used.
            no_except (bool): stored on the instance; not consulted by this
                class.
            transform (callable or None): applied to the assembled sample
                dictionary in ``__getitem__``.
            length_sequence (int): number of frames per sub-sequence.
            n_files_per_sequence (int): if positive, upper bound on the number
                of frames considered per model.
            offset_sequence (int): number of leading frames skipped per model.
            ex_folder_name (str): sub-folder of each model whose entries are
                counted to obtain the model's sequence length.
            **kwargs: accepted and ignored.

        Raises:
            FileNotFoundError: if ``split`` is given but a category has no
                ``<split>.lst`` file.
        """
        # Attributes
        self.dataset_folder = dataset_folder
        self.fields = fields
        self.no_except = no_except
        self.transform = transform
        self.length_sequence = length_sequence
        self.n_files_per_sequence = n_files_per_sequence
        self.offset_sequence = offset_sequence
        self.ex_folder_name = ex_folder_name

        # The signature allows categories=None: fall back to every
        # sub-directory (sorted so that the category indices are reproducible).
        if categories is None:
            categories = self._list_subdirs(dataset_folder)

        # No metadata file is parsed here: every category gets placeholder
        # metadata plus its position in `categories` as index.
        self.metadata = {
            c: {"id": c, "name": "n/a", "idx": c_idx}
            for c_idx, c in enumerate(categories)
        }

        # Get all models
        self.models = []
        for c in categories:
            subpath = os.path.join(dataset_folder, c)

            if split is None:
                # No split requested: use every model folder of the category.
                models_c = self._list_subdirs(subpath)
            else:
                split_file = os.path.join(subpath, split + ".lst")
                if not os.path.exists(split_file):
                    # Fail loudly instead of leaving `models_c` unbound (or
                    # silently reusing the list of the previous category).
                    raise FileNotFoundError(
                        "Split file not found: %s" % split_file
                    )
                with open(split_file, "r") as f:
                    models_c = [line.strip() for line in f]

            # Drop empty lines (e.g. the trailing newline of a .lst file)
            models_c = [m for m in models_c if m]
            models_len = self.get_models_seq_len(subpath, models_c)
            models_c, start_idx = self.subdivide_into_sequences(models_c, models_len)
            self.models += [
                {"category": c, "model": m, "start_idx": start_idx[i]}
                for i, m in enumerate(models_c)
            ]

    @staticmethod
    def _list_subdirs(folder):
        """Return the sorted names of the directories directly inside ``folder``."""
        return sorted(
            entry
            for entry in os.listdir(folder)
            if os.path.isdir(os.path.join(folder, entry))
        )

    def __len__(self):
        """Return the number of sub-sequences."""
        return len(self.models)

    def __getitem__(self, idx):
        """Load every field of sub-sequence ``idx`` into one flat dictionary.

        A field that returns a dictionary contributes one entry per key: the
        key ``None`` is stored under the plain field name, any other key ``k``
        under ``"<field_name>.<k>"``. Any other return value is stored under
        the field name as is.

        Args:
            idx (int): index into ``self.models``.

        Returns:
            The sample dictionary, or whatever ``self.transform`` returns for
            it when a transform is set.
        """
        entry = self.models[idx]
        category = entry["category"]
        model = entry["model"]
        start_idx = entry["start_idx"]
        c_idx = self.metadata[category]["idx"]

        model_path = os.path.join(self.dataset_folder, category, model)

        sample = {}
        for field_name, field in self.fields.items():
            field_data = field.load(model_path, idx, c_idx, start_idx)

            if isinstance(field_data, dict):
                for k, v in field_data.items():
                    key = field_name if k is None else "%s.%s" % (field_name, k)
                    sample[key] = v
            else:
                sample[field_name] = field_data

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def get_models_seq_len(self, subpath, models):
        """Returns the sequence length of every given model.

        This is a little "hacky" as we assume the existence of the folder
        self.ex_folder_name. The length is the number of entries of that
        folder whose name contains no underscore.

        Args:
            subpath (str): subpath of model category
            models (list): list of model names

        Returns:
            list: one length per model, in the order of ``models``.
        """
        ex_folder_name = self.ex_folder_name
        return [
            sum(
                1
                for f in os.listdir(os.path.join(subpath, m, ex_folder_name))
                if "_" not in f
            )
            for m in models
        ]

    def subdivide_into_sequences(self, models, models_len):
        """Subdivides model sequence into smaller sequences.

        Args:
            models (list): list of model names
            models_len (list): list of lengths of model sequences

        Returns:
            tuple: ``(models_out, start_idx)``, two lists of equal length with
            one entry per sub-sequence: the model name and the index of the
            first frame of the window (offset included).
        """
        length_sequence = self.length_sequence
        n_files_per_sequence = self.n_files_per_sequence
        offset_sequence = self.offset_sequence

        # Remove files before offset
        usable_len = [n_frames - offset_sequence for n_frames in models_len]

        # Reduce to maximum number of files that should be considered
        if n_files_per_sequence > 0:
            usable_len = [min(n_files_per_sequence, n_frames) for n_frames in usable_len]

        models_out = []
        start_idx = []
        for model, n_frames in zip(models, usable_len):
            # A model shorter than length_sequence yields an empty range and
            # therefore contributes no sub-sequence.
            for n in range(0, n_frames - length_sequence + 1):
                models_out.append(model)
                start_idx.append(n + offset_sequence)

        return models_out, start_idx
    

In [4]:
def get_transforms():
    """Builds the four subsampling transforms used by the data fields.

    The function takes no arguments; all sample counts are hard-coded below.

    Returns:
        tuple: ``(transf_pt, transf_pt_val, transf_pcl, transf_pcl_val)``, i.e.
        ``SubsamplePoints(512)``, ``SubsamplePointsSeq(10000, random=False)``,
        ``SubsamplePointcloudSeq(100, connected_samples=True)`` and
        ``SubsamplePointcloudSeq(10000, random=False)``.
    """
    # Hard-coded sample counts
    points_count, pcl_count, eval_count = 512, 100, 10000

    # Transforms are constructed in the same order as before; only the order
    # of the returned tuple differs from the construction order.
    points_tf = oflow_dataset.SubsamplePoints(points_count)
    points_val_tf = oflow_dataset.SubsamplePointsSeq(eval_count, random=False)
    pcl_val_tf = oflow_dataset.SubsamplePointcloudSeq(eval_count, random=False)
    pcl_tf = oflow_dataset.SubsamplePointcloudSeq(pcl_count, connected_samples=True)

    return points_tf, points_val_tf, pcl_tf, pcl_val_tf

In [18]:
def get_data_fields(mode):
    """Returns the data fields for one dataset mode.

    All options are hard-coded in the function body; there is no config
    object.

    Args:
        mode (str): ``"train"`` selects the training fields; any other value
            selects the evaluation fields. ``"test"`` additionally replaces
            the ``"mesh"`` field when ``generate_interpolate`` is enabled.

    Returns:
        dict: maps a field name to a field object from ``oflow_dataset``.
    """
    fields = {}

    # Sequence lengths (validation uses the training length)
    seq_len_train = 17
    seq_len_val = seq_len_train

    # Sub-folders of every model directory
    p_folder = "points_seq"  # points_seq: points and their corresponding occupancy values
    pcl_folder = "pcl_seq"  # pcl_seq: points, scale and loc
    # mesh_seq: per the author's note this folder does not exist in the
    # original data; it is meant to hold points and faces for each model.
    mesh_folder = "mesh_seq"

    # Hard-coded options
    generate_interpolate = False
    unpackbits = True
    training_all = False
    n_training_frames = 8
    not_choose_last = False
    training_multi_files = False

    # Real booleans: the previous string values ("true") were truthy no matter
    # what they spelled, so writing "false" would not have disabled a loss.
    loss_recon = True
    loss_corr = True

    # Transformation (the point-cloud training transform is currently unused)
    transf_pt, transf_pt_val, _, transf_pcl_val = get_transforms()

    # Fields
    pts_iou_field = oflow_dataset.PointsSubseqField
    pts_corr_field = oflow_dataset.PointCloudSubseqField

    # MeshSubseqField can be used to load mesh fields

    if mode == "train":
        if loss_recon:
            if training_all:
                fields["points"] = pts_iou_field(
                    p_folder,
                    transform=transf_pt,
                    all_steps=True,
                    seq_len=seq_len_train,
                    unpackbits=unpackbits,
                    use_multi_files=training_multi_files,
                )
            else:
                fields["points"] = pts_iou_field(
                    p_folder,
                    sample_nframes=n_training_frames,
                    transform=transf_pt,
                    seq_len=seq_len_train,
                    fixed_time_step=0,
                    unpackbits=unpackbits,
                    use_multi_files=training_multi_files,
                )
            fields["points_t"] = pts_iou_field(
                p_folder,
                transform=transf_pt,
                seq_len=seq_len_train,
                unpackbits=unpackbits,
                not_choose_last=not_choose_last,
                use_multi_files=training_multi_files,
            )

            fields["mesh"] = oflow_dataset.MeshField(
                mesh_folder, seq_len=seq_len_val
            )

    # Only training can be boosted by multi-files.
    # Modify here: if not train, val should also load the same as the test.
    else:
        fields["points"] = pts_iou_field(
            p_folder,
            transform=transf_pt_val,
            all_steps=True,
            seq_len=seq_len_val,
            unpackbits=unpackbits,
        )
        # Author's note: this field is for the Chamfer distance, not for
        # correspondence. Because a transform is applied here, the number of
        # points in the config file must be 100000 — TODO confirm that value.
        fields["points_mesh"] = pts_corr_field(
            pcl_folder, transform=transf_pcl_val, seq_len=seq_len_val
        )

    # Connectivity Loss: the "pointcloud" entry is filled with a MeshField
    # here rather than with a PointCloudSubseqField over pcl_folder.
    if loss_corr:
        fields["pointcloud"] = oflow_dataset.MeshField(
            mesh_folder, seq_len=seq_len_val
        )

    if mode == "test" and generate_interpolate:
        fields["mesh"] = oflow_dataset.MeshSubseqField(
            mesh_folder, seq_len=seq_len_val, only_end_points=True
        )

    fields["oflow_idx"] = oflow_dataset.IndexField()
    return fields

In [19]:
# Build the field dictionary for the "train" mode.
fields = get_data_fields("train")

In [20]:
# Root folder of the Humans data and the single category folder to load from it.
# NOTE(review): absolute, machine-specific path — presumably should come from
# a configurable data directory.
dataset_folder = "/usr/data/cvpr_shared/marvin/Data/CaDeX/data/Humans"
categories = ["D-FAUST"]

Instantiate a custom dataset object

In [21]:
# Training dataset made of 17-frame windows taken from the models listed in
# the "train" split.
dataset = HumansDataset(
        dataset_folder,
        fields,
        split="train",
        categories=categories,
        length_sequence=17,
        n_files_per_sequence=-1,  # not positive: no cap on the frames considered per model
        offset_sequence=15,  # the first 15 frames of every model are skipped
        ex_folder_name="mesh_seq_downsampled",  # folder whose entries are counted to get each model's length
    )

Test the dataloader

In [22]:
# Wrap the training dataset in a DataLoader, stored under the "train" key.
dataloader_dict = {}
dataloader_dict["train"] = DataLoader(
                dataset, # samples are dicts; field groups seen in this run: points, points_t, mesh, pointcloud, oflow_idx
                batch_size=12,
                shuffle=True,
                num_workers=2,
                pin_memory=False, # author's note: set to True for faster CPU-to-GPU transfer; keep False only if
                # the data is really small and transferred to the GPU directly
                drop_last=True,  # drops the last incomplete batch — TODO(author): "check this"
            )

In [23]:
# Fetch sample 1 straight from the underlying dataset (no batching) and list its keys.
dataloader_dict["train"].dataset[1].keys()

  "vertices": np.array(mesh_vertices_seq),
  "triangles": np.array(mesh_face_seq),


dict_keys(['points', 'points.occ', 'points.time', 'points_t', 'points_t.occ', 'points_t.time', 'mesh.vertices', 'mesh.triangles', 'mesh.time', 'pointcloud.vertices', 'pointcloud.triangles', 'pointcloud.time', 'oflow_idx'])

In [24]:
# Fetch a single batch and report the batched shape of every field.
# Each sample is a dict, and the default collate function returns a dict of
# batched values, so the batch itself has no `.size` attribute. Only the first
# batch is inspected instead of iterating over the whole dataset.
batch = next(iter(dataloader_dict["train"]))
batch_shapes = {
    key: tuple(value.shape) if hasattr(value, "shape") else type(value).__name__
    for key, value in batch.items()
}
print("Size of a batch:", batch_shapes)

  "vertices": np.array(mesh_vertices_seq),
  "triangles": np.array(mesh_face_seq),
  "vertices": np.array(mesh_vertices_seq),
  "triangles": np.array(mesh_face_seq),


KeyboardInterrupt: 