In [1]:
import numpy as np
import os
# import h5py
# import open3d as o3d

Convert a single D-FAUST mesh frame into a .ply file

In [15]:
# Load one registered D-FAUST frame (.npz) and export it as a triangle mesh in
# PLY format.
# NOTE(review): this cell uses `o3d`, but `import open3d as o3d` is commented
# out in the import cell at the top of the notebook -- re-enable it before
# running on a fresh kernel.
file_path = "/usr/stud/srinivaa/storage/slurm/cadex/dfaust_resgistered/Humans/50004_hips/mesh_seq_registered/00000000.npz"
data = np.load(file_path)
# Earlier variant, kept for reference: export the vertices only, as a point cloud.
# pcd = o3d.geometry.PointCloud()
# pcd.points = o3d.utility.Vector3dVector(data['points'])
# o3d.io.write_point_cloud("registered_dfaust.ply", pcd)

o3d_mesh = o3d.geometry.TriangleMesh()
# TODO: verify the key under which the vertices are stored in the .npz file.
# "points" is used here; the original note expected "vertices", as in the
# load method of the MeshField class.
o3d_mesh.vertices = o3d.utility.Vector3dVector(data["points"])
o3d_mesh.triangles = o3d.utility.Vector3iVector(data["triangles"])
o3d.io.write_triangle_mesh("registered_dfaust.ply",o3d_mesh)

True

### Extract data from registered D-FAUST scans

In [5]:
# Open the registered D-FAUST HDF5 archive read-only and list its top-level keys.
# NOTE(review): `h5py` is used here, but `import h5py` is commented out in the
# import cell at the top of the notebook. The handle `f` is not closed in any
# of the visible cells.
filename = "/usr/stud/srinivaa/storage/slurm/cadex/dfaust_resgistered/registrations_f.hdf5"
f =  h5py.File(filename, "r")
a_dataset_keys = list(f.keys()) # top-level keys, i.e. the different groups of meshes


In [6]:
# Keep only the first and the last key of the archive.
# NOTE(review): presumably the last key holds the face indices and the first
# one a registration sequence -- confirm against the file contents.
dataset_list = [a_dataset_keys[0],a_dataset_keys[-1]]

In [7]:

# Read the two datasets needed for the mesh export directly from the open
# HDF5 file `f`. Only one entry per key list is wanted, so it is indexed
# explicitly rather than looped over.

# Last selected key -> array used as the triangle indices of the mesh.
data_face = f[dataset_list[-1]]
data_face_arr = np.asarray(data_face)

# First key of the archive -> array the vertices are taken from
# (the export cell slices it as data_i_arr[:, :, 0]).
data_i = f[a_dataset_keys[0]]
data_i_arr = np.asarray(data_i)

In [12]:
# Zero-pad the frame number 9 to eight digits (same width as frame file names
# such as 00000000.npz) and display the result.
file_index = "{:08d}".format(9)
file_index

'00000009'

In [65]:
# Build a triangle mesh from the arrays read out of the HDF5 archive and write
# it to PLY. The output path is the same "registered_dfaust.ply" used by the
# .npz export cell, so the file is overwritten.
# NOTE(review): requires `o3d`; `import open3d as o3d` is commented out in the
# import cell at the top of the notebook.
o3d_mesh = o3d.geometry.TriangleMesh()
# [:, :, 0] takes index 0 along the last axis -- presumably the first frame of
# the sequence (TODO confirm the array layout).
o3d_mesh.vertices = o3d.utility.Vector3dVector(data_i_arr[:,:,0])
o3d_mesh.triangles = o3d.utility.Vector3iVector(data_face_arr)
o3d.io.write_triangle_mesh("registered_dfaust.ply",o3d_mesh)

True

## Custom dataset of CaDeX

In [2]:
import dataset.oflow_dataset as oflow_dataset
from torch.utils import data
from torch.utils.data import DataLoader

Write a custom dataset class

In [3]:
class HumansDataset(data.Dataset):
    """Sliding-window dataset over per-model frame sequences.

    Every model folder is expanded into one dataset entry per possible start
    frame of a window of ``length_sequence`` frames. An entry is loaded by
    calling ``load`` on each configured field.

    Expected layout::

        dataset_folder/<category>/<split>.lst          (optional, one model per line)
        dataset_folder/<category>/<model>/<ex_folder_name>/<frame files>

    Args:
        dataset_folder (str): root folder that contains the category folders.
        fields (dict): field name -> object exposing
            ``load(model_path, idx, c_idx, start_idx)``.
        split (str or None): name of the split file (without ``.lst``). When
            it is None or the file does not exist, every sub-folder of the
            category is used as a model.
        categories (list or None): category folder names. When None, every
            sub-folder of ``dataset_folder`` is used.
        no_except (bool): stored on the instance; not consulted by this class.
        transform (callable or None): applied to the assembled sample dict.
        length_sequence (int): number of frames per window.
        n_files_per_sequence (int): if > 0, cap on the number of frames
            considered per model.
        offset_sequence (int): number of leading frames skipped per model.
        ex_folder_name (str): folder inside each model folder whose files are
            counted to obtain the sequence length.
        **kwargs: accepted for call compatibility and ignored.
    """

    def __init__(
        self,
        dataset_folder,
        fields,
        split=None,
        categories=None,
        no_except=True,
        transform=None,
        length_sequence=17,
        n_files_per_sequence=-1,
        offset_sequence=0,
        ex_folder_name="pcl_seq",
        **kwargs
    ):
        # Attributes
        self.dataset_folder = dataset_folder
        self.fields = fields
        self.no_except = no_except
        self.transform = transform
        self.length_sequence = length_sequence
        self.n_files_per_sequence = n_files_per_sequence
        self.offset_sequence = offset_sequence
        self.ex_folder_name = ex_folder_name

        # Without an explicit list, treat every sub-folder as a category.
        if categories is None:
            categories = self._list_subfolders(dataset_folder)

        # Per-category metadata; "idx" is the position in `categories`.
        self.metadata = {
            c: {"id": c, "name": "n/a", "idx": c_idx}
            for c_idx, c in enumerate(categories)
        }

        # One entry per (model, window start) pair.
        self.models = []
        for c in categories:
            subpath = os.path.join(dataset_folder, c)
            models_c = self._read_model_names(subpath, split)
            models_len = self.get_models_seq_len(subpath, models_c)
            models_c, start_idx = self.subdivide_into_sequences(models_c, models_len)
            self.models += [
                {"category": c, "model": m, "start_idx": start_idx[i]}
                for i, m in enumerate(models_c)
            ]

    @staticmethod
    def _list_subfolders(folder):
        """Return the names of the directories inside `folder`, sorted."""
        return sorted(
            name for name in os.listdir(folder)
            if os.path.isdir(os.path.join(folder, name))
        )

    @staticmethod
    def _read_model_names(subpath, split):
        """Return the model names of one category.

        Names come from ``<subpath>/<split>.lst`` when that file exists
        (blank lines and surrounding whitespace are dropped); otherwise every
        sub-folder of `subpath` is returned.
        """
        if split is not None:
            split_file = os.path.join(subpath, split + ".lst")
            if os.path.exists(split_file):
                with open(split_file, "r") as f:
                    names = [line.strip() for line in f.read().split("\n")]
                return [name for name in names if len(name) > 0]
        return HumansDataset._list_subfolders(subpath)

    def __len__(self):
        """Number of (model, window start) entries."""
        return len(self.models)

    def __getitem__(self, idx):
        """Load every field for entry `idx` and return them as one dict.

        A field that returns a dict is flattened: the value under key None is
        stored as ``<field_name>``, every other key as ``<field_name>.<key>``.
        """
        entry = self.models[idx]
        category = entry["category"]
        start_idx = entry["start_idx"]
        c_idx = self.metadata[category]["idx"]

        model_path = os.path.join(self.dataset_folder, category, entry["model"])

        sample = {}
        for field_name, field in self.fields.items():
            field_data = field.load(model_path, idx, c_idx, start_idx)

            if not isinstance(field_data, dict):
                sample[field_name] = field_data
                continue

            for k, v in field_data.items():
                if k is None:
                    sample[field_name] = v
                else:
                    sample["%s.%s" % (field_name, k)] = v

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def get_models_seq_len(self, subpath, models):
        """Return the number of frame files of each model.

        The count is taken from the folder ``self.ex_folder_name`` inside each
        model folder, which is assumed to exist. Entries whose name contains
        an underscore are not counted.

        Args:
            subpath (str): path of the model category
            models (list): list of model names

        Returns:
            list: one sequence length per entry of `models`.
        """
        models_seq_len = []
        for m in models:
            frame_dir = os.path.join(subpath, m, self.ex_folder_name)
            n_frames = sum(1 for name in os.listdir(frame_dir) if "_" not in name)
            models_seq_len.append(n_frames)
        return models_seq_len

    def subdivide_into_sequences(self, models, models_len):
        """Expand every model into one entry per window start.

        Args:
            models (list): list of model names
            models_len (list): list of lengths of model sequences

        Returns:
            tuple: ``(models_out, start_idx)`` -- parallel lists with the
            model name and the first frame index of every window.
        """
        length_sequence = self.length_sequence
        n_files_per_sequence = self.n_files_per_sequence
        offset_sequence = self.offset_sequence

        models_out = []
        start_idx = []
        for model, n_frames in zip(models, models_len):
            # Frames before the offset are not available to any window.
            usable = n_frames - offset_sequence
            # Optional cap on the number of frames considered per model.
            if n_files_per_sequence > 0:
                usable = min(n_files_per_sequence, usable)
            # Sequences shorter than one window yield an empty range.
            for n in range(0, usable - length_sequence + 1):
                models_out.append(model)
                start_idx.append(n + offset_sequence)

        return models_out, start_idx
    

In [4]:
def get_transforms():
    """Create the four sub-sampling transforms used by the data fields.

    Takes no arguments; the sample counts are fixed inside the function
    (512 points, 10000 evaluation points, 100 point-cloud points).

    Returns:
        tuple: ``(transf_pt, transf_pt_val, transf_pcl, transf_pcl_val)`` --
        in that order: ``SubsamplePoints(512)``,
        ``SubsamplePointsSeq(10000, random=False)``,
        ``SubsamplePointcloudSeq(100, connected_samples=True)`` and
        ``SubsamplePointcloudSeq(10000, random=False)``.
    """
    points_count, eval_count, cloud_count = 512, 10000, 100

    points_tf = oflow_dataset.SubsamplePoints(points_count)
    points_eval_tf = oflow_dataset.SubsamplePointsSeq(eval_count, random=False)
    cloud_eval_tf = oflow_dataset.SubsamplePointcloudSeq(eval_count, random=False)
    cloud_tf = oflow_dataset.SubsamplePointcloudSeq(cloud_count, connected_samples=True)

    return points_tf, points_eval_tf, cloud_tf, cloud_eval_tf

In [5]:
def get_data_fields(mode):
    """Assemble the data fields for one dataset split.

    All settings are fixed inside the function; `mode` is the only input.

    Args:
        mode (str): ``"train"`` selects the training fields; any other value
            (e.g. ``"val"`` or ``"test"``) selects the evaluation fields.

    Returns:
        dict: field name -> field object. With the current flags the keys are
        ``points``, ``points_t``, ``mesh``, ``pointcloud``, ``oflow_idx`` for
        training and ``points``, ``points_mesh``, ``pointcloud``,
        ``oflow_idx`` otherwise.
    """
    seq_len_train = 17
    seq_len_val = seq_len_train

    # points_seq: points and their corresponding occupancy values
    p_folder = "points_seq"
    # pcl_seq: points, scale and loc
    pcl_folder = "pcl_seq"
    # mesh folder: one file per frame containing points and faces of the model
    mesh_folder = "mesh_registred"

    # Switches are real booleans: a string such as "false" would be truthy.
    generate_interpolate = False
    unpackbits = True
    training_all = False
    not_choose_last = False
    training_multi_files = False
    loss_recon = True
    loss_corr = True

    n_training_frames = 8

    # Transformations (the training point-cloud transform is not needed while
    # the "pointcloud" field is loaded through MeshField).
    transf_pt, transf_pt_val, _transf_pcl, transf_pcl_val = get_transforms()

    # Field classes. MeshSubseqField can be used to load mesh fields.
    pts_iou_field = oflow_dataset.PointsSubseqField
    pts_corr_field = oflow_dataset.PointCloudSubseqField

    fields = {}

    if mode == "train":
        if loss_recon:
            if training_all:
                fields["points"] = pts_iou_field(
                    p_folder,
                    transform=transf_pt,
                    all_steps=True,
                    seq_len=seq_len_train,
                    unpackbits=unpackbits,
                    use_multi_files=training_multi_files,
                )
            else:
                fields["points"] = pts_iou_field(
                    p_folder,
                    sample_nframes=n_training_frames,
                    transform=transf_pt,
                    seq_len=seq_len_train,
                    fixed_time_step=0,
                    unpackbits=unpackbits,
                    use_multi_files=training_multi_files,
                )
            fields["points_t"] = pts_iou_field(
                p_folder,
                transform=transf_pt,
                seq_len=seq_len_train,
                unpackbits=unpackbits,
                not_choose_last=not_choose_last,
                use_multi_files=training_multi_files,
            )
            fields["mesh"] = oflow_dataset.MeshField(mesh_folder, seq_len=seq_len_val)
    else:
        # Only training can be boosted by multi-files; val loads the same
        # fields as test.
        fields["points"] = pts_iou_field(
            p_folder,
            transform=transf_pt_val,
            all_steps=True,
            seq_len=seq_len_val,
            unpackbits=unpackbits,
        )
        # Used for the chamfer distance. Because a sub-sampling transform is
        # applied here, the point count in the config file must be 100000.
        fields["points_mesh"] = pts_corr_field(
            pcl_folder, transform=transf_pcl_val, seq_len=seq_len_val
        )

    # Connectivity loss: the "pointcloud" entry is loaded from the mesh folder
    # via MeshField rather than from pcl_folder via PointCloudSubseqField.
    if loss_corr:
        fields["pointcloud"] = oflow_dataset.MeshField(mesh_folder, seq_len=seq_len_val)

    if mode == "test" and generate_interpolate:
        fields["mesh"] = oflow_dataset.MeshSubseqField(
            mesh_folder, seq_len=seq_len_val, only_end_points=True
        )

    fields["oflow_idx"] = oflow_dataset.IndexField()
    return fields

In [32]:
def get_inputs_field(mode):
    """Create the field that loads the model inputs.

    The input type is fixed to ``"mesh_seq"`` inside the function, so the call
    returns ``oflow_dataset.MeshField("mesh_registred")``; the other branches
    are kept for reference and are not reached.

    NOTE(review): the unreached branches read `cfg`, `transforms` and
    `logging`, none of which is defined in the visible notebook cells -- they
    must be provided before `input_type` is changed.

    Args:
        mode (str): split name; only compared with ``"train"`` inside the
            "img_seq" and "pcl_seq" branches.

    Returns:
        The inputs field object, or None when `input_type` is None.

    Raises:
        ValueError: if `input_type` is not one of the handled values.
    """
    input_type = "mesh_seq"
    seq_len = 17
    is_train = mode == "train"

    if input_type is None:
        return None

    if input_type == "img_seq":
        img_cfg = cfg["dataset"]["oflow_config"]
        if is_train and img_cfg["img_augment"]:
            resize_op = transforms.RandomResizedCrop(
                img_cfg["img_size"], (0.75, 1.0), (1.0, 1.0)
            )
        else:
            resize_op = transforms.Resize((img_cfg["img_size"]))
        transform = transforms.Compose([resize_op, transforms.ToTensor()])
        # A random view is used for training only.
        return oflow_dataset.ImageSubseqField(
            img_cfg["img_seq_folder"], transform, random_view=is_train
        )

    if input_type == "pcl_seq":
        pcl_cfg = cfg["dataset"]["oflow_config"]
        connected_samples = pcl_cfg["input_pointcloud_corresponding"]
        subsample = oflow_dataset.SubsamplePointcloudSeq(
            pcl_cfg["input_pointcloud_n"],
            connected_samples=connected_samples,
        )
        noise = oflow_dataset.PointcloudNoise(pcl_cfg["input_pointcloud_noise"])
        transform = transforms.Compose([subsample, noise])

        # Multi-file loading is opt-in through the config and training-only.
        training_multi_files = False
        if "training_multi_files" in pcl_cfg:
            if pcl_cfg["training_multi_files"] and is_train:
                training_multi_files = True
                logging.info(
                    "Oflow D-FAUST PCL Field use multi files to speed up disk performation"
                )

        return oflow_dataset.PointCloudSubseqField(
            pcl_cfg["pointcloud_seq_folder"],
            transform,
            seq_len=seq_len,
            use_multi_files=training_multi_files,
        )

    # TODO: get inputs fields for mesh sequence
    if input_type == "mesh_seq":
        # No transform is applied yet; MeshNoise / DownSampleMesh(N=512) were
        # sketched as candidates for a transforms.Compose pipeline.
        return oflow_dataset.MeshField("mesh_registred")

    if input_type == "end_pointclouds":
        end_cfg = cfg["dataset"]["oflow_config"]
        transform = oflow_dataset.SubsamplePointcloudSeq(
            end_cfg["input_pointcloud_n"],
            connected_samples=end_cfg["input_pointcloud_corresponding"],
        )
        return oflow_dataset.PointCloudSubseqField(
            end_cfg["pointcloud_seq_folder"],
            only_end_points=True,
            seq_len=seq_len,
            transform=transform,
        )

    if input_type == "idx":
        return oflow_dataset.IndexField()

    raise ValueError("Invalid input type (%s)" % input_type)

In [33]:
# Build the training data fields and, when an inputs field is configured,
# register it under the "inputs" key of the same dictionary.
fields = get_data_fields("train")
inputs_field = get_inputs_field("train")

if inputs_field is not None:
    fields["inputs"] = inputs_field

In [34]:
# Root folder of the Humans data and the category sub-folders to load from it.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
dataset_folder = "/usr/data/cvpr_shared/marvin/Data/CaDeX/data/Humans"
categories = ["D-FAUST"]

Instantiate a custom dataset object

In [35]:
# Training split of the D-FAUST category, cut into windows of 17 frames.
# Sequence lengths are counted from each model's "mesh_registred" folder,
# the first 15 frames of every sequence are skipped (offset_sequence) and
# n_files_per_sequence=-1 leaves the number of frames per model uncapped.
dataset = HumansDataset(
        dataset_folder,
        fields,
        split="train",
        categories=categories,
        length_sequence=17,
        n_files_per_sequence=-1,
        offset_sequence=15,
        ex_folder_name="mesh_registred",
    )

In [36]:
# Load the entry at index 1 to inspect the keys and arrays the fields return.
# NOTE(review): this displays the full sample dict, which produces a long output.
dataset[1]

{'points': array([[[ 0.06521299, -0.05698125,  0.31752512],
         [ 0.25040603, -0.04377124, -0.3058689 ],
         [-0.09378615,  0.4495377 , -0.10343115],
         ...,
         [ 0.16275991, -0.0174749 , -0.16190174],
         [-0.45050672,  0.1252174 ,  0.54895157],
         [-0.26513195, -0.3668223 , -0.02109268]],
 
        [[ 0.50589174,  0.02797915,  0.20571849],
         [-0.35474646,  0.27593046,  0.1856478 ],
         [-0.34140444, -0.00873516,  0.27456862],
         ...,
         [ 0.05789224,  0.53609043, -0.26586908],
         [ 0.17120288, -0.28693095, -0.31277567],
         [-0.13194604,  0.41042534,  0.52450025]],
 
        [[ 0.5546982 , -0.40892327, -0.04766466],
         [-0.1868754 ,  0.00089046,  0.20669077],
         [-0.37610364,  0.43072984,  0.47157764],
         ...,
         [ 0.11464693,  0.55037576, -0.35100394],
         [ 0.43035975,  0.3648591 ,  0.21714692],
         [ 0.24310677, -0.20663218,  0.45066878]],
 
        ...,
 
        [[ 0.07508831,  

In [31]:
import glob

# Load a 17-frame window of registered meshes for one model by hand: collect
# the sorted .npz frame files, then read the vertex ("points") and face
# ("triangles") arrays of every frame.
start_idx = 0

folder = os.path.join("/usr/data/cvpr_shared/marvin/Data/CaDeX/data/Humans/D-FAUST/50021_hips","mesh_registred")
mesh_files = sorted(glob.glob(os.path.join(folder, "*.%s" % "npz")))
mesh_files = mesh_files[start_idx : start_idx + 17]

# Fail with a clear message instead of the generic np.stack error below.
if not mesh_files:
    raise FileNotFoundError("no .npz mesh files found in %s" % folder)

mesh_vertices_seq = []
mesh_face_seq = []

# Loop names are chosen so that the notebook-level names `f` and `data` are
# not rebound on every iteration; the archive is closed once both arrays
# have been read.
for mesh_path in mesh_files:
    with np.load(mesh_path) as mesh_npz:
        vertices = mesh_npz['points']
        triangles = mesh_npz['triangles']

    mesh_vertices_seq.append(vertices)
    mesh_face_seq.append(triangles)

# Stack the per-frame vertex arrays along a new leading axis.
# NOTE(review): this rebinds `data`, which is also the name under which
# `torch.utils.data` is imported in this notebook -- re-run that import cell
# before re-defining HumansDataset.
data = {"vertices":np.stack(mesh_vertices_seq)}

Test the dataloader

In [11]:
# Wrap the dataset in a shuffling DataLoader, stored under the "train" key.
dataloader_dict = {}
dataloader_dict["train"] = DataLoader(
                dataset, # configured fields: points, points_t, mesh, pointcloud, oflow_idx, inputs
                batch_size=12,
                shuffle=True,
                num_workers=2,
                pin_memory=False, # True places batches in pinned host memory, which speeds up CPU -> GPU copies;
                # False is fine when the batches are small or are not moved to a GPU.
                drop_last=True,  # drops the last incomplete batch -- TODO(review): confirm this is wanted
            )

### Study the Shape class

In [12]:
from core.models.utils_arap.shape_utils import Shape
import torch

In [13]:
# Fetch the sample once: every dataset[...] access runs the loader of every
# configured field, so indexing twice would load the whole sample twice.
sample_1 = dataset[1]
shape_1 = Shape(
    vert=torch.from_numpy(sample_1['mesh.vertices']),
    triv=torch.from_numpy(sample_1['mesh.triangles']),
)

In [15]:
# Display the tensor type string of the value returned by Shape.get_neigh().
shape_1.get_neigh().type()

'torch.LongTensor'

### Study ARAP loss