In [1]:
from dl1_data_handler.reader import DL1DataReaderSTAGE1, DL1DataReaderDL1DH
from ctlearn.utils import setup_DL1DataReader
from ctlearn.data_loader import KerasBatchGenerator
from preprocessing import ImagetoPointCloud

import h5py
import yaml

2024-07-11 11:18:33.549701: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-11 11:18:33.612563: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-11 11:18:33.612603: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-11 11:18:33.612637: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-11 11:18:33.622432: I tensorflow/core/platform/cpu_feature_g

In [2]:
# Build the DL1 reader and wrap it in a Keras batch generator (batches of size 1)

# NOTE(review): yaml.Loader can construct arbitrary Python objects, so only
# trusted configuration files should be loaded this way (yaml.safe_load is
# the stricter alternative).
with open('sample_config.yml', 'r') as config_file:
  config = yaml.load(config_file, Loader=yaml.Loader)

# setup_DL1DataReader returns the reader settings together with the data
# format. NOTE(review): data_format is not consulted below; the STAGE1 reader
# is always instantiated -- confirm this matches the input files.
reader_settings, data_format = setup_DL1DataReader(config, "train")
config["Data"] = reader_settings
reader = DL1DataReaderSTAGE1(**reader_settings)

# One index per reader entry, in the reader's own order
data_len = len(reader)
indices = [*range(data_len)]

all_data = KerasBatchGenerator(
    reader,
    indices,
    batch_size=1,
    mode="train",
    class_names=None,
    stack_telescope_images=False,
)

In [3]:
# Report how many entries the reader exposes (data_len = len(reader))
print("Número total de imágenes ", data_len)

Número total de imágenes  8185


In [18]:
# Convert every image to a point cloud (max_points=500, relative_coords=True)
# and store the result in an HDF5 file, one group per instance

max_points = 500
relative_coords = True
img_to_pct = ImagetoPointCloud(max_points, relative_coords)
name = "dataset_500_rel_coords.h5"
with h5py.File(f'../data/{name}', 'w') as f:
  for i in range(data_len):
    # The generator was built with batch_size=1, so element 0 is the only
    # entry of each batch
    batch_inputs, batch_labels = all_data[i]
    image = batch_inputs["images"][0, :, :, :]
    labels = batch_labels[0]
    point_cloud = img_to_pct(image)
    # Layout: /instance_<i>/{features, points, mask} plus a 'label' attribute
    grp = f.create_group(f'instance_{i}')
    for key in ('features', 'points', 'mask'):
      grp.create_dataset(key, data=point_cloud[key])
    grp.attrs['label'] = labels

print(f"Dataset guardado exitosamente en {name}")

Dataset guardado exitosamente en dataset_500_rel_coords.h5


# DataLoader

In [6]:
import numpy as np
import h5py
import keras
from keras.utils import Sequence

class PCDataGenerator(Sequence):
    """Keras ``Sequence`` that serves point-cloud batches read from an HDF5 file.

    Every entry of ``indices`` is used as a key into the file and must refer to
    a group holding the datasets ``features``, ``points`` and ``mask`` and a
    ``label`` attribute.

    Args:
        hdf5_file: Path of the HDF5 file; it is reopened read-only for every
            batch.
        indices: Keys of the groups to serve. The sequence is copied, so the
            caller's object is never reordered by shuffling.
        batch_size: Number of groups per batch (must be >= 1).
        shuffle: Whether to shuffle the keys on construction and again at the
            end of every epoch.
        drop_remainder: When True (the default, matching the previous
            behaviour) a trailing incomplete batch is dropped. When False it
            is served as a smaller final batch so that no sample is skipped.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    def __init__(self, hdf5_file, indices, batch_size=32, shuffle=True,
                 drop_remainder=True):
        # Newer Keras versions expect the base-class initialiser to run;
        # on older versions this resolves to object.__init__ and is a no-op.
        super().__init__()
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        self.hdf5_file = hdf5_file
        # Private copy: on_epoch_end shuffles in place and must not reorder
        # the list owned by the caller.
        self.indices = list(indices)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_remainder = drop_remainder
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch."""
        n_samples = len(self.indices)
        if self.drop_remainder:
            return n_samples // self.batch_size
        # Ceiling division keeps the trailing partial batch
        return -(-n_samples // self.batch_size)

    def __getitem__(self, index):
        """Return batch ``index`` as ``([points, features, mask], labels)``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``index`` is outside ``range(len(self))``.
        """
        n_batches = len(self)
        if index < 0:
            index += n_batches
        if not 0 <= index < n_batches:
            # Without this guard an out-of-range index would silently yield
            # empty arrays instead of signalling the end of the sequence.
            raise IndexError(
                f"batch index out of range for {n_batches} batches"
            )

        start = index * self.batch_size
        batch_indices = self.indices[start:start + self.batch_size]

        X, y = self.__data_generation(batch_indices)
        return X, y

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __data_generation(self, batch_indices):
        """Read the groups named in ``batch_indices`` and stack them.

        Returns:
            ``([points, features, mask], labels)`` where each element is the
            ``np.array`` of the per-group values, in ``batch_indices`` order.
            Assumes every group stores arrays of identical shape so they
            stack along a leading batch axis -- TODO confirm for new datasets.
        """
        features_list = []
        points_list = []
        mask_list = []
        labels_list = []

        # The file is opened per batch, so no handle is kept on the instance
        with h5py.File(self.hdf5_file, 'r') as f:
            for key in batch_indices:
                grp = f[key]
                # [:] reads the whole dataset into memory before the file closes
                features_list.append(grp['features'][:])
                points_list.append(grp['points'][:])
                mask_list.append(grp['mask'][:])
                labels_list.append(grp.attrs['label'])

        features = np.array(features_list)
        points = np.array(points_list)
        mask = np.array(mask_list)
        labels = np.array(labels_list)

        return [points, features, mask], labels


## Test on DataLoader

In [7]:
data_path = "../data/dataset_500_rel_coords.h5"
# Split the dataset into train, validation and test partitions
with h5py.File(data_path, 'r') as f:
    all_indices = list(f.keys())

# 70% train, 15% validation, 15% test.
# A seeded local generator makes the split reproducible across kernel
# restarts; without it every run would produce different partitions.
SPLIT_SEED = 42
np.random.default_rng(SPLIT_SEED).shuffle(all_indices)
train_split = int(0.7 * len(all_indices))
val_split = int(0.85 * len(all_indices))

train_indices = all_indices[:train_split]
val_indices = all_indices[train_split:val_split]
test_indices = all_indices[val_split:]

# The three slices must cover every instance exactly once
assert len(train_indices) + len(val_indices) + len(test_indices) == len(all_indices)

# Create one generator per partition
batch_size = 32
train_generator = PCDataGenerator(data_path, train_indices, batch_size=batch_size, shuffle=True)
val_generator = PCDataGenerator(data_path, val_indices, batch_size=batch_size, shuffle=True)
test_generator = PCDataGenerator(data_path, test_indices, batch_size=batch_size, shuffle=False)


In [11]:
# Show the first batch served by the train, validation and test generators
for generator in (train_generator, val_generator, test_generator):
    print(generator[0])

([array([[[-34.,  14.],
        [  5., -44.],
        [ 22.,  44.],
        ...,
        [-32.,   2.],
        [-37.,   4.],
        [ 14.,  35.]],

       [[  7.,  29.],
        [ 40.,   7.],
        [ 12.,  32.],
        ...,
        [ 18.,  22.],
        [ 18.,  21.],
        [ 18.,  19.]],

       [[ -2.,  38.],
        [ 15., -33.],
        [-28.,  11.],
        ...,
        [ 45.,  -8.],
        [-35.,  -6.],
        [-19., -12.]],

       ...,

       [[ 39., -21.],
        [-12.,  51.],
        [  3., -13.],
        ...,
        [ 15.,  26.],
        [ 16., -31.],
        [ 12.,   9.]],

       [[ 27.,  42.],
        [ 38., -27.],
        [ 14., -39.],
        ...,
        [ 11.,  22.],
        [ 11.,  31.],
        [-18., -26.]],

       [[-15., -17.],
        [ 13., -45.],
        [-37., -18.],
        ...,
        [ 15.,   9.],
        [ 15.,   8.],
        [ 38., -27.]]], dtype=float32), array([[[ 7.37012148e+00,  3.12197876e+02],
        [ 7.37341404e+00,  4.25564392e+02],

In [14]:
# Take one batch and split it into model inputs and labels.
# NOTE(review): example_item was not defined in any remaining cell, so this
# cell failed on a fresh kernel. The first training batch is used here --
# confirm this is the batch that was originally inspected.
example_item = train_generator[0]
data = example_item[0]
labels = example_item[1]

In [15]:
# Print the shape of each input array in the batch
for batch_input in data:
    print(batch_input.shape)

(32, 500, 2)
(32, 500, 2)
(32, 500, 1)
