In [None]:
from preprocess import Preprocess
import pandas as pd

**NOTA**: boundary, buildings e samples devono essere già stati proiettati in un CRS metrico. Per la Francia usare epsg=2154 (RGF93 / Lambert-93, un CRS metrico).

#### Instantiating the Preprocess class...OK

In [None]:
# Create the preprocessing helper that all following cells call into.
# NOTE(review): "Paris" is presumably the city name used to locate the raw and
# processed data folders (a later cell reads ./data/processed/Paris/) — confirm
# against the Preprocess class.
preprocessor = Preprocess("Paris")

#### Processing the raw buildings and POIs downloaded from Geofabrik...OK

In [None]:
# Fetch the two layers used by every later step; the call returns a pair that
# is unpacked as (buildings, POIs).
# NOTE(review): presumably both are geo-dataframes already projected to a
# metric CRS — confirm against Preprocess.get_building_and_poi.
building, poi = preprocessor.get_building_and_poi()

#### Performing Poisson Disk Sampling...OK

In [None]:
# Sampling radius handed to the Poisson disk sampler and reused later when
# partitioning. Presumably expressed in metres of the projected CRS — TODO confirm.
radius = 50
# Draw the sample points from the building and POI layers.
random_point = preprocessor.poisson_disk_sampling(building, poi, radius)

#### Rasterize buildings...OK

In [None]:
# Rasterize the building layer. The return value is discarded, so the result is
# presumably persisted by the preprocessor itself (likely the building_raster
# HDF5 file read further down) — confirm against Preprocess.rasterize_buildings.
preprocessor.rasterize_buildings(building)

#### Partition city data by road network...OK

**TODO**: gli autori segmentano la città tramite le traffic analysis zones (TAZ). Problema: non ci sono shapefile TAZ disponibili per Parigi. Per ora usiamo le celle IRIS al posto delle TAZ: hanno una densità comparabile e i loro confini seguono la rete stradale (analogamente alle celle TAZ).

In [None]:
# Partition the buildings, POIs and sampled points into regions, passing the
# same radius that was used for sampling. The return value is discarded, so any
# output is presumably written by the preprocessor — confirm.
preprocessor.partition(building, poi, random_point, radius)
# Report how many sample points were generated by the Poisson disk sampling step.
print(f'Random Points: {len(random_point)}')

#### Chunk a previously created hdf5 file (execute only if necessary!)

In [None]:
import h5py
import numpy as np

# Path to the original HDF5 file
original_file_path = './data/processed/Paris/building_raster.hdf5'
# Path to the new HDF5 file to store the chunked dataset
new_file_path = './data/processed/Paris/building_raster_chunked.hdf5'

# Copy the 'images' dataset into a new file that stores it in gzip-compressed
# chunks. The source is opened read-only ('r'): it is never written to here, so
# requesting write access would only risk modifying the original by accident
# (and would fail on read-only storage).
with h5py.File(original_file_path, 'r') as file:
    old_dataset = file['images']

    dataset_size = old_dataset.shape[0]
    if dataset_size == 0:
        # Fail before the output file is created/truncated: a chunk cannot
        # have a zero-length dimension and there is nothing to copy anyway.
        raise ValueError(
            f"Dataset 'images' in {original_file_path} is empty; nothing to chunk."
        )

    # One chunk holds up to 128 complete elements. h5py rejects a chunk that is
    # larger than the dataset along any axis, so clamp the row count for small
    # datasets. Taking shape[1:] keeps the chunk rank equal to the dataset rank
    # whatever the number of trailing axes is.
    chunk_rows = min(128, dataset_size)
    chunks = (chunk_rows, *old_dataset.shape[1:])

    # Copy in batches so the whole dataset never has to fit in memory. The
    # batch length is rounded down to a multiple of the chunk length so that no
    # compressed chunk straddles two batches (which would force it to be
    # written, read back, and re-compressed).
    batch_size = (50000 // chunk_rows) * chunk_rows
    num_batches = -(-dataset_size // batch_size)  # ceiling division

    print(f"Number of elements in the dataset: {dataset_size}")
    # shape[1:] is the per-element shape; no need to read element 0 from disk.
    print(f"Size of a single dataset element: {old_dataset.shape[1:]}")
    print(f"Chunk size: {chunks}")
    print(f"Batch size: {batch_size} ({num_batches} batches)")

    # Open the destination in write mode ('w' creates or truncates the file).
    with h5py.File(new_file_path, 'w') as new_file:
        new_dataset = new_file.create_dataset('images',
                                              shape=old_dataset.shape,
                                              dtype=old_dataset.dtype,
                                              chunks=chunks,
                                              compression="gzip")

        # Stream the data across, one batch at a time.
        for idx in range(0, dataset_size, batch_size):
            end_index = min(idx + batch_size, dataset_size)
            new_dataset[idx:end_index] = old_dataset[idx:end_index]
            print(f"Copied batch from {idx} to {end_index}")