# Step 2: Fully manual bulk data management


In [None]:
from pathlib import Path

import numpy as np
import tlc
from data_sources import random_array_generator
from tlc.core.helpers.bulk_data_helper import BulkDataHelper

In [None]:
# Extra per-vertex channels stored next to the 2D coordinates: one float32
# "intensity" value list per instance.
per_vertex_schemas = {"intensity": tlc.Float32ListSchema()}

# Column schema for 2D geometry whose heavy arrays live outside the table.
schema = tlc.Geometry2DSchema(
    include_2d_vertices=True,
    per_vertex_schemas=per_vertex_schemas,
    # This is what sets up the "sibling" paths with the "_binary_property_url" suffix
    is_bulk_data=True,
)

In [None]:
# Both generators must agree on the number of vertices per instance so that
# every point gets exactly one intensity value.
points_per_instance = 4

# generates `points_per_instance` 2d points at a time
points_2d_generator = random_array_generator((points_per_instance, 2))
# one float32 intensity per point
intensity_generator = random_array_generator((points_per_instance,), dtype=np.float32)

In [None]:
# Directory that will hold the raw chunk files written by this notebook.
# It is resolved against the current working directory so the resulting
# path is absolute.
bulk_data_path = Path("bulk_data", "2").absolute()

# Create the directory (and any missing parents); a no-op if it already exists.
bulk_data_path.mkdir(exist_ok=True, parents=True)

In [None]:
from collections import defaultdict

# Build ten rows whose vertex and intensity payloads are stored in raw chunk
# files on disk; each row only carries references (URLs) to its byte ranges.
rows = []
# Number of bytes written so far to each chunk file *during this run*.
chunk_offsets = defaultdict(int)

for i in range(10):
    points_2d = next(points_2d_generator)
    intensity = next(intensity_generator)

    chunk = i // 3  # We are now in manual mode, so it is our responsibility to rotate chunks as needed
    bulk_data_file = bulk_data_path / f"{chunk}.raw"

    points_2d_binary = points_2d.tobytes()
    intensity_binary = intensity.tobytes()

    # Plain Python ints, taken from the bytes that are actually written.
    points_2d_length = len(points_2d_binary)
    intensity_length = len(intensity_binary)

    # The first time a chunk is touched in this run, truncate any file left
    # over from an earlier run; otherwise the offsets tracked here (which
    # start at 0) would not match the real positions in the file.
    # NOTE: this membership test must happen before chunk_offsets[chunk] is
    # read, because reading a defaultdict entry creates it.
    mode = "ab" if chunk in chunk_offsets else "wb"

    with open(bulk_data_file, mode) as f:
        # Record where each payload STARTS before advancing the running
        # offset.
        # NOTE(review): assumes get_bulk_data_url takes the start offset of
        # the payload plus its length - confirm against the tlc documentation.
        points_2d_offset = chunk_offsets[chunk]
        written = f.write(points_2d_binary)
        assert written == points_2d_length
        chunk_offsets[chunk] += points_2d_length
        points_binary_property_value = BulkDataHelper.get_bulk_data_url(
            bulk_data_file, points_2d_offset, points_2d_length
        )

        intensity_offset = chunk_offsets[chunk]
        written = f.write(intensity_binary)
        assert written == intensity_length
        chunk_offsets[chunk] += intensity_length
        intensity_binary_property_value = BulkDataHelper.get_bulk_data_url(
            bulk_data_file, intensity_offset, intensity_length
        )

    # The in-table row holds only the bounding box and the URLs; the
    # commented-out keys show where the inline arrays would otherwise go.
    row = {
        "x_min": 0,
        "y_min": 0,
        "x_max": 1,
        "y_max": 1,
        "instances": [
            {
                # "vertices_2d": [],
                "vertices_2d_binary_property_url": points_binary_property_value,
                "vertices_2d_additional_data": {
                    # "intensity": [],
                    "intensity_binary_property_url": intensity_binary_property_value
                },
            }
        ],
    }
    rows.append(row)

In [None]:
rows[-1]  # show the most recently built row (its payloads are URLs, not inline arrays)

## Write the Table!

In [None]:
# A single "vertices" column, described by the bulk-data geometry schema
# defined earlier in this notebook.
column_schemas = {"vertices": schema}  # We use the same schema as before

# Writer for the table that will reference the pre-externalized bulk data.
# NOTE(review): if_exists="rename" presumably avoids clashing with an
# existing table of the same name - confirm against the tlc documentation.
table_writer = tlc.TableWriter(
    project_name="External Bulk Data",
    dataset_name="pre-externalized-dataset",
    table_name="externalize-manually-no-processor",
    description="Pre-externalized table",
    column_schemas=column_schemas,
    if_exists="rename",
)

In [None]:
# Feed every prepared row to the writer under the "vertices" column.
for geometry_row in rows:
    table_row = {"vertices": geometry_row}
    table_writer.add_row(table_row)

# Finish writing and keep the resulting table object for inspection.
table = table_writer.finalize()

In [None]:
# Read the first row back from the finished table and show its "vertices" column.
first_table_row = table.table_rows[0]
first_table_row["vertices"]