# Global SKM Data Preparation
Read the global Gaussians CSV file and generate the old, ID-less array format expected by the algorithm explorer.

In [1]:
# Base name of the dataset; the CSV is read from DATA_DIR/<DATA_NAME>.csv.
DATA_NAME = "playroom_23"
# Number of Gaussian splat slots stored in every CSV row. Each slot contributes
# six columns (id, alpha, depth, color_r, color_g, color_b).
NUMBER_OF_SPLATS_PER_PIXEL = 500

# Load the data from CSV

In [2]:
from os.path import join

from numpy import float32
from polars import Float32, Schema, UInt8, UInt32, scan_csv, all

from clustering_exploration.utils.constants import DATA_DIR, IMAGE_HEIGHT, IMAGE_WIDTH

# Column layout of the CSV: seven per-row fields first, followed by six fields
# for each of the NUMBER_OF_SPLATS_PER_PIXEL splat slots. The splat index is the
# outer loop, so the six fields of one splat are always adjacent.
column_names = [
    "sample_index",
    "out_color_r",
    "out_color_g",
    "out_color_b",
    "background_r",
    "background_g",
    "background_b",
] + [
    f"gaussian_{splat_index}_{field}"
    for splat_index in range(NUMBER_OF_SPLATS_PER_PIXEL)
    for field in ("id", "alpha", "depth", "color_r", "color_g", "color_b")
]

# Build the schema: every column is Float32 unless overridden below.
# dict.fromkeys keeps the column order of `column_names`, and updating existing
# keys changes their dtype without moving them.
schema_dict = dict.fromkeys(column_names, Float32)
schema_dict.update(
    sample_index=UInt32,
    background_r=UInt8,
    background_g=UInt8,
    background_b=UInt8,
)
schema = Schema(schema_dict)

# Set up a lazy scan of the dataset's CSV; the data is only materialised by the
# .collect() call further down.
data_path = join(DATA_DIR, f"{DATA_NAME}.csv")
raw_data = scan_csv(data_path, schema=schema)

# Select every column except the seven per-row fields, leaving only the
# per-splat columns.
# NOTE: `all` is polars' column selector (imported above), not the builtin.
splat_columns = all().exclude(
    "sample_index",
    "out_color_r",
    "out_color_g",
    "out_color_b",
    "background_r",
    "background_g",
    "background_b",
)

# Target layout: (row, splat slot, field). The trailing 6 matches the six
# fields per splat (id, alpha, depth, color_r, color_g, color_b).
# Assumes the CSV holds exactly IMAGE_HEIGHT * IMAGE_WIDTH rows and that the
# frame keeps the declared column order; otherwise the reshape fails or
# misaligns -- TODO confirm against the data producer.
splat_shape = (IMAGE_HEIGHT * IMAGE_WIDTH, NUMBER_OF_SPLATS_PER_PIXEL, 6)

# Materialise the splat columns and fold them into the 3-D array. The chain is
# kept unbroken so no extra reference to the intermediate DataFrame is kept.
id_splats = (
    raw_data.select(splat_columns)
    .collect()
    .to_numpy()
    .astype(float32, copy=False)
    .reshape(splat_shape)
)