In [2]:
%load_ext autoreload
%autoreload 2
import numpy as np
import polars as pl
from nirs4all.dataset.features import Features

# Two toy feature sources describing the same 5 samples:
# source 1 carries 3 features per sample, source 2 carries 2.
source1_raw = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 11, 12],
        [13, 14, 15],
    ],
    dtype=np.float32,
)
source2_raw = np.array(
    [
        [0.1, 0.2],
        [0.3, 0.4],
        [0.5, 0.6],
        [0.7, 0.8],
        [0.9, 1.0],
    ],
    dtype=np.float32,
)

# Build the Features container and register both sources at once.
# The first argument is left empty, so the index is filled with the values
# visible in the cell output (partition "train", processing "raw").
raw_sources = [source1_raw, source2_raw]
features = Features()
features.add_features({}, raw_sources)
print("Initial Features object:", features)  # FeatureBlock(sources=2, samples=5)
print(features.index)


# Register a second processing for every source: each raw array divided by 10
# (a stand-in for a real normalization step).
normalized_data_1, normalized_data_2 = (
    raw / 10.0 for raw in (source1_raw, source2_raw)
)
normalized_sources = [normalized_data_1, normalized_data_2]
features.add_features({"processing": "normalized"}, normalized_sources)
# The index now lists both the raw and the normalized entries.
print(features.index)

# Split the samples into train/test partitions.
# Every sample starts in the default "train" partition, so only the held-out
# samples need to be re-labelled: samples 0-2 stay "train", samples 3-4 become
# "test". (Previously samples 0-2 were moved to "test", which left only two
# training samples and contradicted the 3-sample shapes expected by the
# retrieval examples that use the "train" filter.)
# NOTE(review): assumes update_index addresses samples by their id — confirm.
features.update_index([3, 4], {"partition": "test"})
print("Updated index:\n", features.index)

# Filter selecting only the training partition; reused by every retrieval call.
filter_dict = {"partition": "train"}

# Retrieve the filtered features in each layout accepted by features.x().
# The expected shapes noted below assume that the "train" filter selects
# 3 samples and that two processings (raw + normalized) exist per source.

# 2D layout with source concatenation:
# (3 + 2) features x 2 processings = 10 columns in a single array.
x_train_2d_concat = features.x(filter_dict, layout="2d", src_concat=True)
print("\n2D concatenated shape:", x_train_2d_concat.shape)  # Expected: (3, 10)
print("2D concatenated data:\n", x_train_2d_concat)

# 2D layout without concatenation (separate sources):
# one entry per source, each holding features x processings columns.
x_train_2d_sources = features.x(filter_dict, layout="2d", src_concat=False)
print("\n2D per source shapes:", [arr.shape for arr in x_train_2d_sources])  # Expected: [(3, 6), (3, 4)]
print("Source 1 (2D):\n", x_train_2d_sources[0])
print("Source 2 (2D):\n", x_train_2d_sources[1])

# 3D layout without concatenation:
# one entry per source, expected as (samples, processings, features).
x_train_3d = features.x(filter_dict, layout="3d", src_concat=False)
print("\n3D shapes:", [arr.shape for arr in x_train_3d])  # Expected: [(3, 2, 3), (3, 2, 2)]
print("Source 1 (3D):\n", x_train_3d[0])
print("Source 2 (3D):\n", x_train_3d[1])

# 3D layout with source concatenation.
# NOTE(review): x_train_3d is rebound here from the per-source result above to
# the concatenated result; a distinct name would keep both available.
x_train_3d = features.x(filter_dict, layout="3d", src_concat=True)
print("\n3D concatenated shape:", x_train_3d.shape)  # Expected: (3, 2, 5) if sources are joined on the feature axis (3 + 2) — TODO confirm; this note previously said (3, 2, 10), which does not match the per-source 3D shapes
print("3D concatenated data:\n", x_train_3d)

# 2D interleaved layout without concatenation:
# same per-source shapes as the plain 2D layout; presumably only the column
# ordering differs — verify against the printed data.
x_train_2d_interleaved = features.x(filter_dict, layout="2d_interleaved", src_concat=False)
print("\n2D interleaved shapes:", [arr.shape for arr in x_train_2d_interleaved])  # Expected: [(3, 6), (3, 4)]
print("Source 1 (2D interleaved):\n", x_train_2d_interleaved[0])
print("Source 2 (2D interleaved):\n", x_train_2d_interleaved[1])

# 3D transpose layout without concatenation:
# the last two axes are swapped relative to the 3D layout,
# i.e. expected as (samples, features, processings).
x_train_3d_transpose = features.x(filter_dict, layout="3d_transpose", src_concat=False)
print("\n3D transpose shapes:", [arr.shape for arr in x_train_3d_transpose])  # Expected: [(3, 3, 2), (3, 2, 2)]
print("Source 1 (3D transpose):\n", x_train_3d_transpose[0])
print("Source 2 (3D transpose):\n", x_train_3d_transpose[1])

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Initial Features object: FeatureBlock(sources=2, samples=5)
shape: (5, 7)
┌────────┬────────┬───────────┬───────┬────────┬────────────┬─────┐
│ sample ┆ origin ┆ partition ┆ group ┆ branch ┆ processing ┆ row │
│ ---    ┆ ---    ┆ ---       ┆ ---   ┆ ---    ┆ ---        ┆ --- │
│ i32    ┆ i32    ┆ cat       ┆ i8    ┆ i8     ┆ cat        ┆ i32 │
╞════════╪════════╪═══════════╪═══════╪════════╪════════════╪═════╡
│ 0      ┆ null   ┆ train     ┆ 0     ┆ 0      ┆ raw        ┆ 0   │
│ 1      ┆ null   ┆ train     ┆ 0     ┆ 0      ┆ raw        ┆ 1   │
│ 2      ┆ null   ┆ train     ┆ 0     ┆ 0      ┆ raw        ┆ 2   │
│ 3      ┆ null   ┆ train     ┆ 0     ┆ 0      ┆ raw        ┆ 3   │
│ 4      ┆ null   ┆ train     ┆ 0     ┆ 0      ┆ raw        ┆ 4   │
└────────┴────────┴───────────┴───────┴────────┴────────────┴─────┘
shape: (10, 7)
┌────────┬────────┬───────────┬───────┬────────┬────────────┬─────┐
│ sampl