In [1]:
import numpy as np
from nirs4all.dataset.feature_source import FeatureSource  # Assuming the class is in feature_source.py

# Seed the source with a (3 samples x 2 features) float32 matrix holding 1..6.
initial_data = np.arange(1, 7, dtype=np.float32).reshape(3, 2)
fs = FeatureSource(initial_data)
print(fs)  # recorded: FeatureSource(shape=(3, 1, 2), dtype=float32, processing_ids=['raw'])

# Registering one processing id grows the middle axis by one.
fs.add_processings("proc1")
print(fs)  # recorded: FeatureSource(shape=(3, 2, 2), dtype=float32, processing_ids=['raw', 'proc1'])

# A list registers several processing ids in a single call.
fs.add_processings(["proc2", "proc3"])
print(fs)  # recorded: FeatureSource(shape=(3, 4, 2), dtype=float32, processing_ids=['raw', 'proc1', 'proc2', 'proc3'])

# Append two more samples (values 7..10, same 2 features).
new_samples = np.arange(7, 11, dtype=np.float32).reshape(2, 2)
fs.add_samples(new_samples)
print(fs)  # recorded: FeatureSource(shape=(5, 4, 2), dtype=float32, processing_ids=['raw', 'proc1', 'proc2', 'proc3'])

# Augment samples 0 and 1 twice each: 5 + 2 * 2 = 9 rows afterwards.
fs.augment_samples(count=2, indices=[0, 1])
print(fs)  # recorded: FeatureSource(shape=(9, 4, 2), dtype=float32, processing_ids=['raw', 'proc1', 'proc2', 'proc3'])

# Replacement data for 'proc1': one row per current sample (9 x 2),
# holding 10, 20, ..., 180 in row-major order.
new_data = np.arange(10, 190, 10, dtype=np.float32).reshape(9, 2)
fs.update_processing("proc1", new_data, "proc1_updated")
# Note the recorded repr: 'proc1' is kept and 'proc1_updated' is appended.
print(fs)  # recorded: FeatureSource(shape=(9, 5, 2), dtype=float32, processing_ids=['raw', 'proc1', 'proc2', 'proc3', 'proc1_updated'])

# Every layout below is requested for the same first three rows.
row_indices = np.array([0, 1, 2])

# 2D layout: per row, the features of each processing are placed side by side
# (5 processings x 2 features = 10 columns in the recorded output).
layout_2d = fs.layout_2d(row_indices)
print(layout_2d.shape)
assert layout_2d.shape == (3, 10), f"unexpected 2D layout shape: {layout_2d.shape}"
print(layout_2d)

# 2D interleaved layout: same width, but the recorded output groups the columns
# by feature (feature 0 of every processing first, then feature 1).
layout_2d_interleaved = fs.layout_2d_interleaved(row_indices)
print(layout_2d_interleaved.shape)
assert layout_2d_interleaved.shape == (3, 10), (
    f"unexpected interleaved layout shape: {layout_2d_interleaved.shape}"
)
print(layout_2d_interleaved)

# 3D layout: (rows, processings, features).
layout_3d = fs.layout_3d(row_indices)
print(layout_3d.shape)
assert layout_3d.shape == (3, 5, 2), f"unexpected 3D layout shape: {layout_3d.shape}"
print(layout_3d)

# 3D transposed layout. The (3, 2, 5) shape is the original author's stated
# expectation -- NOTE(review): confirm against a fresh run.
layout_3d_transpose = fs.layout_3d_transpose(row_indices)
print(layout_3d_transpose.shape)
assert layout_3d_transpose.shape == (3, 2, 5), (
    f"unexpected transposed layout shape: {layout_3d_transpose.shape}"
)
print(layout_3d_transpose)

FeatureSource(shape=(3, 1, 2), dtype=float32, processing_ids=['raw'])
FeatureSource(shape=(3, 2, 2), dtype=float32, processing_ids=['raw', 'proc1'])
FeatureSource(shape=(3, 4, 2), dtype=float32, processing_ids=['raw', 'proc1', 'proc2', 'proc3'])
FeatureSource(shape=(5, 4, 2), dtype=float32, processing_ids=['raw', 'proc1', 'proc2', 'proc3'])
FeatureSource(shape=(9, 4, 2), dtype=float32, processing_ids=['raw', 'proc1', 'proc2', 'proc3'])
FeatureSource(shape=(9, 5, 2), dtype=float32, processing_ids=['raw', 'proc1', 'proc2', 'proc3', 'proc1_updated'])
(3, 10)
[[ 1.  2. 10. 20.  1.  2.  1.  2.  1.  2.]
 [ 3.  4. 30. 40.  3.  4.  3.  4.  3.  4.]
 [ 5.  6. 50. 60.  5.  6.  5.  6.  5.  6.]]
(3, 10)
[[ 1. 10.  1.  1.  1.  2. 20.  2.  2.  2.]
 [ 3. 30.  3.  3.  3.  4. 40.  4.  4.  4.]
 [ 5. 50.  5.  5.  5.  6. 60.  6.  6.  6.]]
(3, 5, 2)
[[[ 1.  2.]
  [10. 20.]
  [ 1.  2.]
  [ 1.  2.]
  [ 1.  2.]]

 [[ 3.  4.]
  [30. 40.]
  [ 3.  4.]
  [ 3.  4.]
  [ 3.  4.]]

 [[ 5.  6.]
  [50. 60.]
  [ 5.  6.]


In [3]:
%load_ext autoreload
%autoreload 2
import numpy as np
import polars as pl
from nirs4all.dataset.features import Features

# Two raw sources describing the same 5 samples:
# source 1 holds 3 features per sample (values 1..15), source 2 holds 2.
source1_raw = np.arange(1, 16, dtype=np.float32).reshape(5, 3)
source2_raw = np.array(
    [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8], [0.9, 1.0]],
    dtype=np.float32,
)

# Register both sources on a fresh Features container in one call.
features = Features()
features.add_features([source1_raw, source2_raw])
print("Initial Features object:", features)  # recorded: FeatureBlock(sources=2, samples=5)
# The recorded index has one row per sample, all in partition "train" with processing "raw".
print(features.index)


# Toy transforms, derived from the raw arrays before touching the sources.
normalized_data = np.divide(source1_raw, 10.0)  # example normalization
filtered_data = np.add(source2_raw, 1.0)        # example filtering

# Register one processing id on each source ...
features.sources[0].add_processings("normalized")
features.sources[1].add_processings("filtered")

# ... then push the transformed data into it, supplying a new processing id.
features.sources[0].update_processing("normalized", normalized_data, "normalized2")
features.sources[1].update_processing("filtered", filtered_data, "filtered2")

print(features.index)

# Split the index into train/test partitions.
# Every row starts out as "train" (see the index printed right after
# add_features), so only the held-out rows need relabelling: rows 0-2 stay in
# "train" and rows 3-4 become "test". This matches the earlier polars
# expression (row < 3 -> "train", otherwise "test") and the 3-row shapes the
# retrieval code below expects. The previous call relabelled rows 0-2 as
# "test", which left only two training rows.
# Assumes the first argument of update_index is a list of row indices -- TODO confirm.
features.update_index([3, 4], "partition", "test")
print("Updated index:\n", features.index)

# Only rows whose "partition" column equals "train" are requested below.
filter_dict = {"partition": "train"}


def _show_per_source(heading, tag, per_source):
    """Print the list of per-source shapes, then the arrays of source 1 and source 2."""
    print(f"\n{heading}:", [block.shape for block in per_source])
    print(f"Source 1 ({tag}):\n", per_source[0])
    print(f"Source 2 ({tag}):\n", per_source[1])


# The shapes quoted in the comments below are the original author's
# expectations, not recorded output.
# NOTE(review): they assume 3 training rows and 2 processings per source.
# Confirm the row count against the partition assignment, and the widths
# against update_processing: in the FeatureSource demo, passing a new id
# appended a processing instead of renaming the existing one.

# 2D layout, all sources concatenated into one array -- author expected (3, 10).
x_train_2d_concat = features.x(filter_dict, layout="2d", src_concat=True)
print("\n2D concatenated shape:", x_train_2d_concat.shape)
print("2D concatenated data:\n", x_train_2d_concat)

# 2D layout, one array per source -- author expected [(3, 6), (3, 4)].
x_train_2d_sources = features.x(filter_dict, layout="2d", src_concat=False)
_show_per_source("2D per source shapes", "2D", x_train_2d_sources)

# 3D layout, one array per source -- author expected [(3, 2, 3), (3, 2, 2)].
x_train_3d = features.x(filter_dict, layout="3d", src_concat=False)
_show_per_source("3D shapes", "3D", x_train_3d)

# 2D interleaved layout, one array per source -- author expected [(3, 6), (3, 4)].
x_train_2d_interleaved = features.x(filter_dict, layout="2d_interleaved", src_concat=False)
_show_per_source("2D interleaved shapes", "2D interleaved", x_train_2d_interleaved)

# 3D transposed layout, one array per source -- author expected [(3, 3, 2), (3, 2, 2)].
x_train_3d_transpose = features.x(filter_dict, layout="3d_transpose", src_concat=False)
_show_per_source("3D transpose shapes", "3D transpose", x_train_3d_transpose)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Initial Features object: FeatureBlock(sources=2, samples=5)
shape: (5, 7)
┌─────┬────────┬────────┬───────────┬───────┬────────┬────────────┐
│ row ┆ sample ┆ origin ┆ partition ┆ group ┆ branch ┆ processing │
│ --- ┆ ---    ┆ ---    ┆ ---       ┆ ---   ┆ ---    ┆ ---        │
│ i32 ┆ i32    ┆ i32    ┆ cat       ┆ i8    ┆ i8     ┆ cat        │
╞═════╪════════╪════════╪═══════════╪═══════╪════════╪════════════╡
│ 0   ┆ 0      ┆ 0      ┆ train     ┆ 0     ┆ 0      ┆ raw        │
│ 1   ┆ 1      ┆ 1      ┆ train     ┆ 0     ┆ 0      ┆ raw        │
│ 2   ┆ 2      ┆ 2      ┆ train     ┆ 0     ┆ 0      ┆ raw        │
│ 3   ┆ 3      ┆ 3      ┆ train     ┆ 0     ┆ 0      ┆ raw        │
│ 4   ┆ 4      ┆ 4      ┆ train     ┆ 0     ┆ 0      ┆ raw        │
└─────┴────────┴────────┴───────────┴───────┴────────┴────────────┘
shape: (5, 7)
┌─────┬────────┬────────┬───────────┬───────┬────────┬────────────┐
│ row ┆ 