### 1. Prepare the prompt:

In [12]:
from l2am.dataset_utils import prepare_text_samples_batch
from datasets import load_dataset, Dataset
# Episode shard used as the source of the demo samples.
data_path = "data/l2am_r2r/episodes_part_0008.json"
raw_ds = load_dataset("json", data_files=data_path)["train"]

# Run the batched conversion; all original columns are removed so that only
# the fields emitted by `prepare_text_samples_batch` remain in `frame_ds`.
frame_ds = raw_ds.map(
    prepare_text_samples_batch,
    batched=True,
    remove_columns=raw_ds.column_names,
    desc="Building text prompts",
    num_proc=4,  # optional: run the mapping in parallel worker processes
)
print(f"Total frames: {len(frame_ds)}")
frame_ds[0]  # last expression of the cell: display one sample

Total frames: 5884


{'prompt': 'Observation Grid:\n[0,0]: depth=0.00, sem=void; [0,1]: depth=0.00, sem=void; [0,2]: depth=0.00, sem=void; [0,3]: depth=0.00, sem=void; [0,4]: depth=0.00, sem=void; [0,5]: depth=0.00, sem=void\n[1,0]: depth=0.00, sem=void; [1,1]: depth=0.00, sem=void; [1,2]: depth=0.00, sem=void; [1,3]: depth=0.00, sem=void; [1,4]: depth=0.00, sem=void; [1,5]: depth=0.00, sem=void\n[2,0]: depth=2.37, sem=void; [2,1]: depth=0.09, sem=void; [2,2]: depth=0.00, sem=void; [2,3]: depth=0.00, sem=void; [2,4]: depth=0.00, sem=void; [2,5]: depth=0.00, sem=void\n[3,0]: depth=4.81, sem=wall; [3,1]: depth=0.28, sem=void; [3,2]: depth=0.00, sem=void; [3,3]: depth=0.00, sem=void; [3,4]: depth=0.00, sem=void; [3,5]: depth=0.00, sem=void\n[4,0]: depth=2.38, sem=void; [4,1]: depth=0.05, sem=void; [4,2]: depth=0.00, sem=void; [4,3]: depth=0.00, sem=void; [4,4]: depth=0.00, sem=void; [4,5]: depth=0.00, sem=void\n[5,0]: depth=0.00, sem=void; [5,1]: depth=0.00, sem=void; [5,2]: depth=0.00, sem=void; [5,3]: depth

In [13]:
# Fetch the first sample once, then print its text prompt followed by its
# ground-truth action label.
first_frame = frame_ds[0]
arrow_rule = "------------------------------>"

print("Prompt:", arrow_rule, first_frame['prompt'], sep="\n")
print("+" * 30)
print("Ground Truth Action:", arrow_rule, first_frame['action'], sep="\n")

Prompt:
------------------------------>
Observation Grid:
[0,0]: depth=0.00, sem=void; [0,1]: depth=0.00, sem=void; [0,2]: depth=0.00, sem=void; [0,3]: depth=0.00, sem=void; [0,4]: depth=0.00, sem=void; [0,5]: depth=0.00, sem=void
[1,0]: depth=0.00, sem=void; [1,1]: depth=0.00, sem=void; [1,2]: depth=0.00, sem=void; [1,3]: depth=0.00, sem=void; [1,4]: depth=0.00, sem=void; [1,5]: depth=0.00, sem=void
[2,0]: depth=2.37, sem=void; [2,1]: depth=0.09, sem=void; [2,2]: depth=0.00, sem=void; [2,3]: depth=0.00, sem=void; [2,4]: depth=0.00, sem=void; [2,5]: depth=0.00, sem=void
[3,0]: depth=4.81, sem=wall; [3,1]: depth=0.28, sem=void; [3,2]: depth=0.00, sem=void; [3,3]: depth=0.00, sem=void; [3,4]: depth=0.00, sem=void; [3,5]: depth=0.00, sem=void
[4,0]: depth=2.38, sem=void; [4,1]: depth=0.05, sem=void; [4,2]: depth=0.00, sem=void; [4,3]: depth=0.00, sem=void; [4,4]: depth=0.00, sem=void; [4,5]: depth=0.00, sem=void
[5,0]: depth=0.00, sem=void; [5,1]: depth=0.00, sem=void; [5,2]: depth=0.00, 

### 2. Inference example:

In [14]:
# Bind the text prompt of the first sample (index 0) to `prompt`.
# NOTE(review): `prompt` is not referenced by any later cell visible here —
# confirm it is still needed.
prompt = frame_ds[0]['prompt']


In [15]:
# inference.py
import os
import torch
from transformers import AutoTokenizer
from l2am.model_zoo import WeightedSequenceClassifier  # 确保能导入
import numpy as np
from safetensors.torch import load_file

# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
MODEL_CHECKPOINT = "data/l2a_longformer_action_classifier/checkpoint-81000"
HF_CACHE_DIR = "data/hf_model_cache"
MAX_LENGTH = 1024

# Prepare one sample from the dataset for testing; the prompt is built with
# the example code below.
from l2am.dataset_utils import prepare_text_samples_batch
from datasets import load_dataset, Dataset

data_path = "data/l2am_r2r/episodes_part_0008.json"
raw_ds = load_dataset("json", data_files=data_path)["train"]

# Apply the batched conversion, dropping every original column.
frame_ds = raw_ds.map(
    prepare_text_samples_batch,
    batched=True,
    remove_columns=raw_ds.column_names,
    desc="Building text prompts",
    num_proc=4,  # optional: run the mapping in parallel worker processes
)
print(f"Total frames: {len(frame_ds)}")

# Index of the sample to test; its prompt and label are read by `main`
# through the two module-level names below.
test_frame_id = 3
test_frame = frame_ds[test_frame_id]
EXAMPLE_PROMPT = test_frame['prompt']
GROUND_TRUTH_ACTION = test_frame['action']



def main(
    hf_cache_dir=HF_CACHE_DIR,
    model_checkpoint=MODEL_CHECKPOINT,
    max_length=MAX_LENGTH,
    model_name="allenai/longformer-base-4096",
    num_labels=4,
):
    """Classify ``EXAMPLE_PROMPT`` with the fine-tuned classifier and print the result.

    The prompt and its expected label are read from the module-level names
    ``EXAMPLE_PROMPT`` and ``GROUND_TRUTH_ACTION``.

    Args:
        hf_cache_dir: Directory passed as ``cache_dir`` to the tokenizer and
            to ``WeightedSequenceClassifier``.
        model_checkpoint: Checkpoint directory. The tokenizer is loaded from
            it, and the weights are read from ``model.safetensors`` or, if
            that file is absent, from ``pytorch_model.bin``.
        max_length: Length the tokenized prompt is truncated / padded to.
        model_name: Backbone identifier handed to
            ``WeightedSequenceClassifier``.
        num_labels: Number of output classes of the classifier head.

    Raises:
        FileNotFoundError: If the checkpoint directory contains neither
            ``model.safetensors`` nor ``pytorch_model.bin``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Step 1: load the tokenizer from the checkpoint directory.
    tokenizer = AutoTokenizer.from_pretrained(
        model_checkpoint,
        cache_dir=hf_cache_dir,
        clean_up_tokenization_spaces=True,
    )

    # Step 2: rebuild the model structure (must match the training setup).
    # Per the original author's note, class weights only feed the loss and do
    # not influence the forward pass, so placeholder ones are passed here.
    dummy_class_weights = torch.ones(num_labels)
    model = WeightedSequenceClassifier(
        model_name=model_name,
        num_labels=num_labels,
        class_weights=dummy_class_weights,
        cache_dir=hf_cache_dir,
    )

    # Step 3: load the trained weights. Prefer safetensors, but fall back to
    # the pickle-based file because some checkpoints only ship that format.
    model_safetensors = os.path.join(model_checkpoint, "model.safetensors")
    model_bin = os.path.join(model_checkpoint, "pytorch_model.bin")
    if os.path.exists(model_safetensors):
        state_dict = load_file(model_safetensors, device=str(device))
    elif os.path.exists(model_bin):
        # weights_only=True restricts unpickling to tensors / plain containers.
        state_dict = torch.load(model_bin, map_location=device, weights_only=True)
    else:
        raise FileNotFoundError(
            "Neither 'model.safetensors' nor 'pytorch_model.bin' found in "
            f"checkpoint directory: {model_checkpoint}"
        )

    model.load_state_dict(state_dict, strict=True)
    model.to(device)
    model.eval()

    # Step 4: tokenize the prompt and run inference.
    inputs = tokenizer(
        EXAMPLE_PROMPT,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        # The model output is indexed like a mapping, not via `.logits`.
        logits = outputs["logits"]
        # `.item()` requires exactly one predicted value (a single prompt).
        pred_class = torch.argmax(logits, dim=-1).item()

    print("\n" + "="*50)
    print("Prompt:")
    print(EXAMPLE_PROMPT)
    print("\nGround Truth Action:", GROUND_TRUTH_ACTION)
    print("Predicted Action:", pred_class)
    print("="*50)

    if pred_class == GROUND_TRUTH_ACTION:
        print("✅ Prediction matches ground truth!")
    else:
        print("❌ Prediction differs from ground truth.")


# Run the demo only when this code executes as the main module (not when it
# is imported).
if __name__ == "__main__":
    main()

Total frames: 5884
Using device: cuda


Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Initializing global attention on CLS token...



Prompt:
Observation Grid:
[0,0]: depth=1.71, sem=wall; [0,1]: depth=0.75, sem=window; [0,2]: depth=0.00, sem=void; [0,3]: depth=0.00, sem=void; [0,4]: depth=0.00, sem=void; [0,5]: depth=0.00, sem=void
[1,0]: depth=1.63, sem=wall; [1,1]: depth=0.74, sem=window; [1,2]: depth=0.00, sem=void; [1,3]: depth=0.00, sem=void; [1,4]: depth=0.00, sem=void; [1,5]: depth=0.00, sem=void
[2,0]: depth=1.62, sem=wall; [2,1]: depth=1.07, sem=window; [2,2]: depth=3.68, sem=void; [2,3]: depth=1.50, sem=void; [2,4]: depth=0.00, sem=void; [2,5]: depth=0.00, sem=void
[3,0]: depth=1.60, sem=wall; [3,1]: depth=1.25, sem=window; [3,2]: depth=6.75, sem=window; [3,3]: depth=3.86, sem=wall; [3,4]: depth=0.00, sem=void; [3,5]: depth=0.00, sem=void
[4,0]: depth=1.34, sem=floor; [4,1]: depth=1.08, sem=window; [4,2]: depth=1.23, sem=void; [4,3]: depth=0.70, sem=void; [4,4]: depth=0.00, sem=void; [4,5]: depth=0.00, sem=void
[5,0]: depth=0.82, sem=floor; [5,1]: depth=0.75, sem=window; [5,2]: depth=0.82, sem=void; [5,3]

### 3. Action-chunk inference with BigBird

In [16]:
# inference.py
import os
import torch
from transformers import AutoTokenizer
from l2am.model_zoo import MultiStepWeightedClassifier  # 确保能导入
import numpy as np
from safetensors.torch import load_file

# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
MODEL_CHECKPOINT = "data/l2a_bigbird_action_classifier_chunk4/checkpoint-185500"
HF_CACHE_DIR = "data/hf_model_cache"
MAX_LENGTH = 1024
# Per the original note, this may be swapped for roberta-base,
# bert-base-uncased, allenai/longformer-base-4096,
# google/bigbird-roberta-base, etc.
MODEL_NAME = "google/bigbird-roberta-base"

NUM_CHUNK = 4  # keep identical to the value used for training

# Prepare one sample from the dataset for testing; the prompt is built with
# the example code below.
from l2am.dataset_utils import prepare_text_samples_batch_chunk_v1
from datasets import load_dataset, Dataset

data_path = "data/l2am_r2r/episodes_part_0008.json"
raw_ds = load_dataset("json", data_files=data_path)["train"]

# Apply the batched conversion, dropping every original column.
frame_ds = raw_ds.map(
    prepare_text_samples_batch_chunk_v1,
    batched=True,
    remove_columns=raw_ds.column_names,
    desc="Building text prompts",
    num_proc=16,  # optional: run the mapping in parallel worker processes
)
print(f"Total frames: {len(frame_ds)}")

# Index of the sample to test; its prompt and action chunk are read by
# `main` through the two module-level names below.
test_frame_id = 3
test_frame = frame_ds[test_frame_id]
EXAMPLE_PROMPT = test_frame['prompt']
GROUND_TRUTH_ACTION = test_frame['action_chunk']



def main(
    hf_cache_dir=HF_CACHE_DIR,
    model_checkpoint=MODEL_CHECKPOINT,
    max_length=MAX_LENGTH,
    model_name=MODEL_NAME,
    num_steps=NUM_CHUNK,
    num_labels=4,
):
    """Predict an action chunk for ``EXAMPLE_PROMPT`` and print the result.

    The prompt and its expected action chunk are read from the module-level
    names ``EXAMPLE_PROMPT`` and ``GROUND_TRUTH_ACTION``.

    Args:
        hf_cache_dir: Directory passed as ``cache_dir`` to the tokenizer and
            to ``MultiStepWeightedClassifier``.
        model_checkpoint: Checkpoint directory. The tokenizer is loaded from
            it, and the weights are read from ``model.safetensors`` or, if
            that file is absent, from ``pytorch_model.bin``.
        max_length: Length the tokenized prompt is truncated / padded to.
        model_name: Backbone identifier handed to
            ``MultiStepWeightedClassifier``.
        num_steps: Number of actions per chunk; must match training.
        num_labels: Number of action classes per step.

    Raises:
        FileNotFoundError: If the checkpoint directory contains neither
            ``model.safetensors`` nor ``pytorch_model.bin``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Step 1: load the tokenizer from the checkpoint directory.
    from transformers import BigBirdTokenizer

    tokenizer = BigBirdTokenizer.from_pretrained(
        model_checkpoint,
        cache_dir=hf_cache_dir,
        clean_up_tokenization_spaces=True,
    )

    # Step 2: rebuild the model structure (must match the training setup).
    # Per the original author's note, class weights only feed the loss and do
    # not influence the forward pass, so placeholder ones are passed here.
    dummy_class_weights = torch.ones(num_labels)
    model = MultiStepWeightedClassifier(
        model_name,
        num_labels=num_labels,
        class_weights=dummy_class_weights,
        num_steps=num_steps,
        cache_dir=hf_cache_dir,
    )

    # Step 3: load the trained weights, preferring the safetensors file.
    model_safetensors = os.path.join(model_checkpoint, "model.safetensors")
    model_bin = os.path.join(model_checkpoint, "pytorch_model.bin")

    if os.path.exists(model_safetensors):
        state_dict = load_file(model_safetensors, device=str(device))
    elif os.path.exists(model_bin):
        # pytorch_model.bin is a pickle; weights_only=True restricts
        # unpickling to tensors / plain containers instead of arbitrary code.
        state_dict = torch.load(model_bin, map_location=device, weights_only=True)
    else:
        raise FileNotFoundError("Neither 'model.safetensors' nor 'pytorch_model.bin' found in checkpoint directory.")

    model.load_state_dict(state_dict, strict=True)
    model.to(device)
    model.eval()

    # Step 4: tokenize the prompt and run inference.
    inputs = tokenizer(
        EXAMPLE_PROMPT,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs["logits"]
        # Argmax over the class axis for every step; per the original note
        # the result has shape (B, num_steps), so `.item()` is not usable.
        pred_classes = torch.argmax(logits, dim=-1)
        # With a single prompt the leading batch axis is dropped.
        if pred_classes.shape[0] == 1:
            pred_classes = pred_classes.squeeze(0)

        # Plain Python list, convenient for printing and comparison.
        pred_list = pred_classes.cpu().tolist()

    print("\n" + "="*50)
    print("Prompt:")
    print(EXAMPLE_PROMPT)
    print("\nGround Truth Action:", GROUND_TRUTH_ACTION)
    print("Predicted Action:", pred_list)
    print("="*50)

    if pred_list == GROUND_TRUTH_ACTION:
        print("✅ Prediction matches ground truth!")
    else:
        print("❌ Prediction differs from ground truth.")


# Run the demo only when this code executes as the main module (not when it
# is imported).
if __name__ == "__main__":
    main()

Total frames: 5884
Using device: cuda

Prompt:
Observation Grid:
[0,0]: depth=1.71, sem=wall; [0,1]: depth=0.75, sem=window; [0,2]: depth=0.00, sem=void; [0,3]: depth=0.00, sem=void; [0,4]: depth=0.00, sem=void; [0,5]: depth=0.00, sem=void
[1,0]: depth=1.63, sem=wall; [1,1]: depth=0.74, sem=window; [1,2]: depth=0.00, sem=void; [1,3]: depth=0.00, sem=void; [1,4]: depth=0.00, sem=void; [1,5]: depth=0.00, sem=void
[2,0]: depth=1.62, sem=wall; [2,1]: depth=1.07, sem=window; [2,2]: depth=3.68, sem=void; [2,3]: depth=1.50, sem=void; [2,4]: depth=0.00, sem=void; [2,5]: depth=0.00, sem=void
[3,0]: depth=1.60, sem=wall; [3,1]: depth=1.25, sem=window; [3,2]: depth=6.75, sem=window; [3,3]: depth=3.86, sem=wall; [3,4]: depth=0.00, sem=void; [3,5]: depth=0.00, sem=void
[4,0]: depth=1.34, sem=floor; [4,1]: depth=1.08, sem=window; [4,2]: depth=1.23, sem=void; [4,3]: depth=0.70, sem=void; [4,4]: depth=0.00, sem=void; [4,5]: depth=0.00, sem=void
[5,0]: depth=0.82, sem=floor; [5,1]: depth=0.75, sem=wind

### 4. Action-chunk inference with BigBird (using the class API)

In [4]:
from l2am.inference_api import ActionChunkPredictor
from l2am.dataset_utils import prepare_text_samples_batch_chunk_v1
from datasets import load_dataset

def main(
    model_checkpoint="data/l2a_bigbird_action_classifier_chunk4/checkpoint-189500",
    hf_cache_dir="data/hf_model_cache",
    max_length=1024,
    num_chunk=4,
    num_labels=4,
    data_path="data/l2am_r2r/episodes_part_0109.json",
    test_frame_id=35,
    num_proc=16,
):
    """Predict the action chunk for one dataset sample via ``ActionChunkPredictor``.

    Called without arguments, this reproduces the original hard-coded demo;
    every setting can be overridden to test another checkpoint, shard or
    sample without copying the function body.

    Args:
        model_checkpoint: Checkpoint directory given to the predictor.
        hf_cache_dir: Hugging Face cache directory given to the predictor.
        max_length: ``max_length`` setting given to the predictor.
        num_chunk: Passed to the predictor as ``num_steps``.
        num_labels: Passed to the predictor as ``num_labels``.
        data_path: JSON episode file the test samples are built from.
        test_frame_id: Index into the converted dataset of the sample to test.
        num_proc: Number of worker processes for the dataset conversion.
    """
    # Load the episode shard and convert it into prompt / action-chunk samples.
    raw_ds = load_dataset("json", data_files=data_path)["train"]
    frame_ds = raw_ds.map(
        prepare_text_samples_batch_chunk_v1,
        batched=True,
        remove_columns=raw_ds.column_names,
        desc="Building text prompts",
        num_proc=num_proc,
    )
    print(f"Total frames: {len(frame_ds)}")

    # Initialise the predictor.
    predictor = ActionChunkPredictor(
        model_checkpoint=model_checkpoint,
        hf_cache_dir=hf_cache_dir,
        num_labels=num_labels,
        num_steps=num_chunk,
        max_length=max_length,
    )

    # Fetch the selected sample once and take its prompt and expected chunk.
    test_frame = frame_ds[test_frame_id]
    example_prompt = test_frame['prompt']
    ground_truth_action = test_frame['action_chunk']

    # Inference. Per the original note, `predict_clean` automatically handles
    # the special padding markers inside the action chunk.
    pred_action = predictor.predict(example_prompt)
    pred_action_clean = predictor.predict_clean(example_prompt)

    # Report.
    print("\n" + "=" * 50)
    print("Prompt:")
    print(example_prompt)
    print("\nGround Truth Action:", ground_truth_action)
    print("Predicted Action:     ", pred_action)
    print("Predicted Action (Clean):     ", pred_action_clean)
    print("=" * 50)

    # The raw (not the cleaned) prediction is compared against the label.
    if pred_action == ground_truth_action:
        print("✅ Prediction matches ground truth!")
    else:
        print("❌ Prediction differs from ground truth.")


# Run the demo only when this code executes as the main module (not when it
# is imported).
if __name__ == "__main__":
    main()

Total frames: 1110
[ActionChunkPredictor] Using device: cuda
Loading weights from pytorch_model.bin: data/l2a_bigbird_action_classifier_chunk4/checkpoint-189500/pytorch_model.bin

Prompt:
Observation Grid:
[0,0]: depth=2.06, sem=ceiling; [0,1]: depth=2.06, sem=ceiling; [0,2]: depth=2.06, sem=ceiling; [0,3]: depth=1.65, sem=ceiling; [0,4]: depth=0.94, sem=column; [0,5]: depth=0.94, sem=wall
[1,0]: depth=2.63, sem=wall; [1,1]: depth=2.91, sem=wall; [1,2]: depth=2.96, sem=wall; [1,3]: depth=2.38, sem=column; [1,4]: depth=0.94, sem=column; [1,5]: depth=0.84, sem=wall
[2,0]: depth=3.59, sem=wall; [2,1]: depth=5.57, sem=wall; [2,2]: depth=4.36, sem=wall; [2,3]: depth=3.95, sem=wall; [2,4]: depth=0.94, sem=column; [2,5]: depth=0.95, sem=wall
[3,0]: depth=2.90, sem=wall; [3,1]: depth=3.46, sem=floor; [3,2]: depth=3.16, sem=floor; [3,3]: depth=2.88, sem=column; [3,4]: depth=0.95, sem=column; [3,5]: depth=0.95, sem=wall
[4,0]: depth=1.38, sem=floor; [4,1]: depth=1.38, sem=floor; [4,2]: depth=1.3

#### 4.1 Cases

In [6]:
from l2am.inference_api import ActionChunkPredictor
from l2am.dataset_utils import prepare_text_samples_batch_chunk_v1
from datasets import load_dataset


# --- Configuration ------------------------------------------------------
MODEL_CHECKPOINT = "data/l2a_bigbird_action_classifier_chunk4/checkpoint-189500"
HF_CACHE_DIR = "data/hf_model_cache"
MAX_LENGTH = 1024
NUM_CHUNK = 4
NUM_LABELS = 4

# --- Test samples -------------------------------------------------------
# Load the episode shard and convert it into prompt / action-chunk samples,
# dropping every original column.
data_path = "data/l2am_r2r/episodes_part_0109.json"
raw_ds = load_dataset("json", data_files=data_path)["train"]
frame_ds = raw_ds.map(
    prepare_text_samples_batch_chunk_v1,
    desc="Building text prompts",
    batched=True,
    num_proc=16,
    remove_columns=raw_ds.column_names,
)
print(f"Total frames: {len(frame_ds)}")

# --- Predictor ----------------------------------------------------------
# `predictor` and `frame_ds` stay in the notebook namespace after this cell.
predictor = ActionChunkPredictor(
    model_checkpoint=MODEL_CHECKPOINT,
    hf_cache_dir=HF_CACHE_DIR,
    max_length=MAX_LENGTH,
    num_steps=NUM_CHUNK,
    num_labels=NUM_LABELS,
)

# --- Case: another sample -----------------------------------------------
test_frame_id = 27
test_frame = frame_ds[test_frame_id]
EXAMPLE_PROMPT = test_frame['prompt']
GROUND_TRUTH_ACTION = test_frame['action_chunk']

# Inference. Per the original note, `predict_clean` automatically handles
# the special padding markers inside the action chunk.
pred_action = predictor.predict(EXAMPLE_PROMPT)
pred_action_clean = predictor.predict_clean(EXAMPLE_PROMPT)

# Report.
banner = "=" * 50
print("\n" + banner)
print("Prompt:", EXAMPLE_PROMPT, sep="\n")
print("\nGround Truth Action:", GROUND_TRUTH_ACTION)
print("Predicted Action:     ", pred_action)
print("Predicted Action (Clean):     ", pred_action_clean)
print(banner)

# The raw (not the cleaned) prediction is compared against the label.
print(
    "✅ Prediction matches ground truth!"
    if pred_action == GROUND_TRUTH_ACTION
    else "❌ Prediction differs from ground truth."
)


Total frames: 1110
[ActionChunkPredictor] Using device: cuda
Loading weights from pytorch_model.bin: data/l2a_bigbird_action_classifier_chunk4/checkpoint-189500/pytorch_model.bin

Prompt:
Observation Grid:
[0,0]: depth=2.64, sem=wall; [0,1]: depth=2.53, sem=beam; [0,2]: depth=2.41, sem=beam; [0,3]: depth=1.93, sem=column; [0,4]: depth=0.32, sem=railing; [0,5]: depth=0.28, sem=railing
[1,0]: depth=3.75, sem=ceiling; [1,1]: depth=3.28, sem=column; [1,2]: depth=3.57, sem=ceiling; [1,3]: depth=2.07, sem=objects; [1,4]: depth=0.39, sem=railing; [1,5]: depth=0.28, sem=railing
[2,0]: depth=2.78, sem=chair; [2,1]: depth=3.58, sem=wall; [2,2]: depth=6.32, sem=wall; [2,3]: depth=3.55, sem=column; [2,4]: depth=0.50, sem=railing; [2,5]: depth=0.28, sem=railing
[3,0]: depth=1.88, sem=chair; [3,1]: depth=2.75, sem=wall; [3,2]: depth=3.48, sem=wall; [3,3]: depth=2.56, sem=column; [3,4]: depth=0.50, sem=railing; [3,5]: depth=0.24, sem=railing
[4,0]: depth=1.07, sem=wall; [4,1]: depth=1.07, sem=wall; [

In [17]:
# Test another sample. This cell reuses `frame_ds` and `predictor` created
# by an earlier cell, so that cell has to be run first.
test_frame_id = 40
test_frame = frame_ds[test_frame_id]
EXAMPLE_PROMPT = test_frame['prompt']
GROUND_TRUTH_ACTION = test_frame['action_chunk']

# Inference. Per the original note, `predict_clean` automatically handles
# the special padding markers inside the action chunk.
pred_action = predictor.predict(EXAMPLE_PROMPT)
pred_action_clean = predictor.predict_clean(EXAMPLE_PROMPT)

# Report.
banner = "=" * 50
print("\n" + banner)
print("Prompt:", EXAMPLE_PROMPT, sep="\n")
print("\nGround Truth Action:", GROUND_TRUTH_ACTION)
print("Predicted Action:     ", pred_action)
print("Predicted Action (Clean):     ", pred_action_clean)
print(banner)

# The raw (not the cleaned) prediction is compared against the label.
print(
    "✅ Prediction matches ground truth!"
    if pred_action == GROUND_TRUTH_ACTION
    else "❌ Prediction differs from ground truth."
)


Prompt:
Observation Grid:
[0,0]: depth=1.84, sem=wall; [0,1]: depth=1.71, sem=wall; [0,2]: depth=1.74, sem=wall; [0,3]: depth=1.85, sem=wall; [0,4]: depth=1.77, sem=wall; [0,5]: depth=1.34, sem=wall
[1,0]: depth=2.08, sem=wall; [1,1]: depth=2.81, sem=ceiling; [1,2]: depth=1.78, sem=wall; [1,3]: depth=3.00, sem=ceiling; [1,4]: depth=2.43, sem=ceiling; [1,5]: depth=1.34, sem=wall
[2,0]: depth=2.17, sem=wall; [2,1]: depth=4.82, sem=wall; [2,2]: depth=1.78, sem=wall; [2,3]: depth=4.91, sem=door; [2,4]: depth=3.79, sem=wall; [2,5]: depth=1.34, sem=wall
[3,0]: depth=2.16, sem=wall; [3,1]: depth=3.45, sem=floor; [3,2]: depth=1.78, sem=wall; [3,3]: depth=3.27, sem=floor; [3,4]: depth=2.80, sem=wall; [3,5]: depth=1.35, sem=wall
[4,0]: depth=1.47, sem=floor; [4,1]: depth=1.46, sem=floor; [4,2]: depth=1.42, sem=floor; [4,3]: depth=1.47, sem=floor; [4,4]: depth=1.44, sem=floor; [4,5]: depth=1.25, sem=wall
[5,0]: depth=0.86, sem=floor; [5,1]: depth=0.86, sem=floor; [5,2]: depth=0.86, sem=floor; [5