# Prepare scene for gaussian-splatting
## Dataset preparation
Set the scene ID and the LangSplat root directory used by every cell below.

In [None]:
import os
# Scene identifier under LangSplat/data/ -- change this to switch scenes.
SCENE = "5eb31827b7"
# Root of the LangSplat checkout, resolved against the directory the kernel was started in.
LANGSPLAT_ROOT = os.getcwd() + "/LangSplat"


Copy the resized images and COLMAP files from the raw dataset folders into the layout expected by the `gaussian-splatting` code: the resized images go to both `input/` and `images/`, and the COLMAP files go to `sparse/0/`. The raw `dslr/` folder is deleted afterwards.

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Lay out the raw scene data the way the gaussian-splatting code expects it:
#   dslr/resized_images -> images/ and input/
#   dslr/colmap         -> sparse/0/
# and then delete the raw dslr/ folder.
# Args: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DST="$LANGSPLAT_ROOT/data/$SCENE"
DSLR_DIR="$DST/dslr"
IMG_SRC="$DSLR_DIR/resized_images"
COLMAP_SRC="$DSLR_DIR/colmap"

# Validate both sources before touching anything, so a missing folder (for
# example on a re-run after dslr/ was already removed) fails with a clear
# message instead of half-way through the copy.
for src in "$IMG_SRC" "$COLMAP_SRC"; do
  if [ ! -d "$src" ]; then
    echo "Missing source directory: $src" >&2
    exit 1
  fi
done

mkdir -p "$DST/images" "$DST/input" "$DST/sparse/0"

rsync -a "$IMG_SRC/" "$DST/images/"
rsync -a "$IMG_SRC/" "$DST/input/"
# Deliberately no `|| true`: the source folder is deleted right below, so a
# failed COLMAP copy must abort the cell rather than be silently ignored.
rsync -a "$COLMAP_SRC/" "$DST/sparse/0/"

# Only reached when every copy above succeeded (set -e).
rm -rf "$DSLR_DIR"

echo "✓ Done. LangSplat data prepared at: $DST"
echo "✓ Removed: $DSLR_DIR"
# Listing is informational only; never fail the cell on it.
ls -lh "$DST" || true

### Running colmap undistorter

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Run COLMAP's image_undistorter on the prepared scene and write the result
# to <scene>/undistorted.
# Args: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

scene_dir="$LANGSPLAT_ROOT/data/$SCENE"
undistorted_dir="$scene_dir/undistorted"
mkdir -p "$undistorted_dir"

# Stop with an explicit message when the colmap executable is not installed.
command -v colmap >/dev/null 2>&1 || {
  echo "colmap binary not found in PATH" >&2
  exit 1
}

echo "Running COLMAP image_undistorter in $scene_dir"
undistort_args=(
  --image_path "$scene_dir/images"
  --input_path "$scene_dir/sparse/0"
  --output_path "$undistorted_dir"
  --output_type COLMAP
)
colmap image_undistorter "${undistort_args[@]}"
echo "Undistorted data ready under $undistorted_dir"
# Listing is informational only; never fail the cell on it.
ls -la "$undistorted_dir" || true

### Moving generated undistorted .bin colmap files to sparse/0 

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Move everything the undistorter wrote directly under undistorted/sparse
# into undistorted/sparse/0, then drop any text-format model files there.
# Args: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail
# An unmatched glob expands to nothing instead of the literal pattern.
shopt -s nullglob

LANGSPLAT_ROOT=$1
SCENE=$2

UNDISTORTED_ROOT="$LANGSPLAT_ROOT/data/$SCENE/undistorted"
SPARSE_ROOT="$UNDISTORTED_ROOT/sparse"
TARGET_DIR="$SPARSE_ROOT/0"

# Fail loudly if the undistorter cell has not produced its output yet,
# rather than creating an empty sparse/0 tree.
if [ ! -d "$SPARSE_ROOT" ]; then
  echo "Missing undistorter output: $SPARSE_ROOT" >&2
  exit 1
fi

mkdir -p "$TARGET_DIR"

# Collect every entry of sparse/ except the target directory itself.
to_move=()
for item in "$SPARSE_ROOT"/*; do
  [ "$item" = "$TARGET_DIR" ] && continue
  to_move+=("$item")
done

if [ "${#to_move[@]}" -eq 0 ]; then
  # Nothing new to move (e.g. the cell was already run): keep the existing
  # contents of sparse/0 instead of wiping them.
  echo "Nothing to move; leaving $TARGET_DIR untouched"
else
  # Replace stale contents only when there is something to put in their place.
  rm -rf "${TARGET_DIR:?}"/*
  mv -v "${to_move[@]}" "$TARGET_DIR/"
fi

rm -f "$TARGET_DIR/cameras.txt" "$TARGET_DIR/images.txt" "$TARGET_DIR/points3D.txt"
ls -la "$TARGET_DIR"

## replacing old files with undistorted files

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Replace the scene's images/, input/ and sparse/ with the undistorted
# versions, then delete the temporary undistorted/ folder.
# Args: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

SCENE_ROOT="$LANGSPLAT_ROOT/data/$SCENE"
UNDISTORTED="$SCENE_ROOT/undistorted"

# Check the replacement data exists BEFORE deleting the current data;
# otherwise a re-run (undistorted/ is removed at the end of this cell) or a
# skipped undistorter step would wipe the scene and leave nothing behind.
for required in "$UNDISTORTED/images" "$UNDISTORTED/sparse"; do
  if [ ! -d "$required" ]; then
    echo "Missing undistorted data: $required" >&2
    exit 1
  fi
done

rm -rf "$SCENE_ROOT/images" "$SCENE_ROOT/input" "$SCENE_ROOT/sparse"
mkdir -p "$SCENE_ROOT/images" "$SCENE_ROOT/input" "$SCENE_ROOT/sparse/0"

rsync -a "$UNDISTORTED/images/" "$SCENE_ROOT/images/"
rsync -a "$UNDISTORTED/images/" "$SCENE_ROOT/input/"

echo "4. Copying COLMAP sparse reconstruction..."
rsync -a "$UNDISTORTED/sparse/" "$SCENE_ROOT/sparse/"

# Only reached when every copy above succeeded (set -e).
rm -rf "$UNDISTORTED"

## Step 1 – Generate language features
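The original LangSplat `preprocess.py` runs CLIP + SAM over the normalized dataset; this notebook runs the `preprocess_dino.py` variant instead, which writes its features to `language_features_dino`. `ckpts/sam_vit_h_4b8939.pth` must exist before executing.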

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Run preprocess_dino.py on the prepared scene to generate language features.
# Args: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DATASET="$LANGSPLAT_ROOT/data/$SCENE"

cd "$LANGSPLAT_ROOT"

# No trailing backslash after the last argument: a dangling continuation
# would swallow the following line if the blank line after it were removed.
python preprocess_dino.py \
  --dataset_path "$DATASET"

echo "language features saved: $LANGSPLAT_ROOT/data/${SCENE}/language_features_dino"


## !!! It is recommended to use the original Gaussian_splatting to train rgb scenes !!!

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# RGB-only training run; produces the 30000-iteration checkpoint that the
# later LangSplat training cell starts from.
# Args: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

scene_dir="$LANGSPLAT_ROOT/data/$SCENE"
rgb_model_dir="$scene_dir/output/${SCENE}"

cd "$LANGSPLAT_ROOT"

# Arguments are collected in an array so each option sits on its own line
# without backslash continuations.
train_args=(
  --source_path "$scene_dir"
  --model_path "$rgb_model_dir"
  --iterations 30000
  --test_iterations 7000 30000
  --save_iterations 7000 30000
  --checkpoint_iterations 30000
)
python train.py "${train_args[@]}"

echo ""
echo "Checkpoint saved: $rgb_model_dir/chkpnt30000.pth"


###  Autoencoder to reduce feature dim from 2048 to 3 dims

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Train the autoencoder that compresses the scene's language features.
# Args: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DATASET="$LANGSPLAT_ROOT/data/$SCENE"
# Single definition of the name, shared by the training call and the
# final message so the two cannot drift apart.
DATASET_NAME="${SCENE}_dino_3d"

cd "$LANGSPLAT_ROOT/autoencoder"

# NOTE(review): the encoder/decoder widths are passed through unchanged;
# confirm they match the feature dimension produced by preprocessing.
python train.py \
  --dataset_path "$DATASET" \
  --dataset_name "$DATASET_NAME" \
  --encoder_dims 1024 256 64 16 3 \
  --decoder_dims 16 64 256 1024 \
  --lr 0.0007

# Report the real directory name instead of the literal "(--dataset_name)" placeholder.
echo "Checkpoint saved: $LANGSPLAT_ROOT/autoencoder/ckpt/$DATASET_NAME"



ModuleList(
  (0): Linear(in_features=2048, out_features=1024, bias=True)
  (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Linear(in_features=1024, out_features=512, bias=True)
  (4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU()
  (6): Linear(in_features=512, out_features=256, bias=True)
  (7): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): ReLU()
  (9): Linear(in_features=256, out_features=128, bias=True)
  (10): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (11): ReLU()
  (12): Linear(in_features=128, out_features=64, bias=True)
  (13): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (14): ReLU()
  (15): Linear(in_features=64, out_features=32, bias=True)
) ModuleList(
  (0): Linear(in_features=32, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=

 97%|█████████▋| 97/100 [12:05<00:22,  7.64s/it]

eval_loss:0.06613647


 98%|█████████▊| 98/100 [12:13<00:15,  7.73s/it]

eval_loss:0.06597405


 99%|█████████▉| 99/100 [12:21<00:07,  7.79s/it]

eval_loss:0.06599228


100%|██████████| 100/100 [12:29<00:00,  7.50s/it]


eval_loss:0.06587859
best_epoch: 99
best_loss: 0.06587859
Checkpoint saved: /home/s25mdeyl_hpc/workspace/LangSplat-DINO/LangSplat/autoencoder/ckpt/(--dataset_name)


In [10]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Run the trained autoencoder over the scene to produce the 3-dimensional
# language features.
# Args: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DATASET="$LANGSPLAT_ROOT/data/$SCENE"
# Folder name is defined once so the --output_dir argument and the final
# message always agree.
FEATURES_DIR_NAME="language_features_dim3"

cd "$LANGSPLAT_ROOT/autoencoder"
# get the 3-dims language feature of the scene
python test.py \
  --dataset_path "$DATASET" \
  --dataset_name "${SCENE}_dino_3d" \
  --encoder_dims 1024 256 64 16 3 \
  --decoder_dims 16 64 256 1024 \
  --output_dir "$DATASET/$FEATURES_DIR_NAME"

# Previously printed the placeholder "language_features_dimX".
echo "generated 3d language features: data/$SCENE/$FEATURES_DIR_NAME"



ModuleList(
  (0): Linear(in_features=2048, out_features=1024, bias=True)
  (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Linear(in_features=1024, out_features=256, bias=True)
  (4): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU()
  (6): Linear(in_features=256, out_features=64, bias=True)
  (7): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): ReLU()
  (9): Linear(in_features=64, out_features=16, bias=True)
  (10): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (11): ReLU()
  (12): Linear(in_features=16, out_features=3, bias=True)
) ModuleList(
  (0): Linear(in_features=3, out_features=16, bias=True)
  (1): ReLU()
  (2): Linear(in_features=16, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=256, bias=True)
  (5): ReLU()
  (6): Linear(in_features=256, out_features=1024, b

120it [00:00, 451.23it/s]


generated 3d language features: data/5eb31827b7/language_features_dimX


## Step 2 – Optimize LangSplat from the RGB checkpoint


### Getting 3D embeddings from the autoencoder-generated features and training LangSplat

In [22]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Train LangSplat language features on top of the RGB checkpoint, using the
# 3-dimensional features from language_features_dim3.
# Args: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DATASET="$LANGSPLAT_ROOT/data/$SCENE"
OUTPUT_NAME="${SCENE}_langsplat_dino3d"
# Single definition of the model directory, shared by -m and the final
# message (the message used to point at $DATASET/output instead).
MODEL_PATH="$LANGSPLAT_ROOT/output/$OUTPUT_NAME"

cd "$LANGSPLAT_ROOT"

# No trailing backslash after the last argument: a dangling continuation
# would swallow the following line if the blank line after it were removed.
python train.py \
  -s "$DATASET" \
  -m "$MODEL_PATH" \
  --start_checkpoint "$DATASET/output/$SCENE/chkpnt30000.pth" \
  --feature_level 3 \
  --language_features_name "language_features_dim3" \
  --include_feature \
  --language_feature_dim 3 \
  --iterations 30000

# NOTE(review): train.py may append a feature-level suffix to the model
# directory -- confirm the final folder name on disk.
echo "Checkpoint saved: $MODEL_PATH"

Namespace(sh_degree=3, source_path='/home/s25mdeyl_hpc/workspace/LangSplat-DINO/LangSplat/data/5eb31827b7', model_path='/home/s25mdeyl_hpc/workspace/LangSplat-DINO/LangSplat/output/5eb31827b7_langsplat_dino3d', language_features_name='language_features_dim3', images='images', resolution=-1, white_background=False, feature_level=3, data_device='cuda', eval=False, iterations=30000, position_lr_init=0.00016, position_lr_final=1.6e-06, position_lr_delay_mult=0.01, position_lr_max_steps=30000, feature_lr=0.0025, opacity_lr=0.05, language_feature_lr=0.0025, language_feature_dim=3, include_feature=True, scaling_lr=0.005, rotation_lr=0.001, percent_dense=0.01, lambda_dssim=0.2, densification_interval=100, opacity_reset_interval=3000, densify_from_iter=500, densify_until_iter=15000, densify_grad_threshold=0.0002, convert_SHs_python=False, compute_cov3D_python=False, debug=False, ip='127.0.0.1', port=55555, debug_from=-1, detect_anomaly=False, test_iterations=[7000, 30000], save_iterations=[7000

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Training progress: 100%|██████████| 30000/30000 [53:24<00:00,  9.36it/s, Loss=0.0948561] ess:   1%|          | 360/30000 [00:41<56:19,  8.77it/s, Loss=0.2172887]gress:   1%|          | 370/30000 [00:42<55:38,  8.88it/s, Loss=0.2688345]progress:   9%|▉         | 2710/30000 [04:54<48:03,  9.47it/s, Loss=0.2513341]g progress:  13%|█▎        | 3770/30000 [06:46<46:28,  9.41it/s, Loss=0.1564173]g progress:  18%|█▊        | 5410/30000 [09:40<43:36,  9.40it/s, Loss=0.1576423]aining progress:  35%|███▌      | 10630/30000 [19:06<34:03,  9.48it/s, Loss=0.1300755]aining progress:  37%|███▋      | 10960/30000 [19:41<33:34,  9.45it/s, Loss=0.1324335]aining progress:  38%|███▊      | 11460/30000 [20:34<32:50,  9.41it/s, Loss=0.1040754]aining progress:  38%|███▊      | 11470/30000 [20:35<32:43,  9.44it/s, Loss=0.1111958]aining progress:  39%|███▉      | 11700/30000 [20:59<32:28,  9.39it/s, Loss=0.1516707]aining progress:  41%|████

testing for iter 7000 [17/01 18:38:24]

[ITER 7000] Evaluating train: L1 0.009204849228262901 PSNR 35.394937896728514 [17/01 18:38:32]

[ITER 7000] Saving Gaussians [17/01 18:38:32]

[ITER 7000] Saving Checkpoint [17/01 18:38:36]
testing for iter 30000 [17/01 19:19:19]

[ITER 30000] Evaluating train: L1 0.009204849228262901 PSNR 35.394937896728514 [17/01 19:19:24]

[ITER 30000] Saving Gaussians [17/01 19:19:24]

[ITER 30000] Saving Checkpoint [17/01 19:19:27]

Training complete. [17/01 19:19:27]
