# Prepare scene for gaussian-splatting
## dataset preparation
Set the scene ID and the LangSplat root directory.

In [None]:
import os
# Identifier of the scene to process; edit this value to switch scenes.
SCENE = "3c8d535d49"
# LangSplat checkout, expected directly under the current working directory.
LANGSPLAT_ROOT = os.getcwd() + "/LangSplat"


Copy the resized images and the COLMAP files from the raw dataset folders into the layout expected by the `gaussian-splatting` code: `dslr/resized_images` → `images` and `dslr/colmap` → `sparse/0`. The raw `dslr` folder is deleted once the copy is done.

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Normalize the raw scene download into the layout read by gaussian-splatting:
#   data/<scene>/dslr/resized_images -> data/<scene>/images
#   data/<scene>/dslr/colmap         -> data/<scene>/sparse/0
# The raw dslr/ folder is deleted only after BOTH copies have succeeded, so a
# failed copy can never destroy the only copy of the source data.
# Positional arguments: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DST="$LANGSPLAT_ROOT/data/$SCENE"
DSLR_DIR="$DST/dslr"
IMG_SRC="$DSLR_DIR/resized_images"
COLMAP_SRC="$DSLR_DIR/colmap"

# Fail early with a readable message (e.g. when this cell is re-run after the
# raw folder was already removed) instead of a bare rsync error.
if [ ! -d "$IMG_SRC" ]; then
  echo "Image source not found: $IMG_SRC" >&2
  exit 1
fi

mkdir -p "$DST/images" "$DST/sparse/0"

rsync -a "$IMG_SRC/" "$DST/images/"

# The COLMAP copy stays best-effort (the cell does not abort), but the raw data
# is kept whenever that copy fails.
if rsync -a "$COLMAP_SRC/" "$DST/sparse/0/"; then
  rm -rf "$DSLR_DIR"
  echo "✓ Done. LangSplat data prepared at: $DST"
  echo "✓ Removed: $DSLR_DIR"
else
  echo "WARNING: copying COLMAP files from $COLMAP_SRC failed; $DST/sparse/0 may be incomplete" >&2
  echo "Raw data kept at: $DSLR_DIR" >&2
fi
ls -lh "$DST" || true

### Running colmap undistorter

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Run `colmap image_undistorter` on the scene: images come from <scene>/images,
# the sparse model from <scene>/sparse/0, and results go to <scene>/undistorted.
# Positional arguments: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

SCENE_ROOT="$LANGSPLAT_ROOT/data/$SCENE"
OUT_DIR="$SCENE_ROOT/undistorted"
mkdir -p "$OUT_DIR"

# Abort with a clear message when the colmap executable is unavailable.
command -v colmap >/dev/null 2>&1 || {
  echo "colmap binary not found in PATH" >&2
  exit 1
}

echo "Running COLMAP image_undistorter in $SCENE_ROOT"
undistorter_args=(
  --image_path "$SCENE_ROOT/images"
  --input_path "$SCENE_ROOT/sparse/0"
  --output_path "$OUT_DIR"
  --output_type COLMAP
)
colmap image_undistorter "${undistorter_args[@]}"
echo "Undistorted data ready under $OUT_DIR"
# Listing is informational only; never fail the cell because of it.
ls -la "$OUT_DIR" || true

### Moving the generated undistorted COLMAP `.bin` files into `sparse/0`

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Gather everything that sits directly under undistorted/sparse into
# undistorted/sparse/0, then drop the text-format model files from sparse/0.
# Safe to re-run: sparse/0 is only cleared when there is something new to move
# into it, so a second run does not delete the files moved by the first one.
# Positional arguments: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

UNDISTORTED_ROOT="$LANGSPLAT_ROOT/data/$SCENE/undistorted"
SPARSE_ROOT="$UNDISTORTED_ROOT/sparse"
TARGET_DIR="$SPARSE_ROOT/0"
mkdir -p "$TARGET_DIR"

# Collect the siblings of sparse/0. The -e test also skips the literal pattern
# that the glob leaves behind when nothing matches.
pending=()
for item in "$SPARSE_ROOT"/*; do
  if [ "$item" = "$TARGET_DIR" ] || [ ! -e "$item" ]; then
    continue
  fi
  pending+=("$item")
done

if [ "${#pending[@]}" -gt 0 ]; then
  # Replace stale contents only now that fresh files are known to exist.
  rm -rf "$TARGET_DIR"/*
  # A failed move aborts the cell (set -e) instead of being silently ignored.
  mv -v "${pending[@]}" "$TARGET_DIR/"
else
  echo "Nothing to move; leaving $TARGET_DIR untouched"
fi

rm -f "$TARGET_DIR/cameras.txt" "$TARGET_DIR/images.txt" "$TARGET_DIR/points3D.txt"
ls -la "$TARGET_DIR"

## Replacing the old files with the undistorted files

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Replace the scene's images/ and sparse/ folders with the contents of
# undistorted/images and undistorted/sparse, then delete undistorted/.
# Nothing is deleted unless both undistorted folders are present, so re-running
# this cell (or running it after a failed undistortion) cannot wipe the scene.
# Positional arguments: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

SCENE_ROOT="$LANGSPLAT_ROOT/data/$SCENE"
UNDISTORTED="$SCENE_ROOT/undistorted"

# Verify the replacement data exists before removing anything.
for required in "$UNDISTORTED/images" "$UNDISTORTED/sparse"; do
  if [ ! -d "$required" ]; then
    echo "Missing undistorted output: $required (nothing was deleted)" >&2
    exit 1
  fi
done

rm -rf "$SCENE_ROOT/images" "$SCENE_ROOT/sparse"
mkdir -p "$SCENE_ROOT/images" "$SCENE_ROOT/sparse/0"

rsync -a "$UNDISTORTED/images/" "$SCENE_ROOT/images/"

echo "4. Copying COLMAP sparse reconstruction..."
rsync -a "$UNDISTORTED/sparse/" "$SCENE_ROOT/sparse/"

# Reached only when both copies succeeded (set -e aborts earlier otherwise).
rm -rf "$UNDISTORTED"

## Step 1 – Generate language features
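Run the preprocessing script over the normalized dataset to generate the language features. The original `preprocess.py` runs CLIP + SAM; the cell below invokes `preprocess_dino.py`. `ckpts/sam_vit_h_4b8939.pth` must exist before executing.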

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Run preprocess_dino.py from the LangSplat root on the scene's dataset folder.
# Positional arguments: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

cd "$LANGSPLAT_ROOT"

# No trailing backslash after the last argument: a dangling continuation would
# swallow whatever line happens to follow it.
python preprocess_dino.py \
  --dataset_path "$LANGSPLAT_ROOT/data/$SCENE"

# NOTE(review): the output folder name is taken from this message only; confirm
# against preprocess_dino.py.
echo "language features saved: $LANGSPLAT_ROOT/data/${SCENE}/language_features_dino"


## !!! It is recommended to use the original Gaussian_splatting to train rgb scenes !!!

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# RGB-only training run with LangSplat's train.py; the model is written under
# <dataset>/output/<scene> and a checkpoint is requested at iteration 30000,
# which later cells load as their starting point.
# Positional arguments: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DATASET="$LANGSPLAT_ROOT/data/$SCENE"

cd "$LANGSPLAT_ROOT"

# Collect the arguments in an array so each option sits on its own line
# without backslash continuations.
rgb_train_args=(
  --source_path "$DATASET"
  --model_path "$DATASET/output/${SCENE}"
  --iterations 30000
  --test_iterations 7000 30000
  --save_iterations 7000 30000
  --checkpoint_iterations 30000
)
python train.py "${rgb_train_args[@]}"

echo ""
echo "Checkpoint saved: $DATASET/output/${SCENE}/chkpnt30000.pth"


###  Autoencoder to reduce feature dim from 2048 to 3 dims

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Train the autoencoder (autoencoder/train.py) on the scene's features. The
# encoder layer sizes end in 3, i.e. a 3-dimensional bottleneck.
# Positional arguments: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DATASET="$LANGSPLAT_ROOT/data/$SCENE"
# Single source of truth for the run name, used by the command and the message.
DATASET_NAME="${SCENE}_dino_3d"

cd "$LANGSPLAT_ROOT/autoencoder"

python train.py \
  --dataset_path "$DATASET" \
  --dataset_name "$DATASET_NAME" \
  --encoder_dims 1024 256 64 16 3 \
  --decoder_dims 16 64 256 1024 \
  --lr 0.0007

# Report the real checkpoint folder instead of the "(--dataset_name)" placeholder.
# NOTE(review): the ckpt/<dataset_name> location comes from the original message;
# confirm against autoencoder/train.py.
echo "Checkpoint saved: $LANGSPLAT_ROOT/autoencoder/ckpt/$DATASET_NAME"

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Run autoencoder/test.py with the same layer sizes used for training and write
# the resulting 3-dimensional features to <dataset>/language_features_dim3.
# Positional arguments: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DATASET="$LANGSPLAT_ROOT/data/$SCENE"
DATASET_NAME="${SCENE}_dino_3d"
# Defined once so the final message always names the folder actually written.
OUTPUT_DIR="$DATASET/language_features_dim3"

cd "$LANGSPLAT_ROOT/autoencoder"
# get the 3-dims language feature of the scene
python test.py \
  --dataset_path "$DATASET" \
  --dataset_name "$DATASET_NAME" \
  --encoder_dims 1024 256 64 16 3 \
  --decoder_dims 16 64 256 1024 \
  --output_dir "$OUTPUT_DIR"

echo "generated 3d language features: $OUTPUT_DIR"

## Step 2 – Optimize LangSplat from the RGB checkpoint


### Getting 3D embeddings from the autoencoder-generated features and training LangSplat

In [None]:
%%bash -s "$LANGSPLAT_ROOT" "$SCENE"
# Train LangSplat with language features enabled, starting from the RGB
# checkpoint at <dataset>/output/<scene>/chkpnt30000.pth and reading features
# from the "language_features_dim3" folder name passed below.
# Positional arguments: $1 = LangSplat root directory, $2 = scene ID.
set -euo pipefail

LANGSPLAT_ROOT=$1
SCENE=$2

DATASET="$LANGSPLAT_ROOT/data/$SCENE"
OUTPUT_NAME="${SCENE}_langsplat_dino3d"
# Defined once so the final message reports the path actually passed to -m.
MODEL_PATH="$LANGSPLAT_ROOT/output/$OUTPUT_NAME"

cd "$LANGSPLAT_ROOT"

# No trailing backslash after the last argument: a dangling continuation would
# swallow whatever line happens to follow it.
python train.py \
  -s "$DATASET" \
  -m "$MODEL_PATH" \
  --start_checkpoint "$DATASET/output/$SCENE/chkpnt30000.pth" \
  --feature_level 3 \
  --language_features_name "language_features_dim3" \
  --include_feature \
  --language_feature_dim 3 \
  --iterations 30000

# NOTE(review): train.py may append a suffix (e.g. the feature level) to the
# model path; confirm against this repository's train.py.
echo "Checkpoint saved: $MODEL_PATH"