In [1]:
import os
import sys
import time
from argparse import ArgumentParser
from datetime import datetime

import torch
import torch.multiprocessing as mp
import yaml
import numpy as np
from munch import munchify

import wandb
from gaussian_splatting.scene.gaussian_model import GaussianModel
from gaussian_splatting.utils.system_utils import mkdir_p
from gui import gui_utils, slam_gui
from utils.config_utils import load_config
from utils.dataset import load_dataset
from utils.eval_utils import eval_ate, eval_rendering, save_gaussians
from utils.logging_utils import Log
from utils.multiprocessing_utils import FakeQueue
from utils.slam_backend import BackEnd
from utils.slam_frontend import FrontEnd


class SLAM:
    def __init__(self, config, save_dir=None):
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)

        start.record()

        self.config = config
        self.save_dir = save_dir
        model_params = munchify(config["model_params"])
        opt_params = munchify(config["opt_params"])
        pipeline_params = munchify(config["pipeline_params"])
        self.model_params, self.opt_params, self.pipeline_params = (
            model_params,
            opt_params,
            pipeline_params,
        )

        self.live_mode = self.config["Dataset"]["type"] == "realsense"
        self.monocular = self.config["Dataset"]["sensor_type"] == "monocular"
        self.use_spherical_harmonics = self.config["Training"]["spherical_harmonics"]
        self.use_gui = self.config["Results"]["use_gui"]
        if self.live_mode:
            self.use_gui = True
        self.eval_rendering = self.config["Results"]["eval_rendering"]

        model_params.sh_degree = 3 if self.use_spherical_harmonics else 0

        self.gaussians = GaussianModel(model_params.sh_degree, config=self.config)
        self.gaussians.init_lr(6.0)
        self.dataset = load_dataset(
            model_params, model_params.source_path, config=config
        )

        self.gaussians.training_setup(opt_params)
        bg_color = [0, 0, 0]
        self.background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")

        frontend_queue = mp.Queue()
        backend_queue = mp.Queue()

        q_main2vis = mp.Queue() if self.use_gui else FakeQueue()
        q_vis2main = mp.Queue() if self.use_gui else FakeQueue()

        self.config["Results"]["save_dir"] = save_dir
        self.config["Training"]["monocular"] = self.monocular

        self.frontend = FrontEnd(self.config)
        self.backend = BackEnd(self.config)

        self.frontend.dataset = self.dataset
        self.frontend.background = self.background
        self.frontend.pipeline_params = self.pipeline_params
        self.frontend.frontend_queue = frontend_queue
        self.frontend.backend_queue = backend_queue
        self.frontend.q_main2vis = q_main2vis
        self.frontend.q_vis2main = q_vis2main
        self.frontend.set_hyperparams()

        self.backend.gaussians = self.gaussians
        self.backend.background = self.background
        self.backend.cameras_extent = 6.0
        self.backend.pipeline_params = self.pipeline_params
        self.backend.opt_params = self.opt_params
        self.backend.frontend_queue = frontend_queue
        self.backend.backend_queue = backend_queue
        self.backend.live_mode = self.live_mode

        self.backend.set_hyperparams()

        self.params_gui = gui_utils.ParamsGUI(
            pipe=self.pipeline_params,
            background=self.background,
            gaussians=self.gaussians,
            q_main2vis=q_main2vis,
            q_vis2main=q_vis2main,
        )

        backend_process = mp.Process(target=self.backend.run)
        if self.use_gui:
            gui_process = mp.Process(target=slam_gui.run, args=(self.params_gui,))
            gui_process.start()
            time.sleep(5)

        backend_process.start()
        self.frontend.run()
        backend_queue.put(["pause"])

        end.record()
        torch.cuda.synchronize()
        # empty the frontend queue
        N_frames = len(self.frontend.cameras)
        FPS = N_frames / (start.elapsed_time(end) * 0.001)
        Log("Total time", start.elapsed_time(end) * 0.001, tag="Eval")
        Log("Total FPS", N_frames / (start.elapsed_time(end) * 0.001), tag="Eval")

        if self.eval_rendering:
            self.gaussians = self.frontend.gaussians
            kf_indices = self.frontend.kf_indices
            ATE = eval_ate(
                self.frontend.cameras,
                self.frontend.kf_indices,
                self.save_dir,
                0,
                final=True,
                monocular=self.monocular,
            )

            rendering_result = eval_rendering(
                self.frontend.cameras,
                self.gaussians,
                self.dataset,
                self.save_dir,
                self.pipeline_params,
                self.background,
                kf_indices=kf_indices,
                iteration="before_opt",
            )
            columns = ["tag", "psnr", "ssim", "lpips", "RMSE ATE", "FPS"]
            metrics_table = wandb.Table(columns=columns)
            metrics_table.add_data(
                "Before",
                rendering_result["mean_psnr"],
                rendering_result["mean_ssim"],
                rendering_result["mean_lpips"],
                ATE,
                FPS,
            )

            # re-used the frontend queue to retrive the gaussians from the backend.
            while not frontend_queue.empty():
                frontend_queue.get()
            backend_queue.put(["color_refinement"])
            while True:
                if frontend_queue.empty():
                    time.sleep(0.01)
                    continue
                data = frontend_queue.get()
                if data[0] == "sync_backend" and frontend_queue.empty():
                    gaussians = data[1]
                    self.gaussians = gaussians
                    break

            rendering_result = eval_rendering(
                self.frontend.cameras,
                self.gaussians,
                self.dataset,
                self.save_dir,
                self.pipeline_params,
                self.background,
                kf_indices=kf_indices,
                iteration="after_opt",
            )
            metrics_table.add_data(
                "After",
                rendering_result["mean_psnr"],
                rendering_result["mean_ssim"],
                rendering_result["mean_lpips"],
                ATE,
                FPS,
            )
            wandb.log({"Metrics": metrics_table})
            save_gaussians(self.gaussians, self.save_dir, "final_after_opt", final=True)

        backend_queue.put(["stop"])
        backend_process.join()
        Log("Backend stopped and joined the main thread")
        if self.use_gui:
            q_main2vis.put(gui_utils.GaussianPacket(finish=True))
            gui_process.join()
            Log("GUI Stopped and joined the main thread")

    def run(self):
        pass




Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [1]:
1+1

2

In [2]:
sys.argv = [
    "slam.py", 
    "--config", "configs/mono/tum/fr3_office.yaml"
]

# Set up command line argument parser
parser = ArgumentParser(description="Training script parameters")
parser.add_argument("--config", type=str, required=True, help="Path to the config file")
parser.add_argument("--eval", action="store_true", help="Run in evaluation mode")

args = parser.parse_args()

mp.set_start_method("spawn")

with open(args.config, "r") as yml:
    config = yaml.safe_load(yml)

config = load_config(args.config)
save_dir = None

if args.eval:
    Log("Running MonoGS in Evaluation Mode")
    Log("Following config will be overridden")
    Log("\tsave_results=True")
    config["Results"]["save_results"] = True
    Log("\tuse_gui=False")
    config["Results"]["use_gui"] = False
    Log("\teval_rendering=True")
    config["Results"]["eval_rendering"] = True
    Log("\tuse_wandb=True")
    config["Results"]["use_wandb"] = True

In [5]:
Model = GaussianModel(0, config)
Model.load_ply("results/datasets_tum/2024-06-18-23-04-50/point_cloud/final/point_cloud.ply")

In [6]:
model_params = munchify(config["model_params"])
pipeline_params = munchify(config["pipeline_params"])
dataset = load_dataset(
    model_params, model_params.source_path, config=config
)
bg_color = [0, 0, 0]
background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
dataset[0]

(tensor([[[0.6196, 0.6235, 0.6118,  ..., 0.6863, 0.7098, 0.7137],
          [0.6314, 0.6314, 0.6196,  ..., 0.7059, 0.7216, 0.7255],
          [0.6196, 0.6196, 0.6196,  ..., 0.6980, 0.7176, 0.8000],
          ...,
          [0.4471, 0.4431, 0.4471,  ..., 0.4667, 0.4627, 0.4627],
          [0.4510, 0.4471, 0.4471,  ..., 0.4667, 0.4627, 0.4627],
          [0.4431, 0.4314, 0.4353,  ..., 0.4706, 0.4627, 0.4588]],
 
         [[0.6314, 0.6392, 0.6392,  ..., 0.7451, 0.7490, 0.7529],
          [0.6471, 0.6471, 0.6471,  ..., 0.7765, 0.7765, 0.7804],
          [0.6392, 0.6431, 0.6471,  ..., 0.7804, 0.7843, 0.8667],
          ...,
          [0.4471, 0.4431, 0.4471,  ..., 0.4706, 0.4706, 0.4745],
          [0.4588, 0.4549, 0.4549,  ..., 0.4745, 0.4745, 0.4745],
          [0.4510, 0.4549, 0.4588,  ..., 0.4745, 0.4784, 0.4745]],
 
         [[0.6157, 0.6157, 0.6196,  ..., 0.8824, 0.9137, 0.9216],
          [0.6353, 0.6157, 0.6314,  ..., 0.9020, 0.9569, 0.9569],
          [0.6392, 0.6314, 0.6353,  ...,

In [7]:
from gaussian_splatting.utils.graphics_utils import getProjectionMatrix2
from gaussian_splatting.gaussian_renderer import render
from utils.camera_utils import Camera
from utils.slam_utils import get_loss_tracking, get_median_depth
from utils.pose_utils import update_pose

In [18]:
cur_frame_idx = 0
projection_matrix = getProjectionMatrix2(
    znear=0.01,
    zfar=100.0,
    fx=dataset.fx,
    fy=dataset.fy,
    cx=dataset.cx,
    cy=dataset.cy,
    W=dataset.width,
    H=dataset.height,
).transpose(0, 1)
projection_matrix = projection_matrix.to(device="cuda:0")

viewpoint = Camera.init_from_dataset(
    dataset, 0, projection_matrix)
viewpoint.compute_grad_mask(config)

In [48]:
orig = viewpoint.original_image


# Move the tensor to CPU
orig = orig.cpu()

# Convert the tensor to a NumPy array
orig = orig.detach().numpy()

# Transpose the dimensions to [640, 480, 3]
orig = np.transpose(orig, (1, 2, 0))

# Display the image using Matplotlib
plt.imshow(orig)
plt.axis('off')  # Hide axes
plt.show()

In [46]:
viewpoint.update_RT(viewpoint.R_gt, viewpoint.T_gt)


opt_params = []
opt_params.append(
    {
        "params": [viewpoint.cam_rot_delta],
        "lr": config["Training"]["lr"]["cam_rot_delta"],
        "name": "rot_{}".format(viewpoint.uid),
    }
)
opt_params.append(
    {
        "params": [viewpoint.cam_trans_delta],
        "lr": config["Training"]["lr"]["cam_trans_delta"],
        "name": "trans_{}".format(viewpoint.uid),
    }
)
opt_params.append(
    {
        "params": [viewpoint.exposure_a],
        "lr": 0.01,
        "name": "exposure_a_{}".format(viewpoint.uid),
    }
)
opt_params.append(
    {
        "params": [viewpoint.exposure_b],
        "lr": 0.01,
        "name": "exposure_b_{}".format(viewpoint.uid),
    }
)


pose_optimizer = torch.optim.Adam(opt_params)
for tracking_itr in range(1000):
    render_pkg = render(
        viewpoint, Model, pipeline_params, background
    )
    image, depth, opacity = (
        render_pkg["render"],
        render_pkg["depth"],
        render_pkg["opacity"],
    )
    pose_optimizer.zero_grad()
    loss_tracking = get_loss_tracking(
        config, image, depth, opacity, viewpoint
    )
    loss_tracking.backward()

    with torch.no_grad():
        pose_optimizer.step()
        converged = update_pose(viewpoint)

    if converged:
        print(tracking_itr)
        break

median_depth = get_median_depth(depth, opacity)
render_pkg

285


{'render': tensor([[[0.6659, 0.6733, 0.6825,  ..., 0.6500, 0.7576, 0.8512],
          [0.6719, 0.6790, 0.6873,  ..., 0.6952, 0.7934, 0.8758],
          [0.6784, 0.6847, 0.6918,  ..., 0.7337, 0.8234, 0.8959],
          ...,
          [0.4658, 0.4659, 0.4659,  ..., 0.5131, 0.5122, 0.5111],
          [0.4657, 0.4658, 0.4658,  ..., 0.5127, 0.5118, 0.5108],
          [0.4656, 0.4657, 0.4657,  ..., 0.5125, 0.5116, 0.5106]],
 
         [[0.6996, 0.7020, 0.7051,  ..., 0.6746, 0.7869, 0.8828],
          [0.6991, 0.7015, 0.7046,  ..., 0.7180, 0.8194, 0.9027],
          [0.6978, 0.7003, 0.7032,  ..., 0.7549, 0.8466, 0.9192],
          ...,
          [0.4745, 0.4746, 0.4747,  ..., 0.5063, 0.5053, 0.5042],
          [0.4743, 0.4745, 0.4746,  ..., 0.5059, 0.5048, 0.5038],
          [0.4741, 0.4743, 0.4744,  ..., 0.5056, 0.5045, 0.5034]],
 
         [[0.8077, 0.7998, 0.7903,  ..., 0.7001, 0.8136, 0.9112],
          [0.7919, 0.7854, 0.7779,  ..., 0.7420, 0.8433, 0.9262],
          [0.7739, 0.7694, 0.7

In [47]:
render_pkg["render"].shape
import matplotlib.pyplot as plt
# Get the image tensor
image_tensor = render_pkg["render"]

# Move the tensor to CPU
image_tensor = image_tensor.cpu()

# Convert the tensor to a NumPy array
image_np = image_tensor.detach().numpy()

# Transpose the dimensions to [640, 480, 3]
image_np = np.transpose(image_np, (1, 2, 0))

# Display the image using Matplotlib
plt.imshow(image_np)
plt.axis('off')  # Hide axes
plt.show()

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


In [17]:
print(viewpoint.grad_mask)

None


In [14]:
config["Training"]["monocular"]

True

In [51]:
viewpoint.R_gt

tensor([[-0.9996,  0.0152,  0.0247],
        [-0.0142,  0.4855, -0.8741],
        [-0.0253, -0.8741, -0.4851]], device='cuda:0', dtype=torch.float64)

In [2]:
if __name__ == "__main__":
    # Simulate command line arguments
    sys.argv = [
        "slam.py", 
        "--config", "configs/mono/tum/fr3_office.yaml"
    ]

    # Set up command line argument parser
    parser = ArgumentParser(description="Training script parameters")
    parser.add_argument("--config", type=str, required=True, help="Path to the config file")
    parser.add_argument("--eval", action="store_true", help="Run in evaluation mode")

    args = parser.parse_args()

    mp.set_start_method("spawn")

    with open(args.config, "r") as yml:
        config = yaml.safe_load(yml)

    config = load_config(args.config)
    save_dir = None

    if args.eval:
        Log("Running MonoGS in Evaluation Mode")
        Log("Following config will be overridden")
        Log("\tsave_results=True")
        config["Results"]["save_results"] = True
        Log("\tuse_gui=False")
        config["Results"]["use_gui"] = False
        Log("\teval_rendering=True")
        config["Results"]["eval_rendering"] = True
        Log("\tuse_wandb=True")
        config["Results"]["use_wandb"] = True

    if config["Results"]["save_results"]:
        mkdir_p(config["Results"]["save_dir"])
        current_datetime = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        path = config["Dataset"]["dataset_path"].split("/")
        save_dir = os.path.join(
            config["Results"]["save_dir"], path[-3] + "_" + path[-2], current_datetime
        )
        tmp = args.config
        tmp = tmp.split(".")[0]
        config["Results"]["save_dir"] = save_dir
        mkdir_p(save_dir)
        with open(os.path.join(save_dir, "config.yml"), "w") as file:
            documents = yaml.dump(config, file)
        Log("saving results in " + save_dir)
        run = wandb.init(
            project="MonoGS",
            name=f"{tmp}_{current_datetime}",
            config=config,
            mode=None if config["Results"]["use_wandb"] else "disabled",
        )
        wandb.define_metric("frame_idx")
        wandb.define_metric("ate*", step_metric="frame_idx")

    slam = SLAM(config, save_dir=save_dir)

    slam.run()
    wandb.finish()
    save_gaussians(slam.gaussians, slam.save_dir, "final_after_opt", final=True)

    # All done
    Log("Done.")


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


FEngine (64 bits) created at 0x556846d1fd60 (threading is enabled)
FEngine resolved backend: OpenGL
MonoGS: Resetting the system
MonoGS: Initialized map
MonoGS: Resetting the opacity of non-visible Gaussians
MonoGS: Performing initial BA for initialization
MonoGS: Initialized SLAM


MonoGS: Resetting the opacity of non-visible Gaussians


MonoGS: Resetting the opacity of non-visible Gaussians


MonoGS: Resetting the opacity of non-visible Gaussians


[W CudaIPCTypes.cpp:92] Producer process tried to deallocate over 1000 memory blocks referred by consumer processes. Deallocation might be significantly slowed down. We assume it will never going to be the case, but if it is, please file but to https://github.com/pytorch/pytorch


MonoGS: Resetting the opacity of non-visible Gaussians


MonoGS: Resetting the opacity of non-visible Gaussians


MonoGS: Resetting the opacity of non-visible Gaussians


  fig = plt.figure()


MonoGS: Resetting the opacity of non-visible Gaussians


GUI: Received terminate signal
GUI: Closing Visualization


KeyboardInterrupt: 

In [72]:
Model.get_xyz

tensor([], device='cuda:0')