# VisualOdometrySLAM

In [27]:
import random

import cv2
import numpy as np
import pandas as pd

def seed_all(seed: int) -> None:
    np.random.seed(seed)
    random.seed(seed)
    cv2.setRNGSeed(seed)

In [28]:
SEED = 42
seed_all(SEED)

In [29]:
from pathlib import Path

DATA_PATH = Path('../data/')
DATA_PATH.mkdir(parents=True, exist_ok=True)

DATA_PATH_VIDEO = DATA_PATH /Path('visual_odometry/')
DATA_PATH_VIDEO.mkdir(parents=True, exist_ok=True)

DATA_PATH_OUTPUT = DATA_PATH / Path('output_data/')
DATA_PATH_OUTPUT.mkdir(parents=True, exist_ok=True)

DATA_PATH_SAVE_MODELS = DATA_PATH / Path('models/')
DATA_PATH_SAVE_MODELS.mkdir(parents=True, exist_ok=True)

DATA_IMGS = Path('../imgs')
DATA_IMGS.mkdir(parents=True, exist_ok=True)

In [30]:
import sys
import os

project_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(project_path)

# Основной код

In [31]:
from visual_slam.calibration import UniversalCalibration
from visual_slam.source import DatasetSource,  VideoSource
from visual_slam.camera import PinholeCamera
from visual_slam.config import Config
from visual_slam.map.map import Map
from visual_slam.map.keyframe import KeyFrame
from visual_slam.map.map_point import MapPoint
from visual_slam.feature.tracker import FeatureTracker
from visual_slam.slam import SLAM
from visual_slam.viz.map_viz import MapVisualizer

In [None]:
VIDEO_PATH = DATA_PATH / 'KITTI_sequence_1/image_l'
CALIBRATION_PATH = DATA_PATH / 'KITTI_sequence_1/calib.txt'

calibration = UniversalCalibration().load_from(path=CALIBRATION_PATH)
video_source = DatasetSource(path=VIDEO_PATH)

In [None]:
# VIDEO_PATH = DATA_PATH / 'visual_odometry/hallway.mp4'
# CALIBRATION_PATH = DATA_PATH / 'visual_odometry/calibration.yaml'

# calibration = UniversalCalibration().load_from(path=CALIBRATION_PATH)
# video_source = VideoSource(video_path=VIDEO_PATH, target_fps=1.0)

In [34]:
print(  f"Video frame size: {video_source.get_frame_shape()}"  )
video_source.show(fps=1.0)

Video frame size: (1280, 720, 3)


In [None]:
from visual_slam.config import Config, CameraConfig, FeatureConfig, TrackingConfig, MapConfig, LoopClosingConfig, OptimizationConfig, LocalMappingConfig

config = Config(
    features=FeatureConfig(
        detector="fast_orb_anms",
        matcher="bf-hamming",
        detector_params={
            "nfeatures": 500,
            "use_anms": True,
            "anms_count": 1000,
            "anms_tolerance": 0.3,
        },
        matcher_params={
            "ratio_thresh": 0.75,
            "cross_check": False
        },
        filtered_params={
            "use_grid": False,
            "use_nms": False,            
            
            "use_ransac_fund_matrix": True,
            "use_orientation": True
        }
    ),
    tracking=TrackingConfig(
        min_inliers=10,
        min_parallax_deg=0.5,
        keyframe_interval=1,
        min_inlier_ratio=0.25,
        max_reprojection_error=5.0,
        use_ransac=True,
        
        max_translation_for_kf=2.0,
        max_rotation_for_kf=10.0,
        min_matches_for_kf=30,
    ),
    local_mapping=LocalMappingConfig(
        run_timeout=0.5,
        max_neighbors=5
    ),
    map=MapConfig(

    ),
    optimization=OptimizationConfig(
        lr=1e-4,
        n_iter=10,
        batch_size=64,
        huber_delta=5.0
    ),
    loop_closing=LoopClosingConfig(
        
    ),
    debug=True
)

In [None]:
from time import sleep
from typing import Union

class Processing:
    
    def __init__(
        self, 
        video_path: Union[str, Path],
        calibration_file: Union[str, Path],                   
        config: Config,
    ):
        
        self.config = config
        
        self.video_source = DatasetSource(video_path)
        self.calibration = UniversalCalibration().load_from(calibration_file)
        K = self.calibration.mono.K
        h, w, _ = video_source.get_frame_shape()
        self.camera = PinholeCamera(
            width=w,
            height=h,
            fx=K[0, 0],
            fy=K[1, 1],
            cx=K[0, 2],
            cy=K[1, 2],
            dist_coeffs=self.calibration.mono.D,
        )
        
        self.slam = SLAM(
            camera=self.camera,
            config=self.config,
        )    
    
    def run(self):
        print("=== Запуск SLAM ===")

        total_frames = self.video_source.num_frames()
        print(f"Всего кадров: {total_frames}")

        count = 0
        max_cycles = 10
        
        while self.video_source.is_ok():
            img, timestamp = self.video_source.get_frame()
            if img is None:
                break

            result = self.slam.track([img], timestamp)
            count += 1
            print(f"#{count} Timestamp={timestamp:.2f} → Result={result}")

            if count >= max_cycles:
                print(f"Остановлено: достигнуто {max_cycles} циклов.")
                break
            sleep(1.0)
        self.slam.shutdown()
        print("=== SLAM завершён ===")
        
        
        
    

In [None]:
# from time import sleep
# from typing import Union

# class Processing:
    
#     def __init__(
#         self, 
#         video_path: Union[str, Path],
#         calibration_file: Union[str, Path],                   
#         config: Config,
#     ):
        
#         self.config = config
        
#         self.video_source = VideoSource(video_path, target_fps=2.0)
#         self.calibration = UniversalCalibration().load_from(calibration_file)
#         K = self.calibration.mono.K
#         h, w, _ = self.video_source.get_frame_shape()
#         self.camera = PinholeCamera(
#             width=w,
#             height=h,
#             fx=K[0, 0],
#             fy=K[1, 1],
#             cx=K[0, 2],
#             cy=K[1, 2],
#             dist_coeffs=self.calibration.mono.D,
#         )
        
#         self.slam = SLAM(
#             camera=self.camera,
#             config=self.config,
#         )    
    
#     def run(self):
#         print("=== Запуск SLAM ===")

#         total_frames = self.video_source.num_frames()
#         print(f"Всего кадров: {total_frames}")

#         count = 0
#         max_cycles = 10
#         while self.video_source.is_ok():
#             img, timestamp = self.video_source.get_frame()
#             if img is None:
#                 break

#             result = self.slam.track([img], timestamp)
#             count += 1
#             print(f"#{count} Timestamp={timestamp:.2f} → Result={result}")

#             if count >= max_cycles:
#                 print(f"Остановлено: достигнуто {max_cycles} циклов.")
#                 break
#             sleep(1.0)
#         self.slam.shutdown()
#         print("=== SLAM завершён ===")
        
        
        
    

In [48]:
process = Processing(
    video_path=VIDEO_PATH,
    calibration_file=CALIBRATION_PATH,
    config=config
)
process.run()

=== Запуск SLAM ===
Всего кадров: 544
#1 Timestamp=0.00 → Result=<StateItem NOT_INITIALIZED (1): Система не инициализирована>
#2 Timestamp=0.50 → Result=<StateItem OK (3): Трекинг в норме>
#3 Timestamp=1.00 → Result=<StateItem OK (3): Трекинг в норме>
#4 Timestamp=1.50 → Result=<StateItem OK (3): Трекинг в норме>
#5 Timestamp=2.00 → Result=<StateItem OK (3): Трекинг в норме>
#6 Timestamp=2.50 → Result=<StateItem OK (3): Трекинг в норме>
#7 Timestamp=3.00 → Result=<StateItem OK (3): Трекинг в норме>
#8 Timestamp=3.50 → Result=<StateItem OK (3): Трекинг в норме>
#9 Timestamp=4.00 → Result=<StateItem OK (3): Трекинг в норме>
#10 Timestamp=4.50 → Result=<StateItem OK (3): Трекинг в норме>
Остановлено: достигнуто 10 циклов.
=== SLAM завершён ===


In [49]:
map = process.slam.map
viz = MapVisualizer(map)

In [50]:
map

<Map | points=983, keyframes=10>

In [51]:
# map.get_points()

In [52]:
map.get_keyframes()

[<KeyFrame id=7, frame_id=21, mps=416, time=0.000> pose t=[0. 0. 0.],
 <KeyFrame id=8, frame_id=22, mps=536, time=0.500> pose t=[ 0.077  0.178 -0.982],
 <KeyFrame id=9, frame_id=24, mps=477, time=1.000> pose t=[-0.156  0.393 -3.812],
 <KeyFrame id=10, frame_id=26, mps=323, time=1.500> pose t=[ 0.06   0.845 -7.064],
 <KeyFrame id=11, frame_id=28, mps=180, time=2.000> pose t=[  0.311   1.444 -11.583],
 <KeyFrame id=12, frame_id=30, mps=136, time=2.500> pose t=[ 9.8600e-01  4.0000e-03 -1.3627e+01],
 <KeyFrame id=13, frame_id=32, mps=117, time=3.000> pose t=[  1.435   0.71  -18.251],
 <KeyFrame id=14, frame_id=34, mps=130, time=3.500> pose t=[  0.725   1.388 -22.584],
 <KeyFrame id=15, frame_id=36, mps=105, time=4.000> pose t=[  0.872   2.694 -25.721],
 <KeyFrame id=16, frame_id=38, mps=71, time=4.500> pose t=[  0.931   3.081 -29.785]]

In [53]:
viz.show_scene()



In [54]:
kfs = process.slam.map.get_keyframes()
kfs

[<KeyFrame id=7, frame_id=21, mps=416, time=0.000> pose t=[0. 0. 0.],
 <KeyFrame id=8, frame_id=22, mps=536, time=0.500> pose t=[ 0.077  0.178 -0.982],
 <KeyFrame id=9, frame_id=24, mps=477, time=1.000> pose t=[-0.156  0.393 -3.812],
 <KeyFrame id=10, frame_id=26, mps=323, time=1.500> pose t=[ 0.06   0.845 -7.064],
 <KeyFrame id=11, frame_id=28, mps=180, time=2.000> pose t=[  0.311   1.444 -11.583],
 <KeyFrame id=12, frame_id=30, mps=136, time=2.500> pose t=[ 9.8600e-01  4.0000e-03 -1.3627e+01],
 <KeyFrame id=13, frame_id=32, mps=117, time=3.000> pose t=[  1.435   0.71  -18.251],
 <KeyFrame id=14, frame_id=34, mps=130, time=3.500> pose t=[  0.725   1.388 -22.584],
 <KeyFrame id=15, frame_id=36, mps=105, time=4.000> pose t=[  0.872   2.694 -25.721],
 <KeyFrame id=16, frame_id=38, mps=71, time=4.500> pose t=[  0.931   3.081 -29.785]]

In [55]:
K = process.slam.tracking.camera.get_intrinsics()

viz.show_matches(
    kf_ref=kfs[4],
    kf_cur=kfs[5],
    K=K,
    window_name="Initialization Debug",
    font_scale=0.5,       # размер текста для номеров
    wait_key=True          # нажми 'q' для закрытия окна
)


array([[[  3,   0,   1],
        [  3,   0,   1],
        [  3,   0,   1],
        ...,
        [ 60,  69,  75],
        [ 60,  69,  75],
        [ 60,  69,  75]],

       [[  3,   0,   1],
        [  3,   0,   1],
        [  3,   0,   1],
        ...,
        [ 60,  69,  75],
        [ 60,  69,  75],
        [ 60,  69,  75]],

       [[  3,   0,   1],
        [  3,   0,   1],
        [  3,   0,   1],
        ...,
        [ 60,  69,  75],
        [ 60,  69,  75],
        [ 60,  69,  75]],

       ...,

       [[ 78,  84,  88],
        [ 78,  84,  88],
        [ 78,  84,  88],
        ...,
        [ 96,  99, 104],
        [ 95,  98, 103],
        [ 93,  96, 101]],

       [[ 78,  84,  88],
        [ 78,  84,  88],
        [ 78,  84,  88],
        ...,
        [ 96,  99, 104],
        [ 96,  99, 104],
        [ 95,  98, 103]],

       [[ 78,  84,  88],
        [ 78,  84,  88],
        [ 78,  84,  88],
        ...,
        [ 96,  99, 104],
        [ 96,  99, 104],
        [ 96,  99, 104]]