In [None]:
# Shell escape: print the NVIDIA driver/GPU status so GPU availability can be checked before loading the model.
!nvidia-smi

In [None]:
import os
# Working directory of the kernel; the HOME-relative sample paths defined further down are built from it.
HOME = os.getcwd()
print(HOME)

In [None]:
# !pip install -q inference-gpu[yolo-world]==0.9.13

In [None]:
# !pip install -q supervision==0.22.0

## Imports

In [None]:
import cv2
import supervision as sv
from tqdm import tqdm
from inference.models import YOLOWorld

## Example data paths

In [None]:
# Frame used by the single-image detection demo below.
# NOTE(review): absolute, machine-specific Windows path, unlike the HOME-relative paths that follow —
# consider making it relative so the notebook runs on other machines.
SOURCE_IMAGE_PATH = r"C:\Users\hzliu\OneDrive\Desktop\ZED-data-collector\SVO\exo_video_sequence\elephant_1\image_0\1751066578109991.png"
# Additional sample frames, resolved relative to HOME.
SOURCE_IMAGE_PATH_2 = f"{HOME}/SVO/video_seq/seq_1/image_0/1750285875605786.png"
SOURCE_IMAGE_PATH_3 = f"{HOME}/SVO/video_seq/seq_pen/image_0/1750293427688891.png"
SOURCE_IMAGE_PATH_4 = f"{HOME}/SVO/video_seq/seq_pen/image_0/1750293427738899.png"
SOURCE_IMAGE_PATH_5 = f"{HOME}/SVO/video_seq/seq_pen/image_0/1750293428079526.png"


# Sample video file, resolved relative to HOME.
SOURCE_VIDEO_PATH = f"{HOME}/yellow-filling.mp4"

## Run Object Detection







In [None]:
# Instantiate the YOLO-World detector for model id "yolo_world/l"; this object is reused by every inference call below.
model = YOLOWorld(model_id="yolo_world/l")

In [None]:
# Text prompts handed to the detector. The label-building cell later indexes this list by
# each detection's class_id, so the order here must not change between set_classes and labelling.
classes = ['doll']
model.set_classes(classes)

In [None]:
# Load the sample frame. cv2.imread does not raise on a missing or unreadable file;
# it returns None, so fail here with a clear message instead of deep inside model.infer.
image = cv2.imread(SOURCE_IMAGE_PATH)
if image is None:
    raise FileNotFoundError(f"Could not read image: {SOURCE_IMAGE_PATH}")

# Run the detector with a very permissive confidence threshold (0.003).
results = model.infer(image, confidence=0.003)

# List the class name of every raw prediction as a quick sanity check.
for p in results.predictions:
    print(p.class_name)

# Convert the inference response into a supervision Detections object for annotation.
detections = sv.Detections.from_inference(results)

In [None]:
# Display the raw inference response for inspection.
results

In [None]:
# `sv.BoundingBoxAnnotator` is a deprecated alias of `sv.BoxAnnotator` in recent supervision
# releases. Keep using the original name whenever the installed version still provides it
# (identical behaviour to before) and fall back to `BoxAnnotator` only when the alias is gone,
# so the cell does not fail with AttributeError after a library upgrade.
_box_annotator_cls = getattr(sv, "BoundingBoxAnnotator", None) or sv.BoxAnnotator

# Box outlines drawn around each detection.
BOUNDING_BOX_ANNOTATOR = _box_annotator_cls(thickness=2)
# Text captions drawn next to each box (black text).
LABEL_ANNOTATOR = sv.LabelAnnotator(text_thickness=2, text_scale=1, text_color=sv.Color.BLACK)

In [None]:
# One caption per detection: the prompt text for its class followed by the confidence (3 decimals).
labels = [
    f"{classes[idx]} {score:0.3f}"
    for idx, score in zip(detections.class_id, detections.confidence)
]

# Annotate a copy so `image` itself is left untouched: boxes first, then captions on top.
annotated_image = LABEL_ANNOTATOR.annotate(
    BOUNDING_BOX_ANNOTATOR.annotate(image.copy(), detections),
    detections,
    labels=labels,
)
sv.plot_image(annotated_image, (10, 10))

In [None]:
# Directories holding the PNG frames of the two stereo views (image_0 -> left, image_1 -> right).
# The tracking cell pairs the files by sorted order and asserts both folders hold the same number of frames.
# NOTE(review): absolute, machine-specific Windows paths — consider deriving them from HOME.
left_dir  = r'C:\Users\hzliu\OneDrive\Desktop\ZED-data-collector\SVO\exo_video_sequence\elephant_1\image_0'
right_dir = r'C:\Users\hzliu\OneDrive\Desktop\ZED-data-collector\SVO\exo_video_sequence\elephant_1\image_1'

In [None]:
import cv2
import glob
import os
import numpy as np
from kalman_filter import StereoCalib, KalmanFilter, triangulate, pick_banana_center

# Stereo tracking pipeline: detect the target in both views, triangulate it to 3-D,
# smooth the track with a Kalman filter, and periodically roll the filter forward
# (without measurements) to produce short open-loop forecasts.
calib = StereoCalib.from_txt("calib_stereo.txt")
kf = KalmanFilter(dt=1/60)  # NOTE(review): dt presumably matches a 60 fps capture — confirm

# Frames are paired by sorted filename order, so both folders must hold the same count.
left_paths  = sorted(glob.glob(os.path.join(left_dir, "*.png")))
right_paths = sorted(glob.glob(os.path.join(right_dir, "*.png")))
assert len(left_paths) == len(right_paths), "Mismatched number of frames"
num_frames = len(left_paths)

# pred_traj and gt_traj each receive exactly ONE row per frame so they stay index-aligned;
# frames without a value are stored as a NaN row.
pred_traj = []       # Filtered estimate (NaN until the filter has been initialised)
gt_traj = []         # Triangulated ground truth (NaN when either view has no detection)
pred_windows = []    # Predicted future trajectories

# Parameters
PRED_INTERVAL = 8        # Number of frames to predict
PRED_GAP = 8             # A forecast is started on frames whose index is a multiple of this
motion_threshold = 0.01  # not referenced in this cell
DET_CONFIDENCE = 0.003   # Confidence threshold passed to the detector for both views

# The filter is initialised from the first two triangulated points.
init_buffer = []
kf_initialized = False

for frame_id in tqdm(range(num_frames), desc="Tracking"):
    img_L = cv2.imread(left_paths[frame_id])
    img_R = cv2.imread(right_paths[frame_id])
    # cv2.imread returns None instead of raising when a file cannot be decoded.
    if img_L is None or img_R is None:
        raise IOError(
            f"Could not read stereo pair for frame {frame_id}: "
            f"{left_paths[frame_id]} / {right_paths[frame_id]}"
        )

    preds_L = model.infer(img_L, confidence=DET_CONFIDENCE)
    preds_R = model.infer(img_R, confidence=DET_CONFIDENCE)
    # Each result is used as a mapping with 'u'/'v' pixel coordinates and is falsy when
    # nothing usable was detected (presumably None — verify against kalman_filter).
    det_L = pick_banana_center(preds_L)
    det_R = pick_banana_center(preds_R)

    if det_L and det_R:
        xyz = triangulate(det_L['u'], det_L['v'],
                          det_R['u'], det_R['v'], calib)
        gt_traj.append(xyz)

        if not kf_initialized:
            init_buffer.append(xyz)
            if len(init_buffer) == 2:
                kf.initialize(init_buffer[0], init_buffer[1])
                kf_initialized = True
                # Copy so the stored row can never alias the filter's live state
                # (matches every other place the state is recorded).
                pred_traj.append(kf.x[:3].copy().ravel())
            else:
                # Only one measurement so far: record it as-is.
                pred_traj.append(xyz)
            continue

        kf.predict()
        kf.update(xyz)
        pred_traj.append(kf.x[:3].copy().ravel())

        # forecast (open-loop)
        if frame_id % PRED_GAP == 0 and frame_id + PRED_INTERVAL < num_frames:
            future_preds = []
            # Snapshot the state/covariance so the look-ahead does not disturb tracking.
            saved_kf_state = (kf.x.copy(), kf.P.copy())
            for _ in range(PRED_INTERVAL):
                kf.predict()
                future_preds.append(kf.x[:3].copy().ravel())
            pred_windows.append(future_preds)
            kf.x, kf.P = saved_kf_state

    else:
        # No stereo measurement for this frame: keep gt_traj aligned with pred_traj
        # by storing a NaN placeholder, the same convention pred_traj uses below.
        gt_traj.append(np.full(3, np.nan))
        if kf_initialized:
            # Coast on the motion model alone.
            kf.predict()
            pred_traj.append(kf.x[:3].copy().ravel())
        else:
            pred_traj.append(np.array([np.nan, np.nan, np.nan]))

# Invariant relied on by any index-wise comparison of the two trajectories.
assert len(pred_traj) == len(gt_traj) == num_frames


In [None]:
# Report how many entries each output of the tracking loop contains.
for caption, collected in (
    ("GT trajectory frames:", gt_traj),
    ("KF trajectory frames:", pred_traj),
    ("Number of prediction windows:", pred_windows),
):
    print(caption, len(collected))

In [None]:
import numpy as np
import plotly.graph_objects as go

def _trajectory_trace(points, name, line):
    """Build a 3-D lines+markers trace from an (N, 3) array of XYZ points."""
    return go.Scatter3d(
        x=points[:, 0],
        y=points[:, 1],
        z=points[:, 2],
        mode='lines+markers',
        name=name,
        line=line,
        marker=dict(size=2),
    )


# Convert to (N, 3) float arrays. reshape(-1, 3) keeps an empty trajectory 2-D, so the
# column indexing below cannot fail, and the conversion is idempotent if the cell is re-run.
pred_traj = np.asarray(pred_traj, dtype=float).reshape(-1, 3)
gt_traj   = np.asarray(gt_traj, dtype=float).reshape(-1, 3)

print(pred_traj.shape)  # (number of filtered samples, 3)
print(gt_traj.shape)    # (number of ground-truth samples, 3)

fig = go.Figure()

# Each trace is drawn independently, so the two trajectories are NOT truncated to a common
# length: doing so would silently drop the tail of the longer one from the plot.
fig.add_trace(_trajectory_trace(
    pred_traj, 'KF Filtered', dict(width=4, color='blue')
))

# GT
fig.add_trace(_trajectory_trace(
    gt_traj, 'Ground Truth', dict(width=4, dash='dash', color='black')
))

# One red trace per open-loop forecast window.
for i, seq in enumerate(pred_windows):
    pred_seq = np.asarray(seq, dtype=float).reshape(-1, 3)
    fig.add_trace(_trajectory_trace(
        pred_seq, f'Predicted Traj {i}', dict(width=2, color='red')
    ))

fig.update_layout(
    scene=dict(
        xaxis_title='X (m)',
        yaxis_title='Y (m)',
        zaxis_title='Z (m)',
        aspectmode='data'
    ),
    legend=dict(x=0.05, y=0.95),
    # Title reflects the actual forecast horizon instead of a hard-coded step count.
    title=f'Trajectory Prediction: Filtered vs {PRED_INTERVAL}-Step Forecasts'
)

fig.show()
