## Method 1: Localization implementation (when metadata is available)

In [2]:
import json
import os
from math import sin, cos, radians

import cv2
import numpy as np

In [3]:
# --- Rotation helpers ---
def rot_x(angle_rad):
    """Return the 3x3 rotation matrix for ``angle_rad`` radians about the x-axis.

    The x component of a vector is left untouched; the y/z components are
    rotated by the given angle (standard right-handed rotation matrix).
    """
    cos_a, sin_a = cos(angle_rad), sin(angle_rad)
    return np.array([
        [1, 0, 0],
        [0, cos_a, -sin_a],
        [0, sin_a, cos_a],
    ])

def rot_y(angle_rad):
    """Return the 3x3 rotation matrix for ``angle_rad`` radians about the y-axis.

    The y component of a vector is left untouched; the x/z components are
    rotated by the given angle (standard right-handed rotation matrix).
    """
    cos_a, sin_a = cos(angle_rad), sin(angle_rad)
    return np.array([
        [cos_a, 0, sin_a],
        [0, 1, 0],
        [-sin_a, 0, cos_a],
    ])

def rot_z(angle_rad):
    """Return the 3x3 rotation matrix for ``angle_rad`` radians about the z-axis.

    The z component of a vector is left untouched; the x/y components are
    rotated by the given angle (standard right-handed rotation matrix).
    """
    cos_a, sin_a = cos(angle_rad), sin(angle_rad)
    return np.array([
        [cos_a, -sin_a, 0],
        [sin_a, cos_a, 0],
        [0, 0, 1],
    ])

In [9]:
# Convert ENU offsets to lat/lon
def meters_to_latlon(lat0_deg, lon0_deg, dx_east, dy_north):
    """Shift a reference latitude/longitude by metric east/north offsets.

    Args:
        lat0_deg: Reference latitude in degrees.
        lon0_deg: Reference longitude in degrees.
        dx_east: Offset towards east, in meters.
        dy_north: Offset towards north, in meters.

    Returns:
        Tuple ``(lat_deg, lon_deg)`` of the shifted position in degrees.
    """
    phi = radians(lat0_deg)

    # Length of one degree of latitude / longitude (in meters) at the
    # reference latitude, from a cosine-series approximation.
    lat_scale = 111132.954 - 559.822 * cos(2*phi) + 1.175 * cos(4*phi)
    lon_scale = (111412.84 * cos(phi) - 93.5 * cos(3*phi))

    shifted_lat = lat0_deg + dy_north / lat_scale
    shifted_lon = lon0_deg + dx_east / lon_scale
    return shifted_lat, shifted_lon

def build_K(fx, fy, cx, cy):
    """Assemble the 3x3 pinhole camera intrinsic matrix (zero skew).

    Args:
        fx, fy: Focal lengths along the image x and y axes.
        cx, cy: Principal point coordinates.

    Returns:
        NumPy array ``[[fx, 0, cx], [0, fy, cy], [0, 0, 1]]``.
    """
    row_x = [fx, 0, cx]
    row_y = [0, fy, cy]
    row_h = [0, 0, 1]
    return np.array([row_x, row_y, row_h])

def estimate_fx_from_fov(width_px, fov_deg):
    """Derive a focal length in pixels from image width and field of view.

    Uses the pinhole relation ``fx = (width / 2) / tan(fov / 2)``.

    Args:
        width_px: Image width in pixels.
        fov_deg: Field of view across that width, in degrees.

    Returns:
        Focal length in pixels.
    """
    half_width = width_px / 2.0
    half_fov = radians(fov_deg) / 2.0
    return half_width / np.tan(half_fov)

# Pixel → geo-localization
def pixel_to_geo(u, v, img_w, img_h, meta, fov_deg=84.0):
    """Project an image pixel onto a flat ground plane and return its (lat, lon).

    A viewing ray is built for pixel ``(u, v)`` using a pinhole model whose
    focal length is derived from ``fov_deg`` and whose principal point is the
    image center. The ray is rotated with the gimbal pitch, gimbal heading and
    compass heading from ``meta``, then intersected with the plane ``z = 0``
    from a camera placed at ``(0, 0, altitude)``. The x/y coordinates of the
    hit point are treated as east/north offsets in meters from the GPS
    position in ``meta``.

    Args:
        u, v: Pixel coordinates (column, row).
        img_w, img_h: Image width and height in pixels.
        meta: Mapping with required keys ``'altitude'``, ``'gps_latitude'``,
            ``'gps_longitude'`` and optional keys ``'compass_heading'``,
            ``'gimbal_pitch'``, ``'gimbal_heading'`` (angles in degrees).
        fov_deg: Horizontal field of view in degrees used to estimate the
            focal length.

    Returns:
        ``(lat, lon)`` in degrees, or ``None`` when required metadata is
        missing or the ray does not hit the ground plane in front of the
        camera.

    NOTE(review): the angle conventions are not verifiable from this file.
    ``gimbal_heading`` defaults to ``compass_heading`` and the compass yaw is
    then applied on top of it, so the heading may be counted twice; also
    ``rot_z`` rotates counter-clockwise while compass headings are usually
    clockwise from north. Confirm against the metadata specification.
    NOTE(review): assumes ``altitude`` is the height above the surface being
    localized, in meters - confirm.
    """
    if not meta:
        return None

    # Without these three values no position can be computed at all.
    H = meta.get('altitude')
    lat0 = meta.get('gps_latitude')
    lon0 = meta.get('gps_longitude')
    if H is None or lat0 is None or lon0 is None:
        return None

    def _angle(key, default):
        # Treat an explicit null in the metadata like a missing key.
        value = meta.get(key)
        return default if value is None else value

    compass = _angle('compass_heading', 0.0)
    gimbal_pitch = _angle('gimbal_pitch', 0.0)
    gimbal_heading = _angle('gimbal_heading', compass)

    # Pinhole intrinsics: square pixels, principal point at the image center.
    fx = estimate_fx_from_fov(img_w, fov_deg)
    fy = fx
    cx = img_w / 2.0
    cy = img_h / 2.0

    # Pixel -> normalized camera ray; the camera looks along its -z axis.
    dir_cam = np.array([(u - cx) / fx, (v - cy) / fy, -1.0])
    dir_cam /= np.linalg.norm(dir_cam)

    # Camera -> world rotation: gimbal pitch, then gimbal yaw, then body yaw.
    R_cam2body = rot_z(radians(gimbal_heading)) @ rot_x(radians(gimbal_pitch))
    R_body2world = rot_z(radians(compass))
    R_cam2world = R_body2world @ R_cam2body

    dir_world = R_cam2world @ dir_cam
    dir_world /= np.linalg.norm(dir_world)

    # Ray / ground-plane (z = 0) intersection from the camera position.
    p_cam_world = np.array([0.0, 0.0, H])
    if dir_world[2] == 0:
        return None  # ray parallel to the ground plane

    t = -p_cam_world[2] / dir_world[2]
    if t <= 0:
        return None  # intersection is not in front of the camera

    p_hit = p_cam_world + t * dir_world
    x_east, y_north = p_hit[0], p_hit[1]

    return meters_to_latlon(lat0, lon0, x_east, y_north)

In [10]:
# --- Main visualization ---
def visualize_with_localization(json_path, images_dir, output_dir, image_ids):
    """Draw annotated boxes with estimated geo-positions and save the images.

    Loads a COCO-style annotation file, and for every requested image id
    draws each annotation's bounding box together with a label holding its
    track id and the latitude/longitude estimated by ``pixel_to_geo`` for the
    box center. The annotated image is written to ``output_dir`` under the
    original file name.

    Args:
        json_path: Path to the annotation JSON file (must contain ``images``
            and ``annotations`` lists).
        images_dir: Directory containing the source images.
        output_dir: Directory for the annotated images; created if missing.
        image_ids: Iterable of image ids to process.

    Image ids that are not in the annotation file, images that cannot be
    read, and images that cannot be written are reported on stdout and
    skipped instead of aborting the whole run.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    images = {img['id']: img for img in data['images']}
    anns_per_image = {}
    for ann in data['annotations']:
        anns_per_image.setdefault(ann['image_id'], []).append(ann)

    # category colors (OpenCV uses BGR channel order)
    colors = {
        1: (0, 255, 0),   # swimmer -> green
        2: (0, 0, 255),   # swimmer with life jacket -> red
        3: (255, 0, 0)    # boat -> blue
    }

    # cv2.imwrite does not create directories and only signals failure
    # through its return value, so make sure the target exists up front.
    os.makedirs(output_dir, exist_ok=True)

    for img_id in image_ids:
        img_info = images.get(img_id)
        if img_info is None:
            print(f"Image id {img_id} not present in {json_path}.")
            continue

        img_path = f"{images_dir}/{img_info['file_name']}"
        frame = cv2.imread(img_path)
        if frame is None:
            print(f"Image {img_path} not found.")
            continue

        # Some images may carry no metadata; those get a "(no loc)" label.
        meta = img_info.get('meta')
        h, w = img_info['height'], img_info['width']

        for ann in anns_per_image.get(img_id, []):
            x, y, bw, bh = ann['bbox']
            track_id = ann['track_id']
            cat_id = ann['category_id']

            # bbox center is the pixel that gets localized
            u = x + bw/2
            v = y + bh/2

            loc = pixel_to_geo(u, v, w, h, meta, fov_deg=84.0) if meta else None
            if loc is not None:
                lat, lon = loc
                loc_text = f"ID {track_id} ({lat:.5f},{lon:.5f})"
            else:
                loc_text = f"ID {track_id} (no loc)"

            # draw bbox and label; unknown categories fall back to white
            color = colors.get(cat_id, (255,255,255))
            cv2.rectangle(frame, (int(x), int(y)), (int(x+bw), int(y+bh)), color, 2)
            cv2.putText(frame, loc_text, (int(x), int(y)-10), cv2.FONT_HERSHEY_SIMPLEX,
                        0.6, color, 2)

        out_path = f"{output_dir}/{img_info['file_name']}"
        if cv2.imwrite(out_path, frame):
            print(f"Saved {out_path}")
        else:
            print(f"Failed to write {out_path}.")

In [11]:
json_path = "./dataset/seadronessee/annotations/instances_train_objects_in_water_life_jacket_rm_fixed.json"
images_dir = r"D:\Dataset\images\train"   # adjust path
output_dir = "./inferenced"

# Example: visualize 41 image IDs taken from four contiguous runs of the ID range
image_ids = [
    *range(0, 11),
    *range(5041, 5051),
    *range(18218, 18228),
    *range(20442, 20452),
]
visualize_with_localization(json_path, images_dir, output_dir, image_ids)

Saved ./inferenced/0.jpg
Saved ./inferenced/1.jpg
Saved ./inferenced/2.jpg
Saved ./inferenced/3.jpg
Saved ./inferenced/4.jpg
Saved ./inferenced/5.jpg
Saved ./inferenced/6.jpg
Saved ./inferenced/7.jpg
Saved ./inferenced/8.jpg
Saved ./inferenced/9.jpg
Saved ./inferenced/10.jpg
Saved ./inferenced/5041.jpg
Saved ./inferenced/5042.jpg
Saved ./inferenced/5043.jpg
Saved ./inferenced/5044.jpg
Saved ./inferenced/5045.jpg
Saved ./inferenced/5046.jpg
Saved ./inferenced/5047.jpg
Saved ./inferenced/5048.jpg
Saved ./inferenced/5049.jpg
Saved ./inferenced/5050.jpg
Saved ./inferenced/18218.jpg
Saved ./inferenced/18219.jpg
Saved ./inferenced/18220.jpg
Saved ./inferenced/18221.jpg
Saved ./inferenced/18222.jpg
Saved ./inferenced/18223.jpg
Saved ./inferenced/18224.jpg
Saved ./inferenced/18225.jpg
Saved ./inferenced/18226.jpg
Saved ./inferenced/18227.jpg
Saved ./inferenced/20442.jpg
Saved ./inferenced/20443.jpg
Saved ./inferenced/20444.jpg
Saved ./inferenced/20445.jpg
Saved ./inferenced/20446.jpg
Saved ./i