## Setup

In [1]:
import numpy as np
import cv2 as cv
import glob
from pathlib import Path
import json
import gradio as gr
import os
from PIL import Image, ImageDraw
import threading


# Download the images
def in_colab() -> bool:
    """Report whether this notebook is running inside Google Colab.

    Returns:
        True when ``google.colab`` can be imported, False when the import
        fails for any reason.
    """
    try:
        # Imported purely as an availability probe; the module itself is unused.
        import google.colab
    except Exception:
        return False
    else:
        return True

REPO_URL = "https://github.com/Tiromachelan/camera-pose-estimator"

if in_colab():
  if not Path("camera-pose-estimator/images").exists():
    !git clone {REPO_URL}

## Calibration

In [2]:
class Calibration:
    """Chessboard-based camera calibration helpers."""

    @staticmethod
    def calibrateCamera(image_files, pattern_size=(9, 6)):
        """Calibrate a camera from the chessboard photos found in a folder.

        Args:
            image_files: Path of the folder containing the ``*.jpeg``
                calibration photos.
            pattern_size: Chessboard inner-corner counts, passed unchanged to
                ``cv.findChessboardCorners``. Defaults to ``(9, 6)``.

        Returns:
            The value returned by ``cv.calibrateCamera`` (callers in this
            notebook unpack it as ``ret, mtx, dist, rvecs, tvecs``).

        Raises:
            ValueError: If the folder holds no ``*.jpeg`` files, or if the
                chessboard could not be detected in any of them.
        """
        print(f"calibrating data in folder {image_files}")

        images = glob.glob(os.path.join(image_files, '*.jpeg'))
        if not images:
            raise ValueError(f"No *.jpeg images found in folder {image_files!r}")

        # termination criteria for the sub-pixel corner refinement
        criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)

        # prepare object points on the z=0 plane: (0,0,0), (1,0,0), (2,0,0), ...
        # up to (pattern_size[0]-1, pattern_size[1]-1, 0)
        n_cols, n_rows = pattern_size
        objp = np.zeros((n_cols * n_rows, 3), np.float32)
        objp[:, :2] = np.mgrid[0:n_cols, 0:n_rows].T.reshape(-1, 2)

        # Arrays to store object points and image points from all the images.
        objpoints = []  # 3d point in real world space
        imgpoints = []  # 2d points in image plane.
        image_size = None  # (width, height) of the most recently read image

        for fname in images:
            img = cv.imread(fname)
            if img is None:
                # cv.imread signals an unreadable file by returning None.
                print(f"Skipping unreadable image {fname}")
                continue

            gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            image_size = gray.shape[::-1]

            # Find the chess board corners
            found, corners = cv.findChessboardCorners(gray, pattern_size, None)
            if not found:
                continue

            # Corners found: record object points and refined image points.
            print(f"Processed {fname}")
            objpoints.append(objp)
            corners2 = cv.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
            imgpoints.append(corners2)

        if not imgpoints:
            raise ValueError(
                f"No chessboard with pattern size {tuple(pattern_size)} was detected "
                f"in any image in folder {image_files!r}"
            )

        return cv.calibrateCamera(objpoints, imgpoints, image_size, None, None)

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also serializes NumPy arrays and NumPy scalars."""

    def default(self, obj):
        """Convert NumPy values to plain Python objects for ``json``.

        Args:
            obj: The object the standard encoder could not serialize.

        Returns:
            A nested list for ``np.ndarray`` input, or the equivalent Python
            scalar for NumPy scalar input (e.g. ``np.float32``, ``np.int64``).

        Raises:
            TypeError: For any other unsupported type (raised by the base class).
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            # NumPy scalars are not JSON serializable; unwrap to a Python scalar.
            return obj.item()
        return super().default(obj)

def calibrate_and_display(folder_path):
    """Calibrate from the images in a folder and save the result as JSON.

    Runs ``Calibration.calibrateCamera`` on ``folder_path``, bundles the result
    with the list of JPEG files found in the folder, writes everything to
    ``calibration.json`` in the current working directory and returns the same
    JSON text.

    Args:
        folder_path: Folder containing the calibration images.

    Returns:
        The JSON document as a string (indented by 4 spaces).

    Note:
        ``image_paths`` lists every ``*.jpeg`` / ``*.JPEG`` file in the folder.
        The calibration result does not say which images were actually used,
        so this list is not guaranteed to line up one-to-one with
        ``rotation_vectors`` / ``translation_vectors``.
    """
    ret, mtx, dist, rvecs, tvecs = Calibration.calibrateCamera(folder_path)

    matches = glob.glob(os.path.join(folder_path, "*.jpeg")) + glob.glob(os.path.join(folder_path, "*.JPEG"))
    # Where glob matching is case-insensitive (e.g. Windows) both patterns hit
    # the same files; dict.fromkeys drops repeats and keeps first-seen order.
    files = list(dict.fromkeys(matches))

    results = {
        "return_value": ret,
        "camera_matrix": mtx,
        "distortion_coefficients": dist,
        "rotation_vectors": rvecs,
        "translation_vectors": tvecs,
        "image_paths": files
    }
    json_output = json.dumps(results, cls=NumpyEncoder, indent=4)

    with open("calibration.json", "w") as f:
        f.write(json_output)

    return json_output

# Run the calibration on the image folder that matches the current environment:
# the cloned repository on Colab, a local `images` folder everywhere else.
if not in_colab():
    calibrate_and_display("images")
else:
    calibrate_and_display("camera-pose-estimator/images")

calibrating data in folder images
Processed images/DSCN0504.jpeg
Processed images/DSCN0494.jpeg
Processed images/DSCN0498.jpeg
Processed images/DSCN0499.jpeg
Processed images/DSCN0495.jpeg
Processed images/DSCN0498.jpeg
Processed images/DSCN0499.jpeg
Processed images/DSCN0495.jpeg
Processed images/DSCN0483.jpeg
Processed images/DSCN0488.jpeg
Processed images/DSCN0502.jpeg
Processed images/DSCN0483.jpeg
Processed images/DSCN0488.jpeg
Processed images/DSCN0502.jpeg
Processed images/DSCN0484.jpeg
Processed images/DSCN0492.jpeg
Processed images/DSCN0484.jpeg
Processed images/DSCN0492.jpeg
Processed images/DSCN0493.jpeg
Processed images/DSCN0485.jpeg
Processed images/DSCN0503.jpeg
Processed images/DSCN0493.jpeg
Processed images/DSCN0485.jpeg
Processed images/DSCN0503.jpeg
Processed images/DSCN0489.jpeg
Processed images/DSCN0489.jpeg
Processed images/DSCN0490.jpeg
Processed images/DSCN0486.jpeg
Processed images/DSCN0490.jpeg
Processed images/DSCN0486.jpeg
Processed images/DSCN0501.jpeg
Proce

In [3]:
class calib:
    """Reader for a calibration JSON file."""

    @staticmethod
    def load_calibration(jsonPath:str='calibration.json'):
        """Read the camera matrix and distortion coefficients from a JSON file.

        Args:
            jsonPath: Path of the JSON file to read.

        Returns:
            A ``(k, distanceCoeff)`` tuple holding the values stored under the
            ``camera_matrix`` and ``distortion_coefficients`` keys, exactly as
            decoded by ``json.load`` (no conversion to NumPy arrays is done).
        """
        with open(jsonPath, 'r') as handle:
            payload = json.load(handle)
        # Pull out only the two entries needed; everything else is ignored.
        return payload['camera_matrix'], payload['distortion_coefficients']


In [4]:
class Model:
    """Loader for point data stored in a text file."""

    @staticmethod
    def load_points(csv_path:str, delimiter=None):
        """Load a numeric table of points from a text file.

        Args:
            csv_path: Path of the file to read.
            delimiter: Column separator forwarded to ``np.loadtxt``. The default
                ``None`` splits on whitespace; pass ``","`` for comma-separated
                files.

        Returns:
            The array produced by ``np.loadtxt``.
        """
        points = np.loadtxt(csv_path, delimiter=delimiter)
        return points

# Gradio Picker

In [5]:

def launch_point_picker(my_image):
    """Launch an inline Gradio app for clicking points on a single image.

    Each click adds an ``[x, y]`` pixel coordinate (clamped to the image
    bounds) and redraws the preview with a red circle per point. Undo and Clear
    buttons edit the list. Pressing Done stores the points in the module-level
    ``SELECTED_POINTS``, in the IPython user namespace as ``selected_points``
    (when IPython is available), and in ``selected_points.json``.

    Args:
        my_image: A PIL image or a NumPy array to pick points on.

    Returns:
        The value returned by
        ``demo.launch(inline=True, prevent_thread_lock=True)``.

    Raises:
        gr.Error: If ``my_image`` is neither a PIL image nor a NumPy array.
        ValueError: If a NumPy array has an unsupported shape.
    """
    points_store = []

    def _to_pil_from_numpy(arr: np.ndarray) -> Image.Image:
        """Convert a 2-D or 3-D NumPy array into a PIL image."""
        arr = np.asarray(arr)
        # channel-first -> channel-last
        if arr.ndim == 3 and arr.shape[0] in (1,3,4) and arr.shape[-1] not in (1,3,4):
            arr = np.transpose(arr, (1,2,0))
        if np.issubdtype(arr.dtype, np.floating):
            # scale floats in [0,1] to [0,255]
            arr = (np.clip(arr, 0.0, 1.0) * 255.0).round().astype(np.uint8)
        elif arr.dtype != np.uint8:
            arr = np.clip(arr, 0, 255).astype(np.uint8)
        # Choose mode
        if arr.ndim == 2:
            return Image.fromarray(arr, mode="L")
        if arr.ndim == 3 and arr.shape[2] == 3:
            return Image.fromarray(arr, mode="RGB")
        if arr.ndim == 3 and arr.shape[2] == 4:
            return Image.fromarray(arr, mode="RGBA")
        if arr.ndim == 3 and arr.shape[2] == 1:
            return Image.fromarray(arr[:,:,0], mode="L")
        raise ValueError(f"Unsupported array shape: {arr.shape}")

    def _to_pil(img):
        """Return ``img`` as a PIL image, converting NumPy arrays."""
        if isinstance(img, Image.Image):
            return img
        if isinstance(img, np.ndarray):
            return _to_pil_from_numpy(img)
        raise gr.Error("Set `my_image` to a PIL image or NumPy array before launching.")

    def _draw_points(base_img: Image.Image, pts, radius=5):
        """Return an RGB copy of ``base_img`` with a red circle at each point."""
        img = base_img.copy().convert("RGB")
        d = ImageDraw.Draw(img)
        for (x, y) in pts:
            d.ellipse([x-radius, y-radius, x+radius, y+radius], outline=(255,0,0), width=2)
        return img

    base_pil = _to_pil(my_image)

    def _refresh_numpy():
        """Return current preview (base + points) as numpy for Gradio."""
        return np.array(_draw_points(base_pil, points_store))

    def on_click(evt: gr.SelectData):
        """Record the clicked pixel and return the updated preview and JSON."""
        # Get coordinates robustly: prefer evt.index, fall back to evt.x / evt.y.
        x = y = None
        if getattr(evt, "index", None) is not None:
            try:
                x, y = evt.index
            except (TypeError, ValueError):
                # evt.index was not an (x, y) pair; try the fallback below.
                pass
        if (x is None or y is None) and hasattr(evt, "x") and hasattr(evt, "y"):
            x, y = evt.x, evt.y
        if x is None or y is None:
            # No usable coordinates: leave the image untouched.
            return gr.update(), json.dumps(points_store)

        # Clamp to image bounds
        w, h = base_pil.size
        x = int(max(0, min(w-1, x)))
        y = int(max(0, min(h-1, y)))

        points_store.append([x, y])
        return _refresh_numpy(), json.dumps(points_store)

    def undo_last():
        """Drop the most recent point, if any, and refresh the preview."""
        if points_store:
            points_store.pop()
        return _refresh_numpy(), json.dumps(points_store)

    def clear_points():
        """Remove every point and show the untouched base image."""
        points_store.clear()
        return np.array(base_pil), "[]"

    def done_btn_click():
        """Publish the picked points and return a status message.

        Stores a copy of the points in the module-level ``SELECTED_POINTS``,
        in the IPython user namespace as ``selected_points`` (best effort),
        and in ``selected_points.json``. The app itself is not closed here.
        """
        global SELECTED_POINTS
        SELECTED_POINTS = [list(p) for p in points_store]
        try:
            ip = get_ipython()
            if ip is not None:
                ip.user_ns['selected_points'] = SELECTED_POINTS
        except Exception:
            # Best effort only: outside IPython `get_ipython` is undefined.
            pass
        with open('selected_points.json', 'w') as f:
            json.dump(SELECTED_POINTS, f)
        return f"✅ Saved {len(SELECTED_POINTS)} points to `selected_points`. Closing…"

    with gr.Blocks(title="Point Picker (single image)") as demo:
        gr.Markdown("**Click on the image to add points.** Use Undo / Clear as needed, then press **Done**.")
        img = gr.Image(
            value=np.array(base_pil), label="Image (click to add points)",
            type="numpy", interactive=True, sources=[]  # sources=[] disables uploads
        )
        with gr.Row():
            undo_btn = gr.Button("↩️ Undo")
            clear_btn = gr.Button("🧹 Clear")
            done_btn = gr.Button("✅ Done", variant="primary")
        pts_text = gr.Textbox(label="Points (JSON)", value="[]", interactive=False)
        status = gr.Markdown("")

        # One image used for both input and output
        img.select(on_click, inputs=None, outputs=[img, pts_text])
        undo_btn.click(undo_last, outputs=[img, pts_text])
        clear_btn.click(clear_points, outputs=[img, pts_text])
        done_btn.click(done_btn_click, outputs=[status])

    # Launch once the layout is fully defined, i.e. after the Blocks context exits.
    return demo.launch(inline=True, prevent_thread_lock=True)

# Homography --> Pose (Explicit)

# OpenCV Solution

In [6]:
# `my_image` may be either a PIL.Image or a NumPy array shaped (H, W[, C]).
from PIL import Image

# On Colab the photo lives inside the cloned repository folder.
my_image = Image.open(
    "camera-pose-estimator/DSCN0851.JPG" if in_colab() else "DSCN0851.JPG"
)

selected_points = []

# Inline, non-blocking launch: the cell keeps executing right away.
app = launch_point_picker(my_image)

# Meaningful only later, once points were clicked and "Done" was pressed:
# a list of [x, y] pixel coordinates.
print(selected_points)


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


[]
