In [1]:
%load_ext autoreload
%autoreload 2
from courtvision.data import (
    Annotation,
    CourtAnnotatedSample,
    KeypointValue,
    RectValue,
    PadelDataset,
    download_data_item,
    ClipSegmentResult,
    StreamType,
    VideoRectValue,
    get_normalized_calibration_image_points_and_clip_ids,
)
from courtvision.geometry import (
    get_planar_points_padel_court,
    get_planar_point_correspondences,
    corners_world_3d,
    convert_obj_points_to_planar,
    denormalize_as_named_points,
    calibrate_and_evaluate,
    CameraInfo,
    find_optimal_calibration_and_pose,
)
from courtvision.swiss import mark_as_deprecated
from courtvision.data import dict_to_points
from courtvision.data import frames_from_clip_segments
import torchvision
from courtvision.swiss import get_latest_file
from pathlib import Path
import json
import torch
import itertools
import boto3
import enum
from hashlib import md5

import cv2
import numpy as np

In [2]:
ANNOTATION_PATH = Path(
    "/Users/benjamindecharmoy/projects/courtvision/datasets/clip_segmentations"
)
ANNOTATION_DATA_PATH = Path(
    "/Users/benjamindecharmoy/projects/courtvision/datasets/clip_segmentations/data"
)
ANNOTATION_DATA_PATH.mkdir(exist_ok=True, parents=True)

annotations_file = get_latest_file(ANNOTATION_PATH, "json")
annotations_file

PosixPath('/Users/benjamindecharmoy/projects/courtvision/datasets/clip_segmentations/project-1-at-2023-06-14-14-07-fbfdfa1a.json')

In [3]:
with open(annotations_file, "r") as f:
    dataset = PadelDataset(samples=json.load(f))
dataset.samples[0].annotations[0]

image_width = 1920
image_height = 1080

In [4]:
(
    normalised_named_points,
    valid_clip_ids,
) = get_normalized_calibration_image_points_and_clip_ids(dataset)
calibration_image_points = denormalize_as_named_points(
    normalised_named_points=normalised_named_points,
    image_width=image_width,
    image_height=image_height,
)

calibration_correspondences = get_planar_point_correspondences(
    image_points=calibration_image_points,
    world_points=corners_world_3d.copy(),
    minimal_set_count=4,
)

pose_correspondences = get_planar_point_correspondences(
    image_points=calibration_image_points,
    world_points=corners_world_3d.copy(),
    minimal_set_count=6,
)


all_world_points, all_labels = dict_to_points(corners_world_3d.copy())
all_image_points, _ = dict_to_points(calibration_image_points.copy())

In [5]:
valid_clip_ids

{PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output041.mp4'),
 PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output042.mp4'),
 PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output043.mp4'),
 PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output046.mp4'),
 PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output047.mp4'),
 PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output048.mp4'),
 PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output049.mp4'),
 PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output052.mp4')}

In [6]:
from dataclasses import dataclass
from itertools import combinations
from itertools import chain, product


# def find_optimal_calibration_and_pose(
#     valid_clip_ids: str,
#     calibration_correspondences: list[tuple[np.array, np.array]],
#     pose_correspondences: list[tuple[np.array, np.array]],
# ) -> CameraInfo:
#     CALIBRATION_MIN_PAIRS = 4
#     CALIBRATION_MAX_PAIRS = min(8, len(calibration_correspondences))

#     POSE_MIN_PAIRS = 4
#     POSE_MAX_PAIRS = min(8, len(pose_correspondences))

#     calibration_indexes = [o for o in range(len(calibration_correspondences))]
#     calibration_selected_pairs: list[tuple[int, ...]] = list(
#         chain.from_iterable(
#             (combinations(calibration_indexes, num_pairs_to_use))
#             for num_pairs_to_use in range(CALIBRATION_MIN_PAIRS, CALIBRATION_MAX_PAIRS)
#         )
#     )

#     pose_indexes = [o for o in range(len(pose_correspondences))]
#     pose_selected_pairs: list[tuple[int, ...]] = list(
#         chain.from_iterable(
#             (combinations(pose_indexes, num_pairs_to_use))
#             for num_pairs_to_use in range(POSE_MIN_PAIRS, POSE_MAX_PAIRS)
#         )
#     )

#     best_error_in_reprojecred_points = 10000.0
#     best_camera_info = None

#     for calibration_pair, pose_pair in product(
#         calibration_selected_pairs, pose_selected_pairs
#     ):
#         calibration_correspondences_selection = [
#             calibration_correspondences[o] for o in calibration_pair
#         ]
#         pose_correspondences_selection = [pose_correspondences[o] for o in pose_pair]

#         camera_info = calibrate_and_evaluate(
#             valid_clip_ids=valid_clip_ids,
#             calibration_correspondences_selected=calibration_correspondences_selection,
#             pose_correspondences_selected=pose_correspondences_selection,
#             image_width=image_width,
#             image_height=image_height,
#             all_image_points=all_image_points,
#             all_world_points=all_world_points,

#         )
#         if camera_info.error_in_reprojecred_points < best_error_in_reprojecred_points:
#             best_camera_info = camera_info
#     if best_camera_info is None:
#         raise RuntimeError("Failed to find optimal calibration and pose")
#     return best_camera_info


camera_info = find_optimal_calibration_and_pose(
    valid_clip_ids=valid_clip_ids,
    calibration_correspondences=calibration_correspondences,
    pose_correspondences=pose_correspondences,
    image_height=image_height,
    image_width=image_width,
    all_image_points=all_image_points,
    all_world_points=all_world_points,
)

In [7]:
# camera_info.save("test.npz")
# camera_info_loaded = CameraInfo.load("test.npz")
camera_info
# assert camera_info_loaded == camera_info

CameraInfo(valid_for_clip_ids={PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output048.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output047.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output052.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output041.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output046.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output043.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output049.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output042.mp4')}, camera_matrix=array([[2.17528979e+03, 0.00000000e+00, 9.27941176e+02],
       [0.00000000e+00, 2.09242651e+03, 9.04851318e+02],
       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]), distortion_coefficients=array([[ 1.17251984e+00, -7.37315163e+00,  5.62417153e-02,
         9.85908975e-03,  1.44029121e+01]]), rotation_vector=array

In [18]:
camera_info_loaded.error_in_reprojecred_points

array(9.786543, dtype=float32)

In [69]:
camera_info

CameraInfo(valid_for_clip_ids={PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output048.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output052.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output049.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output047.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output043.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output046.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output041.mp4'), PosixPath('s3:/courtvision-source-data-ksaldjfbnv/source/0000/output042.mp4')}, camera_matrix=array([[2.17528979e+03, 0.00000000e+00, 9.27941176e+02],
       [0.00000000e+00, 2.09242651e+03, 9.04851318e+02],
       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]), distortion_coefficients=array([[ 1.17251984e+00, -7.37315163e+00,  5.62417153e-02,
         9.85908975e-03,  1.44029121e+01]]), rotation_vector=array

In [55]:
from courtvision.geometry import calibrate_and_evaluate
from itertools import combinations, product

calibration_indexes = [o for o in range(len(calibration_correspondences))]
calibration_selected_pairs = (
    list(combinations(calibration_indexes, 3))
    + list(combinations(calibration_indexes, 4))
    + list(combinations(calibration_indexes, 5))
    + list(combinations(calibration_indexes, 6))
)

pose_indexes = [o for o in range(len(pose_correspondences))]
pose_selected_pairs = list(combinations(pose_indexes, 4))

reprojection_error_best = 100000.0
best_calibration_pair = None
best_reprojected_image_points = None
# print(f"{len(selected_pairs)=}")
for calibration_pair in calibration_selected_pairs:
    calibration_correspondences_selection = [
        calibration_correspondences[o] for o in calibration_pair
    ]

    for pose_pair in pose_selected_pairs:
        pose_correspondences_selection = [pose_correspondences[o] for o in pose_pair]
        # pose_correspondences = [pose_correspondences[i] for i in pair]
        # print(f"{len(calibration_correspondences_selection)=}")
        (
            reprojection_error,
            repo_erro,
            reprojected_image_points,
        ) = calibrate_and_evaluate(
            calibration_correspondences_selection,
            pose_correspondences_selection,
            image_width,
            image_height,
            all_world_points=all_world_points,
            all_image_points=all_image_points,
        )
        if reprojection_error_best > reprojection_error:
            reprojection_error_best = reprojection_error
            best_reprojected_image_points = reprojected_image_points
            best_calibration_pair = calibration_pair, pose_pair
            print(
                f"{reprojection_error_best=}, {repo_erro=} @ {best_calibration_pair=}"
            )
    # print(f"{reprojection_error=} {repo_erro=} @ {pair=}")

reprojection_error_best=9.55033, repo_erro=4.62894688232818 @ best_calibration_pair=((0, 1, 2), (0, 1, 2, 3))
reprojection_error_best=9.497264, repo_erro=4.62894688232818 @ best_calibration_pair=((0, 1, 2), (0, 1, 2, 5))
reprojection_error_best=9.474316, repo_erro=4.62894688232818 @ best_calibration_pair=((0, 1, 2), (0, 3, 4, 5))
reprojection_error_best=9.344671, repo_erro=4.62894688232818 @ best_calibration_pair=((0, 1, 2), (1, 2, 3, 4))
reprojection_error_best=9.322469, repo_erro=4.62894688232818 @ best_calibration_pair=((0, 1, 2), (1, 2, 3, 5))
reprojection_error_best=5.6706414, repo_erro=3.7105266429235035 @ best_calibration_pair=((0, 2, 3), (0, 1, 2, 3))
reprojection_error_best=5.6657543, repo_erro=3.7105266429235035 @ best_calibration_pair=((0, 2, 3), (1, 2, 3, 4))
reprojection_error_best=5.619977, repo_erro=3.7105266429235035 @ best_calibration_pair=((0, 2, 3), (1, 2, 3, 5))
reprojection_error_best=5.5997515, repo_erro=3.7105266429235035 @ best_calibration_pair=((0, 2, 3), (1, 2

In [41]:
import matplotlib.pyplot as plt
import rerun as rr
from kornia.utils import tensor_to_image

rr.init("sss", spawn=True)
rr.set_time_seconds("frames", 0)
u_id = None
for i, (frame, uid) in enumerate(
    frames_from_clip_segments(
        dataset=dataset,
        local_path=ANNOTATION_DATA_PATH,
        stream_type=StreamType.VIDEO,
    )
):
    if u_id is None:
        u_id = uid
    if u_id != uid:
        break
    # print(result)
    rr.set_time_seconds("frames", frame["pts"])
    rr.log_image(
        f"image/{uid}",
        tensor_to_image(frame["data"]),
    )
    rr.log_points("image/reprojected_points", best_reprojected_image_points)
    rr.log_points("image/calibration_points", all_image_points)

In [186]:
from itertools import combinations

indexes = [o for o in range(len(calibration_correspondences))]
list(combinations(indexes, 3))
[calibration_correspondences[i] for i in [0, 1, 2]]

[(array([[  0.,   0.,   0.],
         [100.,   0.,   0.],
         [  0., 200.,   0.],
         [100., 200.,   0.],
         [  0.,  20.,   0.],
         [100.,  20.,   0.],
         [  0., 180.,   0.],
         [100., 180.,   0.],
         [ 50., 180.,   0.],
         [  0., 100.,   0.],
         [ 50.,  20.,   0.],
         [100., 100.,   0.],
         [ 50., 100.,   0.]], dtype=float32),
  array([[ 247.2    ,  882.     ],
         [1679.1    ,  884.7    ],
         [ 641.4935 ,  303.31168],
         [1284.935  ,  296.88312],
         [ 360.97403,  725.25977],
         [1566.6234 ,  720.5844 ],
         [ 606.4286 ,  352.987  ],
         [1321.1688 ,  346.55844],
         [ 960.5844 ,  348.89612],
         [ 517.013  ,  485.06494],
         [ 955.9091 ,  718.8312 ],
         [1412.9221 ,  477.46753],
         [ 963.05084,  478.98306]], dtype=float32)),
 (array([[  0. ,   0. ,   0. ],
         [  0. , 200. ,   0. ],
         [  0. ,  20. ,   0. ],
         [  0. , 180. ,   0. ],
     

In [171]:
repo_erro, camera_matrix, dist_coeffs, *_ = cv2.calibrateCamera(
    objectPoints=[
        convert_obj_points_to_planar(obj) for obj, _ in calibration_correspondences
    ],
    imagePoints=[img for _, img in calibration_correspondences],
    imageSize=(image_width, image_height),
    cameraMatrix=None,
    distCoeffs=None,
)
print(repo_erro)

optimal_camera_matrix, roi = cv2.getOptimalNewCameraMatrix(
    camera_matrix,
    dist_coeffs,
    (image_width, image_height),
    1,
    (image_width, image_height),
    False,
)


success, rvec, tvec = cv2.solvePnP(
    objectPoints=np.concatenate([obj for obj, _ in pose_correspondences]),
    imagePoints=np.concatenate([img for _, img in pose_correspondences]),
    cameraMatrix=optimal_camera_matrix,
    distCoeffs=dist_coeffs,
    # None,
    flags=cv2.SOLVEPNP_ITERATIVE,
    useExtrinsicGuess=False,
)


print(f"{success=}")
reprojected_image_points, _ = cv2.projectPoints(
    all_world_points,
    rvec,
    tvec,
    optimal_camera_matrix,
    dist_coeffs,
)
reprojected_image_points = reprojected_image_points.reshape(-1, 2)
reprojection_error = np.linalg.norm(
    reprojected_image_points - all_image_points, axis=1
).mean()
print(f"{reprojection_error=}")
# plt.imshow(draw_points(image.copy(), reprojected_image_points, labels=labels))
# fig = plt.figure(figsize=(15, 15))
# ax1 = fig.add_subplot(221)
# ax2 = fig.add_subplot(222, projection="3d")
# ax1.imshow(
#     draw_points(image.copy(), keypoints[:num_pose_points], labels=labels)
# ), plot_3d_points(
#     x=world_points[:num_pose_points, 0],
#     y=world_points[:num_pose_points, 1],
#     z=world_points[:num_pose_points, 2],
#     plt_axis=ax2,
# )

# calibration_file_name = (
#     CALIBRATION_DATA_PATH.parent
#     / f"{num_pose_points}_error_{reprojection_error:.2f}_calibration"
# )
# np.savez(
#     calibration_file_name,
#     camera_matrix=camera_matrix,
#     optimal_camera_matrix=optimal_camera_matrix,
#     dist_coeffs=dist_coeffs,
#     rotation_vector=rvec,
#     translation_vector=tvec,
# )

11.73663252409283
success=True
reprojection_error=86.42919


In [169]:
all_world_points

(array([[  0. ,   0. ,   0. ],
        [100. ,   0. ,   0. ],
        [  0. , 200. ,   0. ],
        [100. , 200. ,   0. ],
        [  0. ,  20. ,   0. ],
        [100. ,  20. ,   0. ],
        [  0. , 180. ,   0. ],
        [100. , 180. ,   0. ],
        [ 50. , 180. ,   0. ],
        [  0. , 100. ,   0. ],
        [ 50. ,  20. ,   0. ],
        [100. , 100. ,   0. ],
        [  0. ,   0. ,  30. ],
        [100. ,   0. ,  30. ],
        [  0. , 200. ,  30. ],
        [100. , 200. ,  30. ],
        [  0. , 100. ,   7.8],
        [100. , 100. ,   7.8],
        [ 50. , 100. ,   7.8],
        [ 50. , 100. ,   0. ],
        [  0. ,   0. ,  40. ],
        [100. ,   0. ,  40. ],
        [  0. , 200. ,  40. ],
        [100. , 200. ,  40. ],
        [  0. , 100. ,  30. ],
        [100. , 100. ,  30. ]], dtype=float32),
 ['a_front_left',
  'b_front_right',
  'c_back_left',
  'd_back_right',
  'e_left_near_serve_line',
  'f_right_near_serve_line',
  'g_left_far_serve_line',
  'h_right_far_serve_

In [33]:
from courtvision.data import frames_from_clip_segments

u_id = None
for frame, result, uid in frames_from_clip_segments(
    dataset=dataset,
    local_path=ANNOTATION_DATA_PATH,
    stream_type=StreamType.VIDEO,
):
    print(f"{uid}, {frame['pts']} {result=}")
    if u_id is None:
        u_id = uid
    if u_id != uid:
        break

46b86895106499172f9682de0949fd61, 48.266666666666666 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.17713093884683, labels=['rally_winner_fg']))
46b86895106499172f9682de0949fd61, 48.3 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.17713093884683, labels=['rally_winner_fg']))
46b86895106499172f9682de0949fd61, 48.333333333333336 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.17713093884683, labels=['rally_winner_fg']))
46b86895106499172f9682de0949fd61, 48.36666666666667 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.17713093884683, labels=['rally_winner_fg']))
46b86895106499172f9682de0949fd61, 48.4 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.1771

In [None]:
import torchvision

video_path = "/Users/benjamindecharmoy/projects/courtvision/output041.mp4"
# Constructor allocates memory and a threaded decoder
# instance per video. At the moment it takes two arguments:
# path to the video file, and a wanted stream.
reader = torchvision.io.VideoReader(video_path, "video")

# The information about the video can be retrieved using the
# `get_metadata()` method. It returns a dictionary for every stream, with
# duration and other relevant metadata (often frame rate)
reader_md = reader.get_metadata()

# metadata is structured as a dict of dicts with following structure
# {"stream_type": {"attribute": [attribute per stream]}}
#
# following would print out the list of frame rates for every present video stream
print(reader_md["video"]["fps"])

# we explicitly select the stream we would like to operate on. In
# the constructor we select a default video stream, but
# in practice, we can set whichever stream we would like
# video.set_current_stream("video:0")

In [None]:
dataset.samples[0].annotations[0].result[0].value.start

In [None]:
start_time, end_time = (
    dataset.samples[0].annotations[0].result[0].value.start,
    dataset.samples[0].annotations[0].result[0].value.end,
)

for frame in itertools.takewhile(
    lambda x: x["pts"] <= end_time, reader.seek(start_time)
):
    print(frame["data"].shape)

# reader.seek(start_time)

In [None]:
im
for d in reader:
    print(d["data"].shape)

In [None]:
import matplotlib.pyplot as plt
import cv2
import kornia
import torch

image = cv2.imread("nerdbrid.jpg")
dd = kornia.morphology.gradient(
    torch.tensor(image).unsqueeze(0).float(), kernel=torch.ones(3, 3)
)

plt.imshow(dd.squeeze(0).numpy())

# https://gist.github.com/zed/776423 Something like this. They calling it the maximal rectangle algorithm

In [None]:
import matplotlib.pyplot as plt

plt.imshow(dd.squeeze(0).numpy())

In [None]:
import torch


def maximal_rectangle(matrix):
    if len(matrix) == 0:
        return 0, []
    max_area = 0
    max_box = [0, 0, 0, 0]
    heights = torch.zeros(len(matrix[0]) + 1, dtype=torch.int32)
    positions = torch.zeros(len(matrix[0]) + 1, dtype=torch.int32)
    for row_index, row in enumerate(matrix):
        stack = [-1]
        for i in range(len(heights)):
            if i < len(row):
                if row[i] == 0:
                    heights[i] = 0
                    positions[i] = i
                else:
                    if stack[-1] != -1 and heights[stack[-1]] == row[i]:
                        continue
                    heights[i] = heights[i] + row[i]
                    positions[i] = min(positions[i], positions[stack[-1]])
            while heights[i] < heights[stack[-1]]:
                h = heights[stack.pop()]
                w = i - positions[stack[-1]]
                if h * w > max_area:
                    max_area = h * w
                    max_box = [
                        positions[stack[-1]],
                        row_index - h + 1,
                        i - 1,
                        row_index,
                    ]
            stack.append(i)
    return max_area, max_box


# The image should be a 2D tensor (binary image)
# image = torch.tensor([[1, 0, 1, 0, 0],
#                       [1, 0, 1, 1, 1],
#                       [1, 1, 1, 1, 1],
#                       [1, 0, 0, 1, 0]], dtype=torch.int32)

max_area, bounding_box = maximal_rectangle(image)
print("Max Area: ", max_area)
print("Bounding Box Coordinates: ", bounding_box)

In [None]:
# largest_rectangle(dd.squeeze(0).numpy())
shaped_image = dd.squeeze(0).numpy().max(axis=2).astype(np.uint8)

In [None]:
shaped_image = np.where(shaped_image > 0, 255, 0).astype(np.uint8)
plt.imshow(shaped_image)

In [None]:
max_area, bounding_box = maximal_rectangle(shaped_image)

In [None]:
bounding_box
# plt.imshow(cv2.rectangle(shaped_image,
#                          ( 360, 537), (192, 127), (255, 255, 255), 2))