In [99]:
%load_ext autoreload
%autoreload 2
from courtvision.data import (
    Annotation,
    CourtAnnotatedSample,
    KeypointValue,
    RectValue,
    PadelDataset,
    download_data_item,
    ClipSegmentResult,
    StreamType,
    VideoRectValue,
    get_normalized_calibration_image_points,
)
from courtvision.geometry import (
    get_planar_points_padel_court,
    get_planar_point_correspondences,
    corners_world_3d,
    convert_obj_points_to_planar,
)
import torchvision
from courtvision.swiss import get_latest_file
from pathlib import Path
import json
import torch
import itertools
import boto3
import enum
from hashlib import md5

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [100]:
# Folder holding the annotation exports (machine-specific absolute path).
ANNOTATION_PATH = Path(
    "/Users/benjamindecharmoy/projects/courtvision/datasets/clip_segmentations"
)
# Sub-folder passed as `local_path` when reading clip frames further down.
# Derived from ANNOTATION_PATH so the two locations cannot drift apart.
ANNOTATION_DATA_PATH = ANNOTATION_PATH / "data"
ANNOTATION_DATA_PATH.mkdir(exist_ok=True, parents=True)

# Select the "json" file returned by get_latest_file and show it as the cell output.
annotations_file = get_latest_file(ANNOTATION_PATH, "json")
annotations_file

PosixPath('/Users/benjamindecharmoy/projects/courtvision/datasets/clip_segmentations/project-1-at-2023-06-14-14-07-fbfdfa1a.json')

In [101]:
# Parse the selected annotation export into a PadelDataset.
with annotations_file.open("r") as f:
    dataset = PadelDataset(samples=json.loads(f.read()))
# Touch the first annotation of the first sample (no output is shown because
# this is not the last expression of the cell).
dataset.samples[0].annotations[0]

# Frame size in pixels, used below to scale the normalised image points.
image_height = 1080
image_width = 1920

In [102]:
# Fetch the annotated calibration points and scale their first two components
# by the frame width / height.
normalized_calibration_image_points = get_normalized_calibration_image_points(dataset)
calibration_image_points = dict(
    (k, (v[0] * image_width, v[1] * image_height))
    for k, v in normalized_calibration_image_points.items()
)
# Pair the image points with a copy of the court's world points.
calibration_correspondences = get_planar_point_correspondences(
    world_points=corners_world_3d.copy(),
    image_points=calibration_image_points,
)
# Dump every set of object points and run the planar conversion on it
# (the converted result is discarded here).
for i, (obj, _) in enumerate(calibration_correspondences):
    print(obj.shape, obj, f"{i=}", sep="\n")
    convert_obj_points_to_planar(obj)

(13, 3)
[[  0.   0.   0.]
 [100.   0.   0.]
 [  0. 200.   0.]
 [100. 200.   0.]
 [  0.  20.   0.]
 [100.  20.   0.]
 [  0. 180.   0.]
 [100. 180.   0.]
 [ 50. 180.   0.]
 [  0. 100.   0.]
 [ 50.  20.   0.]
 [100. 100.   0.]
 [ 50. 100.   0.]]
i=0
(10, 3)
[[  0.    0.    0. ]
 [  0.  200.    0. ]
 [  0.   20.    0. ]
 [  0.  180.    0. ]
 [  0.  100.    0. ]
 [  0.    0.   30. ]
 [  0.  200.   30. ]
 [  0.  100.    7.8]
 [  0.    0.   40. ]
 [  0.  200.   40. ]]
i=1
(11, 3)
[[100.    0.    0. ]
 [100.  200.    0. ]
 [100.   20.    0. ]
 [100.  180.    0. ]
 [100.  100.    0. ]
 [100.    0.   30. ]
 [100.  200.   30. ]
 [100.  100.    7.8]
 [100.    0.   40. ]
 [100.  200.   40. ]
 [100.  100.   30. ]]
i=2
(6, 3)
[[  0.   0.   0.]
 [100.   0.   0.]
 [  0.   0.  30.]
 [100.   0.  30.]
 [  0.   0.  40.]
 [100.   0.  40.]]
i=3
(6, 3)
[[  0. 200.   0.]
 [100. 200.   0.]
 [  0. 200.  30.]
 [100. 200.  30.]
 [  0. 200.  40.]
 [100. 200.  40.]]
i=4
(5, 3)
[[  0.  100.    0. ]
 [100.  100.    0.

In [104]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

# NOTE(review): besides the imports above, this cell depends on names that no
# earlier cell of this notebook defines: `image`, `world_points`, `keypoints`,
# `labels`, `draw_points`, `plot_3d_points` and `CALIBRATION_DATA_PATH`.
# They must be defined before this cell can survive Restart & Run All.

# Intrinsic calibration: every correspondence set is passed as one view, with
# its object points run through convert_obj_points_to_planar first.
repo_erro, camera_matrix, dist_coeffs, *_ = cv2.calibrateCamera(
    objectPoints=[
        convert_obj_points_to_planar(obj) for obj, _ in calibration_correspondences
    ],
    imagePoints=[img for _, img in calibration_correspondences],
    imageSize=(image_width, image_height),
    cameraMatrix=None,
    distCoeffs=None,
)
# First value returned by calibrateCamera (the overall RMS re-projection error).
print(repo_erro)

# New camera matrix for the undistorted image (alpha=1, same output size).
h, w = image.shape[:2]
optimal_camera_matrix, roi = cv2.getOptimalNewCameraMatrix(
    camera_matrix, dist_coeffs, (w, h), 1, (w, h), False
)

# dst = cv2.undistort(image, camera_matrix, dist_coeffs, None, optimal_camera_matrix)
# image_points_array_undist = np.array(
#     [
#         (x, y)
#         for x, y in cv2.undistortPoints(
#             image_points_array, camera_matrix, dist_coeffs, None, optimal_camera_matrix
#         ).squeeze()
#     ]
# )

# NOTE(review): solvePnP / projectPoints below combine `optimal_camera_matrix`
# with the original `dist_coeffs`. If `keypoints` are raw (distorted) pixels the
# matching pair is (`camera_matrix`, `dist_coeffs`); if they were undistorted
# with `optimal_camera_matrix` the matching pair is (`optimal_camera_matrix`,
# None). Confirm which one is intended before trusting the saved pose.
num_pose_points = 24
success, rvec, tvec = cv2.solvePnP(
    world_points[:num_pose_points],
    keypoints[:num_pose_points],
    optimal_camera_matrix,
    dist_coeffs,
    # None,
    flags=cv2.SOLVEPNP_ITERATIVE,
    useExtrinsicGuess=False,
)


print(f"{success=}")
# Re-project the world points with the estimated pose and report the mean
# pixel distance to the annotated keypoints.
reprojected_image_points, _ = cv2.projectPoints(
    world_points[:num_pose_points],
    rvec,
    tvec,
    optimal_camera_matrix,
    dist_coeffs,
)
reprojected_image_points = reprojected_image_points.reshape(-1, 2)
reprojection_error = np.linalg.norm(
    reprojected_image_points[:num_pose_points] - keypoints[:num_pose_points], axis=1
).mean()
print(f"{reprojection_error=}")

# Visual check: re-projected points first, then the annotated keypoints next
# to a 3D plot of the world points.
plt.imshow(draw_points(image.copy(), reprojected_image_points, labels=labels))
fig = plt.figure(figsize=(15, 15))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222, projection="3d")
# These two calls were previously glued together as a single tuple expression.
ax1.imshow(draw_points(image.copy(), keypoints[:num_pose_points], labels=labels))
plot_3d_points(
    x=world_points[:num_pose_points, 0],
    y=world_points[:num_pose_points, 1],
    z=world_points[:num_pose_points, 2],
    plt_axis=ax2,
)

# Persist intrinsics and pose; the file name records the number of pose points
# and the re-projection error.
calibration_file_name = (
    CALIBRATION_DATA_PATH.parent
    / f"{num_pose_points}_error_{reprojection_error:.2f}_calibration"
)
np.savez(
    calibration_file_name,
    camera_matrix=camera_matrix,
    optimal_camera_matrix=optimal_camera_matrix,
    dist_coeffs=dist_coeffs,
    rotation_vector=rvec,
    translation_vector=tvec,
)

11.631367545344473


In [33]:
from courtvision.data import frames_from_clip_segments

# Print uid, timestamp and segment result for the frames that share the first
# uid yielded by frames_from_clip_segments.
u_id = None
for frame, result, uid in frames_from_clip_segments(
    dataset=dataset,
    local_path=ANNOTATION_DATA_PATH,
    stream_type=StreamType.VIDEO,
):
    if u_id is None:
        u_id = uid  # remember the first uid seen
    elif u_id != uid:
        # Stop before printing, so a frame with a different uid is never
        # reported (same check-then-act order as the rerun logging cell).
        break
    print(f"{uid}, {frame['pts']} {result=}")

46b86895106499172f9682de0949fd61, 48.266666666666666 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.17713093884683, labels=['rally_winner_fg']))
46b86895106499172f9682de0949fd61, 48.3 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.17713093884683, labels=['rally_winner_fg']))
46b86895106499172f9682de0949fd61, 48.333333333333336 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.17713093884683, labels=['rally_winner_fg']))
46b86895106499172f9682de0949fd61, 48.36666666666667 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.17713093884683, labels=['rally_winner_fg']))
46b86895106499172f9682de0949fd61, 48.4 result=ClipSegmentResult(original_length=66.966667, kind='labels', value=LabelValue(start=48.24564186313338, end=57.1771

In [37]:
import matplotlib.pyplot as plt
import rerun as rr
from kornia.utils import tensor_to_image

# Start a rerun session (spawning the viewer) and put the "frames" timeline at 0.
rr.init("sss", spawn=True)
rr.set_time_seconds("frames", 0)

# Log every frame that shares the first uid; stop at the first different uid.
u_id = None
for i, (frame, result, uid) in enumerate(
    frames_from_clip_segments(
        dataset=dataset,
        local_path=ANNOTATION_DATA_PATH,
        stream_type=StreamType.VIDEO,
    )
):
    if u_id is None:
        u_id = uid
    elif u_id != uid:
        break
    print(result)
    # Place the image on the timeline at the frame's presentation timestamp.
    rr.set_time_seconds("frames", frame["pts"])
    rr.log_image(f"image/{uid}", tensor_to_image(frame["data"]))

In [None]:
import torchvision

video_path = "/Users/benjamindecharmoy/projects/courtvision/output041.mp4"
# Open the file with the default video stream selected. The reader takes the
# path to the video and the wanted stream.
reader = torchvision.io.VideoReader(video_path, stream="video")

# `get_metadata()` returns a dict of dicts shaped like
# {"stream_type": {"attribute": [attribute per stream]}}
# with duration and other metadata (often the frame rate) for every stream.
reader_md = reader.get_metadata()

# Frame rates of all video streams present in the file.
print(reader_md["video"]["fps"])

# The constructor already selected a default video stream; another stream
# could be selected explicitly, e.g.
# reader.set_current_stream("video:0")

In [None]:
# Show the `start` value of the first result of the first annotation of the
# first sample (presumably a timestamp in seconds -- TODO confirm).
dataset.samples[0].annotations[0].result[0].value.start

In [None]:
# Bounds of the first result of the first annotation of the first sample.
start_time, end_time = (
    dataset.samples[0].annotations[0].result[0].value.start,
    dataset.samples[0].annotations[0].result[0].value.end,
)

# Seek to the segment start, then print frame tensor shapes for as long as the
# frame's "pts" does not exceed the segment end.
for frame in itertools.takewhile(
    lambda candidate: candidate["pts"] <= end_time,
    reader.seek(start_time),
):
    print(frame["data"].shape)

In [None]:
# Print the tensor shape of each frame yielded by iterating `reader`.
# (A stray bare name `im` used to precede this loop and raised NameError.)
for d in reader:
    print(d["data"].shape)

In [None]:
import matplotlib.pyplot as plt
import cv2
import kornia
import torch

image = cv2.imread("nerdbrid.jpg")
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("Could not read image file: nerdbrid.jpg")

# kornia's morphology operators work on (B, C, H, W) tensors, whereas OpenCV
# returns (H, W, C). Move the channels first for the operation and move them
# back afterwards, so `dd` keeps the (1, H, W, C) layout that the following
# cells rely on (`dd.squeeze(0).numpy()` and `.max(axis=2)`).
dd = kornia.morphology.gradient(
    torch.tensor(image).permute(2, 0, 1).unsqueeze(0).float(),
    kernel=torch.ones(3, 3),
).permute(0, 2, 3, 1)

plt.imshow(dd.squeeze(0).numpy())

# Reference for the next step, the "maximal rectangle" algorithm:
# https://gist.github.com/zed/776423

In [None]:
import matplotlib.pyplot as plt

# Re-display `dd` (the kornia.morphology.gradient result) with dim 0 squeezed
# out and converted to a numpy array.
plt.imshow(dd.squeeze(0).numpy())

In [None]:
import torch


def maximal_rectangle(matrix):
    """Find the largest axis-aligned rectangle made only of non-zero cells.

    The grid is scanned row by row. For every row a histogram of "how many
    consecutive non-zero cells end at this row" is maintained per column, and
    the largest rectangle under that histogram is found with a monotonic stack.

    Args:
        matrix: 2D grid of scalars, given as a sequence of rows (nested lists,
            a 2D numpy array or a 2D tensor). Any non-zero value counts as
            filled, so both 0/1 and 0/255 masks work.

    Returns:
        A tuple ``(max_area, max_box)``. ``max_area`` is the number of cells in
        the best rectangle and ``max_box`` is ``[left, top, right, bottom]``
        with inclusive column / row indices. An empty ``matrix`` gives
        ``(0, [])``; a grid without any filled cell gives ``(0, [0, 0, 0, 0])``.
    """
    if len(matrix) == 0:
        return 0, []

    n_cols = len(matrix[0])
    max_area = 0
    max_box = [0, 0, 0, 0]
    # One extra trailing column that always stays 0: it acts as a sentinel that
    # flushes every bar still on the stack at the end of each row.
    heights = [0] * (n_cols + 1)

    for row_index, row in enumerate(matrix):
        # Update the histogram: extend the run of filled cells or reset it.
        for col in range(n_cols):
            filled = col < len(row) and row[col] != 0
            heights[col] = heights[col] + 1 if filled else 0

        # Stack of column indices with non-decreasing heights; -1 marks "left
        # of the first column".
        stack = [-1]
        for i in range(n_cols + 1):
            while stack[-1] != -1 and heights[i] < heights[stack[-1]]:
                h = heights[stack.pop()]
                # The popped bar extends from just right of the new stack top
                # up to (but excluding) column i.
                left = stack[-1] + 1
                area = h * (i - left)
                if area > max_area:
                    max_area = area
                    max_box = [left, row_index - h + 1, i - 1, row_index]
            stack.append(i)

    return max_area, max_box


# The input should be a 2D (binary) grid, for example:
# image = torch.tensor([[1, 0, 1, 0, 0],
#                       [1, 0, 1, 1, 1],
#                       [1, 1, 1, 1, 1],
#                       [1, 0, 0, 1, 0]], dtype=torch.int32)

# NOTE(review): with the example above commented out, `image` is whatever an
# earlier cell bound to that name -- in this notebook the array returned by
# cv2.imread, which presumably is not 2D. Confirm the intended input.
max_area, bounding_box = maximal_rectangle(image)
print("Max Area: ", max_area)
print("Bounding Box Coordinates: ", bounding_box)

In [None]:
import numpy as np

# Reduce the gradient image to a single plane by taking the maximum over
# axis 2 (the last axis once dim 0 is squeezed out) and store it as uint8.
shaped_image = dd.squeeze(0).numpy().max(axis=2).astype(np.uint8)

In [None]:
# Binarise the mask: every non-zero pixel becomes 255, all others stay 0.
shaped_image = (shaped_image > 0).astype(np.uint8) * 255
plt.imshow(shaped_image)

In [None]:
# Run the rectangle search on `shaped_image` (the 0/255 mask built above).
max_area, bounding_box = maximal_rectangle(shaped_image)

In [None]:
# Display the box returned by maximal_rectangle; judging by how the function
# builds it, the layout is [left, top, right, bottom] -- TODO confirm.
bounding_box
# plt.imshow(cv2.rectangle(shaped_image,
#                          ( 360, 537), (192, 127), (255, 255, 255), 2))