In [1]:
!pip install opencv-python
!pip install numpy

Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84


In [89]:
# Create function to process frame
import numpy as np
import math

def change_perspective(image, fov, pitch=0, yaw=0):
    height, width = image.shape[:2]
    
    # Define intrinsic camera matrix
    focal_length = width / (2 * np.tan(fov / 2))
    K = np.array([[focal_length, 0, width / 2],
                  [0, focal_length, height / 2],
                  [0, 0, 1]])

    # Define rotation matrix
    pitch = np.deg2rad(pitch)
    yaw = np.deg2rad(yaw)
    R_pitch = np.array([[1, 0, 0],
                        [0, np.cos(pitch), -np.sin(pitch)],
                        [0, np.sin(pitch), np.cos(pitch)]])
    R_yaw = np.array([[np.cos(yaw), 0, np.sin(yaw)],
                      [0, 1, 0],
                      [-np.sin(yaw), 0, np.cos(yaw)]])
    R = np.dot(R_yaw, R_pitch)

    map_x, map_y = cv2.initUndistortRectifyMap(K, None, R, K, (width, height), cv2.CV_32FC1)
    perspective_view = cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR)

    return perspective_view

# def get_transformation_matrix([yaw, pitch, roll], position):
#     return ([
#         [math.cos(yaw) * math.cos(pitch), -math.sin(yaw), math.cos(yaw) * math.sin(pitch), position[0]],
#         [math.sin(yaw) * math.cos(pitch), math.cos(yaw), math.sin(yaw) * math.sin(pitch), position[1]],
#         [-math.sin(pitch), 0, math.cos(pitch) , position[2]],
#         [0, 0, 0, 1]
#     ])


def generate_transform_matrix(rot, pos):
    def Rx(theta):
      return np.matrix([[ 1, 0            , 0            ],
                        [ 0, np.cos(theta),-np.sin(theta)],
                        [ 0, np.sin(theta), np.cos(theta)]])
    def Ry(theta):
      return np.matrix([[ np.cos(theta), 0, np.sin(theta)],
                        [ 0            , 1, 0            ],
                        [-np.sin(theta), 0, np.cos(theta)]])
    def Rz(theta):
      return np.matrix([[ np.cos(theta), -np.sin(theta), 0 ],
                        [ np.sin(theta), np.cos(theta) , 0 ],
                        [ 0            , 0             , 1 ]])

    R = Rz(rot[2]) * Ry(rot[1]) * Rx(rot[0])
    xf_rot = np.eye(4)
    xf_rot[:3,:3] = R

    xf_pos = np.eye(4)
    xf_pos[:3,3] = pos

    # barbershop_mirros_hd_dense:
    # - camera plane is y+z plane, meaning: constant x-values
    # - cameras look to +x

    # Don't ask me...
    extra_xf = np.matrix([
        [-1, 0, 0, 0],
        [ 0, 0, 1, 0],
        [ 0, 1, 0, 0],
        [ 0, 0, 0, 1]])
    # NerF will cycle forward, so lets cycle backward.
    shift_coords = np.matrix([
        [0, 0, 1, 0],
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 0, 1]])
    xf = shift_coords @ extra_xf @ xf_pos
    assert np.abs(np.linalg.det(xf) - 1.0) < 1e-4
    xf = xf @ xf_rot
    return xf
    

def process_frame(frame, position):
    height, width = frame.shape[:2]
    
    # Split image
    top_half = frame[:height // 2, :]
    bottom_half = frame[height // 2:, :]
    
    width_splits = [[0, width // 3], [(width // 3), 2*(width // 3)], [2*(width // 3), width]]
    print(width_splits)
    
    # Process top half
    images = [top_half[:, width_split[0]:width_split[1]] for width_split in width_splits]
    
    # Process bottom half
    images = images + [
        # cv2.rotate(bottom_half[:, width_splits[0][0]:width_splits[0][1]], cv2.ROTATE_90_CLOCKWISE), 
        # cv2.rotate(bottom_half[:, width_splits[1][0]:width_splits[1][1]], cv2.ROTATE_90_COUNTERCLOCKWISE),
        # cv2.rotate(bottom_half[:, width_splits[2][0]:width_splits[2][1]], cv2.ROTATE_90_CLOCKWISE),
        bottom_half[:, width_splits[0][0]:width_splits[0][1]],
        bottom_half[:, width_splits[1][0]:width_splits[1][1]],
        bottom_half[:, width_splits[2][0]:width_splits[2][1]]
    ]
    
    images = [change_perspective(image, np.pi/2) for image in images]
    
    # Add transformation matrix to each image, images in order of left to right (top row first)   
    return ([
        (images[0], generate_transform_matrix([0, np.pi/2, 0], position)),
        (images[1], generate_transform_matrix([0, 0, 0], position)),
        (images[2], generate_transform_matrix([0, -np.pi/2, 0], position)),
        (images[3], generate_transform_matrix([-np.pi/2, -np.pi/2, 0], position)),
        (images[4], generate_transform_matrix([0, np.pi, -np.pi/2], position))
        # (images[5], generate_transform_matrix([0, -np.pi/2, 0], position)),
    ])
    

In [97]:
import cv2
import json

# Velocity is a hyperparameter that should be set (in m/s)
velocity = 0.8

video_path = '360_video.mp4'
cap = cv2.VideoCapture(video_path)

frame_rate = cap.get(cv2.CAP_PROP_FPS)
print(frame_rate)
print(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Set transformation json
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
ret, frame = cap.read()
height, width = frame.shape[:2]
height = height / 2
width = width / 3

transformations = {
    "camera_angle_x": np.pi / 2,
    "camera_angle_y": np.pi / 2,
    "fl_x": width / (2 * np.tan(np.pi / 4)),
    "fl_y": height / (2 * np.tan(np.pi / 4)),
    "k1": 0,
    "k2": 0,
    "p1": 0,
    "p2": 0,
    "cx": width / 2,
    "cy": height / 2,
    "w": width,
    "h": height,
    "aabb_scale": 4
}
frame_transformations = []

# Process each frame and save to file
for time_step in range(0, 5): #frame_count // int(frame_rate)):
    cap.set(cv2.CAP_PROP_POS_FRAMES, time_step)
    ret, frame = cap.read()
    if not ret:
        break
        
    # Get position using velocity and time step
    position = [0, 0, (velocity * time_step)]
    
    perspective_imgs = process_frame(frame, position)
    # Sace each image
    for image_idx, image_tuple in enumerate(perspective_imgs):
        cv2.imwrite(f'./images/{str(time_step*6 + image_idx).zfill(5)}.jpg', image_tuple[0])
        
        frame_transformations.append({
            "file_path": f'images/{str(time_step*6 + image_idx).zfill(5)}.jpg',
            "sharpness": 25,
            "transform_matrix": image_tuple[1].tolist()
        })

cap.release()

# Save transformations
transformations["frames"] = frame_transformations
with open('transforms.json', 'w', encoding='utf-8') as f:
    json.dump(transformations, f, ensure_ascii=False, indent=4)

30.0
397
[[0, 640], [640, 1280], [1280, 1920]]
[[0, 640], [640, 1280], [1280, 1920]]
[[0, 640], [640, 1280], [1280, 1920]]
[[0, 640], [640, 1280], [1280, 1920]]
[[0, 640], [640, 1280], [1280, 1920]]
