# CSCI576 Final Project

**Rui Zhu** 

**Yuyang Huang**

**Zixun Lu**

**Junyu Yan**

## Requirements

**Python 3.10**

**opencv-python==4.6.0.66**

**carvekit https://anodev.ru/image-background-remove-tool/**

**Please put the input video into the "videos" folder of your current path**

In [None]:
import os
import time
import math
import cv2 as cv
import numpy as np
from tqdm import tqdm
import shelve

# Generate foreground labels
import torch
from PIL import Image
from carvekit.api.interface import Interface
from carvekit.ml.wrap.fba_matting import FBAMatting
from carvekit.ml.wrap.tracer_b7 import TracerUniversalB7
from carvekit.ml.wrap.u2net import U2NET
from carvekit.pipelines.postprocessing import MattingMethod
from carvekit.pipelines.preprocessing import PreprocessingStub
from carvekit.trimap.generator import TrimapGenerator

# Step 0: Prepare global variables and helper functions

**If you want to use another test video instead of "test3.mp4", please change the FILENAME accordingly on Line 31**

In [None]:
def read_frames(path):
    """
    Decode the whole video at ``path`` and keep every frame in memory.

    :param path: location of the video file to read
    :return: list with one image per frame, in playback order
    :raises IOError: when the video cannot be opened
    """
    capture = cv.VideoCapture(path)
    if not capture.isOpened():
        raise IOError("Open video failed!")

    collected = []
    ok, image = capture.read()
    # keep pulling frames until the decoder reports failure or hands back nothing
    while ok and image is not None:
        collected.append(image)
        ok, image = capture.read()

    capture.release()
    return collected

def dir(path):
    """
    Make sure the directory ``path`` exists, creating it when it is missing.

    NOTE: this name shadows the builtin ``dir``; it is kept because other
    cells of the notebook call it under this name.

    :param path: directory to create (missing parent folders are created too)
    """
    if not os.path.exists(path):
        # makedirs handles nested paths; exist_ok closes the race between the
        # existence check above and the creation itself
        os.makedirs(path, exist_ok=True)


# ---------------------------------------------------------------------------
# Global configuration shared by every step of the notebook
# ---------------------------------------------------------------------------

# folders: the input video is read from INPUT_VIDEO_DIR, results go to OUTPUT_PATH
INPUT_VIDEO_DIR = 'videos'
dir(INPUT_VIDEO_DIR)
OUTPUT_PATH = 'out'
dir(OUTPUT_PATH)

# video to process
FILENAME = 'test3.mp4'
INPUT_VIDEO_PATH = os.path.join(INPUT_VIDEO_DIR, FILENAME)

# every derived file name starts with the part of FILENAME before its first dot
OUTPUT_FOREGROUND_VIDEO_NAME = FILENAME.partition('.')[0] + '_foreground.mp4'
OUTPUT_BACKGROUND_VIDEO_NAME = FILENAME.partition('.')[0] + '_background.mp4'
OUTPUT_PANORAMA_IMG_NAME = FILENAME.partition('.')[0] + '_panorama.jpg'
HOLE_FILLED_BACKGROUND_VIDEO_NAME = 'hole_filled_' + FILENAME.partition('.')[0] + '_background.mp4'

# full paths of the generated artefacts inside OUTPUT_PATH
OUTPUT_FOREGROUND_VIDEO_PATH = os.path.join(OUTPUT_PATH, OUTPUT_FOREGROUND_VIDEO_NAME)
OUTPUT_BACKGROUND_VIDEO_PATH = os.path.join(OUTPUT_PATH, OUTPUT_BACKGROUND_VIDEO_NAME)
OUTPUT_PANORAMA_IMG_PATH = os.path.join(OUTPUT_PATH, OUTPUT_PANORAMA_IMG_NAME)
HOLE_FILLED_BACKGROUND_VIDEO_PATH = os.path.join(OUTPUT_PATH, HOLE_FILLED_BACKGROUND_VIDEO_NAME)

# cache file names (relative to the current working directory)
H_PERSIST_FILENAME = 'H_persist'
FOREGROUND_LABELS_PERSISTENCE_FILENAME = '{}_foreground_labels.npy'.format(FILENAME.partition('.')[0])


# read the basic properties of the input video once
cap = cv.VideoCapture(INPUT_VIDEO_PATH)
if not cap.isOpened():
    raise IOError("Open video failed!")

fps, width, height = (
    int(cap.get(prop))
    for prop in (cv.CAP_PROP_FPS, cv.CAP_PROP_FRAME_WIDTH, cv.CAP_PROP_FRAME_HEIGHT)
)
# left as returned by OpenCV (a float), unlike the three values above
total_frame = cap.get(cv.CAP_PROP_FRAME_COUNT)
cap.release()

# Step 1: Separate background and foreground

###  1.1 Use [neural networks](https://github.com/OPHoperHPO/image-background-remove-tool) to remove the background from each frame of the video
> This neural network removes the background from the input image and outputs an image containing only the foreground, with an alpha value of 0 everywhere in the background.
### 1.2 Generate foreground labels in each frame
    1. Feed each frame of the original video into the network to get a background-removed frame.
    2. Compare the foreground-only output frame with the original frame pixel by pixel.
    3. Label a pixel as foreground if its alpha value is not 0.

**Since the background removal of the entire video consumes time, we cached all labels for development purposes.**

In [None]:
def get_foreground_labels(path, early_quit_frame_number=math.inf, early_quit_time=None):
    """
    Run the background-removal network on every frame of a video and collect,
    per frame, the coordinates of the pixels it keeps as foreground.

    :param path: path of the input video
    :param early_quit_frame_number: process at most this many frames
    :param early_quit_time: stop once this many seconds have elapsed (None/0 = no limit)
    :return: list with one set per processed frame; each set holds (row, col)
             tuples of the pixels whose alpha in the network output is not 0
    :raises IOError: if the video cannot be opened or a frame cannot be read
    """

    # NN setting
    _device = 'cuda' if torch.cuda.is_available() else 'cpu'
    seg_net = TracerUniversalB7(device=_device, batch_size=5, input_image_size=640, fp16=True)
    fba = FBAMatting(device=_device, input_tensor_size=2048, batch_size=1)
    trimap = TrimapGenerator(prob_threshold=231, kernel_size=30, erosion_iters=5)
    preprocessing = PreprocessingStub()
    postprocessing = MattingMethod(matting_module=fba, trimap_generator=trimap, device=_device)
    interface = Interface(pre_pipe=preprocessing, post_pipe=postprocessing, seg_pipe=seg_net)

    cap = cv.VideoCapture(path)
    # isOpened is a method: without the call parentheses the check can never fail
    if not cap.isOpened():
        cap.release()
        raise IOError("Open video failed!")

    foreground_labels = []
    start_time = time.time()
    try:
        frame_budget = int(min(cap.get(cv.CAP_PROP_FRAME_COUNT), early_quit_frame_number))
        for _ in tqdm(range(frame_budget)):
            if early_quit_time and time.time() - start_time > early_quit_time:
                break

            ret, frame = cap.read()
            if not ret:
                raise IOError("Read frame failed!")

            # OpenCV decodes to BGR, the network is fed an RGB PIL image
            frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            image = Image.fromarray(frame, mode="RGB")
            foreground = np.asarray(interface([image])[0])

            # last channel is alpha; every pixel with non-zero alpha is foreground.
            # np.nonzero replaces the per-pixel Python loop; tolist() keeps plain ints.
            rows, cols = np.nonzero(foreground[..., -1])
            foreground_labels.append(set(zip(rows.tolist(), cols.tolist())))

    except KeyboardInterrupt:
        # keep whatever has been labelled so far
        print('Interrupted!')
    finally:
        # release the capture even when reading or inference raises
        cap.release()
        cv.destroyAllWindows()

    return foreground_labels

In [None]:
foreground_labels = None

# if there exists foreground label cache file, just load it. Otherwise, compute it
if not os.path.exists(FOREGROUND_LABELS_PERSISTENCE_FILENAME):
    print('Generate foreground labels...')
    # compute foreground labels
    foreground_labels = get_foreground_labels(INPUT_VIDEO_PATH)
    # save foreground labels; the per-frame lists have different lengths, so the
    # array must be built with dtype=object (newer NumPy refuses to create a
    # ragged array implicitly)
    np.save(FOREGROUND_LABELS_PERSISTENCE_FILENAME,
            np.array([list(label) for label in foreground_labels], dtype=object))
    print('Generate labels done.')
else:
    print('Load persisted file...')
    # If there is stored foreground label file, load it locally.
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load cache files
    # that were written by this notebook.
    foreground_labels = np.load(FOREGROUND_LABELS_PERSISTENCE_FILENAME, allow_pickle=True)
    # convert foreground_labels to list of set; tuple() makes every pixel hashable
    # whether it was stored as a tuple or as a row of an object array
    foreground_labels = [set(tuple(pixel) for pixel in frame_label) for frame_label in foreground_labels]
    print('Load labels done.')

# Step 2 Generate foreground video and background video without holes


## 2.1 generate foreground video and background video with holes


In [None]:
def generate_foreground_background_videos(foreground_labels):
    """
    Split the input video into a foreground-only video and a background video
    with holes, using the per-frame foreground labels.

    In the foreground video every non-foreground pixel is white; in the
    background video every foreground pixel is painted white (the "hole").
    Nothing is done when both output videos already exist.

    :param foreground_labels: list of sets of (row, col) foreground pixels, one per frame
    :raises IOError: if the input video or an output writer cannot be opened,
                     or a frame cannot be read
    """
    if os.path.exists(OUTPUT_FOREGROUND_VIDEO_PATH) and os.path.exists(OUTPUT_BACKGROUND_VIDEO_PATH):
        return
    # the output paths are built from OUTPUT_PATH, so create that folder
    os.makedirs(OUTPUT_PATH, exist_ok=True)

    cap = cv.VideoCapture(INPUT_VIDEO_PATH)
    # isOpened must be called; the bare attribute is always truthy
    if not cap.isOpened():
        cap.release()
        raise IOError("Open video failed!")

    foreground_out = None
    background_out = None
    try:
        fourcc = cv.VideoWriter_fourcc(*'XVID')
        video_fps = int(cap.get(cv.CAP_PROP_FPS))
        frame_size = (int(cap.get(cv.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)))
        foreground_out = cv.VideoWriter(OUTPUT_FOREGROUND_VIDEO_PATH, fourcc, video_fps, frame_size)
        background_out = cv.VideoWriter(OUTPUT_BACKGROUND_VIDEO_PATH, fourcc, video_fps, frame_size)
        if not foreground_out.isOpened() or not background_out.isOpened():
            raise IOError("Init videoWriter failed!")

        for label in tqdm(foreground_labels):
            ret, frame = cap.read()
            if not ret:
                raise IOError("Read frame failed!")

            foreground = np.full_like(frame, 255)
            if label:
                # one fancy-indexed copy instead of a Python loop over every pixel
                rows, cols = np.array(tuple(label), dtype=np.intp).T
                foreground[rows, cols] = frame[rows, cols]
                frame[rows, cols] = 255

            foreground_out.write(foreground)
            background_out.write(frame)

    except KeyboardInterrupt:
        # NOTE: the partially written videos stay on disk and make the next call
        # return early; delete them to regenerate
        print('Interrupted!')
    finally:
        # release everything even when reading or writing raises
        cap.release()
        if foreground_out is not None:
            foreground_out.release()
        if background_out is not None:
            background_out.release()
        cv.destroyAllWindows()

In [None]:
# Write the foreground-only video and the background video with holes into
# OUTPUT_PATH; the call returns immediately when both files already exist.
generate_foreground_background_videos(foreground_labels)

## 2.2 fill background holes

In [None]:
# compute Homography matrix from src to dst
def get_H_matrix(src, dst):
    """
    Estimate the homography that maps points of ``src`` onto ``dst``.

    SIFT features are matched with a FLANN KD-tree matcher, filtered with
    Lowe's ratio test (0.7) and fed to RANSAC (reprojection threshold 5.0).

    :param src: source image
    :param dst: destination image
    :return: 3x3 homography matrix from src to dst
    :raises ValueError: if 10 or fewer good matches are found, or no homography
                        can be estimated from them
    """
    # minimum number of matches we want find between these two images
    min_match_threshold = 10

    # initiate feature detector, currently use SIFT, may try orb later
    sift = cv.SIFT_create()

    # find the keypoints and descriptors with SIFT
    kp1, des1 = sift.detectAndCompute(src, None)
    kp2, des2 = sift.detectAndCompute(dst, None)

    # a featureless image yields no descriptors, and knnMatch(k=2) needs at
    # least two candidates to choose from; report it like any other match failure
    if des1 is None or des2 is None or len(des2) < 2:
        raise ValueError("Not enough matches are found - {}/{}".format(0, min_match_threshold))

    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

    # store all the good matches as per Lowe's ratio test;
    # entries with fewer than two neighbours cannot be ratio-tested and are skipped
    good = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
    ]

    if len(good) <= min_match_threshold:
        raise ValueError("Not enough matches are found - {}/{}".format(len(good), min_match_threshold))

    src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    H, _ = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 5.0)
    if H is None:
        # RANSAC found no consistent model; fail here instead of handing None to callers
        raise ValueError("Homography estimation failed with {} matches".format(len(good)))

    return H

In [None]:
# fill i_th frame's hole using adjacent frames per gap
def fill_hole(background_frames, foreground_labels, i, gap = 1):
    """
    Fill the foreground hole of frame ``i`` with background pixels taken from
    other frames, visited alternately to the right and to the left of ``i``.

    The frame is modified in place (``background_frames[i]`` is the array that
    is returned); ``foreground_labels`` is left untouched. Pixels that no
    reachable frame can provide keep their current value.

    :param background_frames: list of background frames with holes
    :param foreground_labels: list of foreground labels (sets of (row, col)) for each frame
    :param i: index of the frame we want fill
    :param gap: is the distance unit between target frame and current frame when filling hole
    :return: hole_filled frame
    """
    assert len(background_frames) == len(foreground_labels)
    assert i in range(len(background_frames))
    assert gap >= 1

    frame_count = len(background_frames)
    # should not change original set, otherwise when we are using this frame to fill other frames' holes, result would be a mess!
    cur_label = foreground_labels[i].copy()
    res_frame = background_frames[i]
    left = 1
    right = 1
    last_direction = False

    # while there are still pixels not filled in current frame
    while cur_label:
        right_index = i + right * gap
        left_index = i - left * gap
        # both directions have run out of frames: stop instead of looping
        # forever over the pixels that could not be filled
        if right_index >= frame_count and left_index < 0:
            break

        # we iterate first to right, then to left, then back to right, then back to left...
        # each time we try to grab some pixels from target frame which are not foreground pixels there
        target_index = left_index if last_direction else right_index

        if 0 <= target_index < frame_count:
            target_frame = background_frames[target_index]
            target_height, target_width = target_frame.shape[:2]
            target_frame_foreground_label = foreground_labels[target_index]
            pending = list(cur_label)

            # labels are (row, col) but OpenCV points are (x, y), hence the swap
            # perspectiveTransform needs 3 dimensions, so the points are wrapped as 1 * N * 2
            srcpts = np.array([[(x, y) for (y, x) in pending]], dtype=np.float32)
            H = get_H_matrix(background_frames[i], target_frame)
            dstpts = np.rint(cv.perspectiveTransform(srcpts, H)[0]).astype(int).tolist()

            # a pixel is filled when its projection lands inside the target frame
            # and is not part of the target frame's own foreground
            for (y, x), (corresponding_x, corresponding_y) in zip(pending, dstpts):
                if (0 <= corresponding_x < target_width
                        and 0 <= corresponding_y < target_height
                        and (corresponding_y, corresponding_x) not in target_frame_foreground_label):
                    cur_label.remove((y, x))
                    res_frame[y][x] = target_frame[corresponding_y][corresponding_x]

            # advance only the direction that was just used
            if not last_direction:
                right += 1
            else:
                left += 1

        last_direction = not last_direction

    return res_frame

## 2.3 Generate videos with foreground objects removed and hole filled (for application 3)

In [None]:
# fill all background frames' holes and save them to image and video
def generate_hole_filled_background_video():
    """
    Fill the holes of every frame of the background video and save the result
    both as JPEG images (folder ``hole_filled_background``) and as the video
    at HOLE_FILLED_BACKGROUND_VIDEO_PATH.

    Nothing is done when the hole-filled video already exists.

    :raises IOError: if the output video writer cannot be opened
    """
    if os.path.exists(HOLE_FILLED_BACKGROUND_VIDEO_PATH):
        return
    background_frames = read_frames(OUTPUT_BACKGROUND_VIDEO_PATH)
    out_video = cv.VideoWriter(HOLE_FILLED_BACKGROUND_VIDEO_PATH, cv.VideoWriter_fourcc(*'XVID'), fps, (width, height))
    # explicit check instead of assert, which is stripped under "python -O"
    if not out_video.isOpened():
        raise IOError("Init videoWriter failed!")

    image_dir = 'hole_filled_background'
    try:
        os.makedirs(image_dir, exist_ok=True)
        for i in tqdm(range(len(background_frames))):
            res = fill_hole(background_frames, foreground_labels, i, 6)
            cv.imwrite(os.path.join(image_dir, '{}_background_{}.jpg'.format(FILENAME.split('.')[0], i)), res)
            # the panorama step reads its frames back from this video, so the
            # frames must actually be written to it
            out_video.write(res)

    except KeyboardInterrupt:
        # NOTE: the partial video stays on disk and makes the next call return
        # early; delete it to regenerate
        print("Interrupted!")
    finally:
        out_video.release()

In [None]:
# Fill the holes of the background video; returns immediately when
# HOLE_FILLED_BACKGROUND_VIDEO_PATH already exists.
generate_hole_filled_background_video()

# Step 3: Generate panorama for the background

In [None]:
# helper functions

# produce a resized copy of an image (smaller images make the H matrix computation faster)
def scale_image(img, scale_factor=1):
    """
    Resize ``img`` by ``scale_factor`` along both axes.

    :param img: image to resize
    :param scale_factor: multiplier applied to width and height
    :return: the resized image
    """
    src_height, src_width = img.shape[0], img.shape[1]
    # cv.resize expects the target size as [width, height]
    target_size = [int(round(src_width * scale_factor)), int(round(src_height * scale_factor))]
    return cv.resize(img, target_size, interpolation=cv.INTER_LINEAR_EXACT)

In [None]:
# stitch src to dst, with dst padded, return padded result with shifted_homography matrix, for possible future mapping into the panorama
def stitch(dst, src, H):
    """
    Warp ``src`` with homography ``H`` onto a canvas large enough to hold both
    the warped ``src`` and ``dst``, then copy ``dst`` on top of it.

    ``dst`` pixels win wherever they are non-zero, so the warped ``src`` only
    shows up where ``dst`` is empty or outside ``dst``.

    :param dst: image that keeps its place (the panorama built so far)
    :param src: image to warp into the frame of ``dst``
    :param H: 3x3 homography mapping ``src`` coordinates to ``dst`` coordinates
    :return: (stitched_image, shifted_transf) where shifted_transf is ``H``
             followed by the translation that was applied to keep all
             coordinates non-negative
    """
    assert H.shape == (3,3)

    # ensure a legal homography
    H = H / H[2, 2]
    src_h, src_w = src.shape[:2]
    # the four corners of src in homogeneous coordinates, one per column
    lin_homg_pts = np.array([
        [0, src_w, src_w, 0],
        [0, 0, src_h, src_h],
        [1, 1, 1, 1]])
    # transform points
    transf_lin_homg_pts = H.dot(lin_homg_pts)
    transf_lin_homg_pts /= transf_lin_homg_pts[2, :]

    # find min and max points
    min_x = int(np.floor(np.min(transf_lin_homg_pts[0])))
    min_y = int(np.floor(np.min(transf_lin_homg_pts[1])))
    max_x = int(np.ceil(np.max(transf_lin_homg_pts[0])))
    max_y = int(np.ceil(np.max(transf_lin_homg_pts[1])))

    # add translation to the transformation matrix to shift to positive values
    anchor_x, anchor_y = 0, 0
    transl_transf = np.eye(3, 3)
    if min_x < 0:
        anchor_x = -min_x
        transl_transf[0, 2] += anchor_x
    if min_y < 0:
        anchor_y = -min_y
        transl_transf[1, 2] += anchor_y
    shifted_transf = transl_transf.dot(H)
    shifted_transf /= shifted_transf[2, 2]

    dst_h, dst_w = dst.shape[:2]
    pad_top, pad_bottom = anchor_y, max(max_y, dst_h) - dst_h
    pad_left, pad_right = anchor_x, max(max_x, dst_w) - dst_w

    stitched_image = cv.warpPerspective(
        src, shifted_transf, (dst_w + pad_left + pad_right, dst_h + pad_top + pad_bottom),
        flags=cv.INTER_LINEAR, borderMode=cv.BORDER_CONSTANT, borderValue=0)

    # make the final effect as, the new image(i.e. the src image) is padding the external hole of the padded_dst:
    # every dst pixel with at least one non-zero channel overwrites the warped src.
    # A boolean mask does this in one step instead of a Python loop over every pixel.
    covered = dst.any(axis=2) if dst.ndim == 3 else dst != 0
    window = stitched_image[pad_top:pad_top + dst_h, pad_left:pad_left + dst_w]
    window[covered] = dst[covered]

    return stitched_image, shifted_transf

In [None]:
# After many tests on how the step size and the way the homography is computed in each iteration affect the panorama quality,
# we chose to compute the homography between each new hole-filled frame and the panorama generated so far,
# and to use this H to stitch the frame in and create the panorama for the next iteration.
# The panorama grows following a right, left, right, left pattern for a better result.
def generate_panorama_using_hole_filled_frames(right_step =30, left_step=30, anchor_index=-1, intermediate_panoramas=None):
    """
    Build a background panorama from the hole-filled background video.

    Starting from the anchor frame, frames ``right_step`` / ``left_step`` apart
    are stitched alternately on the right and on the left.

    :param right_step: frame distance between two frames stitched on the right
    :param left_step: frame distance between two frames stitched on the left
    :param anchor_index: index of the starting frame; -1 selects the middle frame
    :param intermediate_panoramas: optional list that receives the panorama after every stitch
    :return: (panorama, shifted_H_persist); shifted_H_persist maps
             (frame_index, anchor_index) to the shifted homography returned by
             ``stitch`` for that frame. After a KeyboardInterrupt the partial
             results built so far are returned.
    :raises IOError: if no frame can be read from the hole-filled video
    """
    hole_filled_frames = read_frames(HOLE_FILLED_BACKGROUND_VIDEO_PATH)
    if not hole_filled_frames:
        # fail with a clear message instead of an IndexError further down
        raise IOError("No frames read from {}".format(HOLE_FILLED_BACKGROUND_VIDEO_PATH))
    assert -1 <= anchor_index < len(hole_filled_frames)
    assert right_step > 0 and left_step > 0

    frame_count = len(hole_filled_frames)
    anchor_index = frame_count // 2 if anchor_index == -1 else anchor_index
    shifted_H_persist = {}

    panorama = hole_filled_frames[anchor_index].copy()
    left = anchor_index - left_step
    right = anchor_index + right_step
    grow_left = False
    count = 1

    try:
        while left >= 0 or right < frame_count:
            print('iteration {}, time: {}'.format(count, time.ctime()))
            count += 1
            # the panorama grows first to right then to left
            index = left if grow_left else right
            if 0 <= index < frame_count:
                frame = hole_filled_frames[index]
                H = get_H_matrix(scale_image(frame, scale_factor=1), scale_image(panorama, scale_factor=1))
                panorama, shifted_H = stitch(panorama, frame, H)
                shifted_H_persist[(index, anchor_index)] = shifted_H
                if intermediate_panoramas is not None:
                    intermediate_panoramas.append(panorama)
                if grow_left:
                    left -= left_step
                else:
                    right += right_step
            grow_left = not grow_left
    except KeyboardInterrupt:
        # fall through and return the partial panorama so that callers
        # unpacking two values keep working
        print('Interrupted!')

    return panorama, shifted_H_persist

In [None]:
# Build the background panorama, stitching one frame every 48 frames on each
# side of the middle frame (anchor_index=-1), keep every intermediate result
# for inspection and save the final panorama to OUTPUT_PANORAMA_IMG_PATH.
intermediate_panoramas = []
panorama, shifted_H_persist = generate_panorama_using_hole_filled_frames(right_step=48, left_step=48, anchor_index=-1, intermediate_panoramas=intermediate_panoramas)
cv.imwrite(OUTPUT_PANORAMA_IMG_PATH, panorama)

In [None]:
# you can view the intermediate panorama results here:
# each panorama is shown in a window until a key is pressed
for p in intermediate_panoramas:
    cv.imshow('intermediate panorama', p)
    cv.waitKey(0)
    cv.destroyAllWindows()
    # this line is for macos compatibility
    cv.waitKey(1)

# Step 4: Create Application 1

In [None]:
def interpolate_foreground_on_panorama(panorama_img, foreground_labels_set):
    """
    Close small gaps between pasted foreground pixels, in place.

    A non-foreground pixel that lies at least 4 pixels away from every image
    border and has 5 or more foreground pixels in its surrounding 5x5 window
    is replaced by the average colour of those foreground neighbours.

    :param panorama_img: 3-channel image, modified in place
    :param foreground_labels_set: set of (row, col) foreground pixels on the panorama
    """
    height = len(panorama_img)
    if height <= 8 or not foreground_labels_set:
        return
    width = len(panorama_img[0])
    if width <= 8:
        return

    # offsets of the 5x5 window around a pixel, centre excluded
    neighbour_offsets = [
        (dm, dn)
        for dm in range(-2, 3)
        for dn in range(-2, 3)
        if (dm, dn) != (0, 0)
    ]

    # Only pixels within 2 of a foreground pixel can reach the threshold, so
    # collect those instead of scanning the whole panorama.
    candidates = set()
    for (m, n) in foreground_labels_set:
        m, n = int(m), int(n)
        for dm, dn in neighbour_offsets:
            i, j = m + dm, n + dn
            if (4 <= i < height - 4 and 4 <= j < width - 4
                    and (i, j) not in foreground_labels_set):
                candidates.add((i, j))

    # Only non-foreground pixels are written and only foreground pixels are
    # read, so the processing order does not influence the result.
    for (i, j) in candidates:
        fore_count = 0
        cur_pix = np.array([0, 0, 0])
        for dm, dn in neighbour_offsets:
            m, n = i + dm, j + dn
            if (m, n) in foreground_labels_set:
                fore_count += 1
                cur_pix = np.add(cur_pix, panorama_img[m][n])
        if fore_count >= 5:
            panorama_img[i][j] = cur_pix / fore_count

In [None]:
# Application 1
def fill_in_foreground_and_generate_img(n):
    """
    Paste the foreground of every n-th frame onto the background panorama,
    then show the result and save it as ``<video>_panorama_output1.jpg`` in
    OUTPUT_PATH.

    :param n: a frame is pasted whenever the 1-based frame counter is a multiple of n
    :raises IOError: if the video or the panorama cannot be read
    """
    cap = cv.VideoCapture(INPUT_VIDEO_PATH)
    # isOpened must be called; the bare attribute is always truthy
    if not cap.isOpened():
        cap.release()
        raise IOError("Open video failed!")

    try:
        panorama = cv.imread(OUTPUT_PANORAMA_IMG_PATH)
        if panorama is None:
            raise IOError("Read panorama failed!")
        panorama_foreground_set = set()
        panorama_height, panorama_width = panorama.shape[:2]
        try:
            for i in tqdm(range(1, int(cap.get(cv.CAP_PROP_FRAME_COUNT)))):
                ret, prev = cap.read()
                if not ret:
                    raise IOError("Read frame failed!")

                if i % n != 0:
                    continue

                # the i-th read returns frame i - 1 (0-based), so its labels
                # live at index i - 1
                pixels = list(foreground_labels[i - 1])
                if not pixels:
                    # nothing to paste; perspectiveTransform cannot take an empty point list
                    continue

                H = get_H_matrix(prev, panorama)
                # labels are (row, col) but OpenCV points are (x, y), hence the swap
                srcpts = np.array([[(x, y) for (y, x) in pixels]], dtype=np.float32)
                dstpts = np.rint(cv.perspectiveTransform(srcpts, H)[0]).astype(int).tolist()
                for (y, x), (corresponding_x, corresponding_y) in zip(pixels, dstpts):
                    if 0 <= corresponding_x < panorama_width and 0 <= corresponding_y < panorama_height:
                        panorama[corresponding_y][corresponding_x] = prev[y][x]
                        panorama_foreground_set.add((corresponding_y, corresponding_x))

        except KeyboardInterrupt:
            print('Interrupted!')

        # close the small gaps the projection leaves between pasted pixels
        interpolate_foreground_on_panorama(panorama, panorama_foreground_set)
    finally:
        # release the capture even when a step above raises
        cap.release()

    cv.imshow('panorama', panorama)
    cv.imwrite(os.path.join(OUTPUT_PATH, "{}_panorama_output1.jpg".format(FILENAME.split('.')[0])), panorama)
    cv.waitKey(0)
    cv.destroyAllWindows()
    # extra waitKey so that the window really closes (macOS)
    cv.waitKey(1)

In [None]:
# Application 1: paste the foreground onto the panorama once every 56 frames
fill_in_foreground_and_generate_img(56)

# Step 5: Create Application 2

In [None]:
# Application 2 pre video (Firstly generate all images)
def fill_in_foreground_and_generate_all_imgs(output_folder, start, end):
    """
    For every frame index in [start, end) paste that frame's foreground onto a
    fresh copy of the panorama and save the result as
    ``<output_folder>/<video>_panorama_with_foreground_<index>.jpg``.

    :param output_folder: folder receiving the images (created if missing)
    :param start: first frame index to process (negative values are treated as 0)
    :param end: one past the last frame index (capped at the frame count)
    :raises IOError: if the video, the panorama or a requested frame cannot be read
    """
    cap = cv.VideoCapture(INPUT_VIDEO_PATH)
    # isOpened must be called; the bare attribute is always truthy
    if not cap.isOpened():
        cap.release()
        raise IOError("Open video failed!")

    try:
        os.makedirs(output_folder, exist_ok=True)
        panorama = cv.imread(OUTPUT_PANORAMA_IMG_PATH)
        if panorama is None:
            raise IOError("Read panorama failed!")
        panorama_height, panorama_width = panorama.shape[:2]

        frame_total = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
        start = max(start, 0)
        if start >= frame_total:
            return
        end = min(end, frame_total)

        # frames before `start` still have to be decoded to reach it,
        # frames from `end` on are never needed
        for i in tqdm(range(end)):
            ret, prev = cap.read()
            if i < start:
                continue
            if not ret:
                raise IOError("Read frame failed!")

            cur_panorama = panorama.copy()
            pixels = list(foreground_labels[i])
            if pixels:
                panorama_foreground_set = set()
                H = get_H_matrix(prev, cur_panorama)
                # labels are (row, col) but OpenCV points are (x, y), hence the swap
                srcpts = np.array([[(x, y) for (y, x) in pixels]], dtype=np.float32)
                dstpts = np.rint(cv.perspectiveTransform(srcpts, H)[0]).astype(int).tolist()
                for (y, x), (corresponding_x, corresponding_y) in zip(pixels, dstpts):
                    if 0 <= corresponding_x < panorama_width and 0 <= corresponding_y < panorama_height:
                        cur_panorama[corresponding_y][corresponding_x] = prev[y][x]
                        panorama_foreground_set.add((corresponding_y, corresponding_x))

                interpolate_foreground_on_panorama(cur_panorama, panorama_foreground_set)
            cv.imwrite(f"{output_folder}/{FILENAME.split('.')[0]}_panorama_with_foreground_{i}.jpg", cur_panorama)
    except KeyboardInterrupt:
        print('Interrupted!')
    finally:
        # runs on every exit path, including the early return above
        cap.release()

In [None]:
# Application 2, first pass: write one panorama-with-foreground image per frame
# (frame indices 0..594) into the folder 'output3_best'.
fill_in_foreground_and_generate_all_imgs('output3_best', 0, 595)

In [None]:
def createAppication2Video(panoBackground: np.ndarray, frames: np.ndarray,
    start: tuple[int, int], end: tuple[int, int], dimension=(1920, 1080)) -> list[np.ndarray]:
    """
    Simulate a camera that pans linearly from ``start`` to ``end`` by cropping
    a window of size ``dimension`` out of every frame.

    :param panoBackground: panorama image; only its width and height are used,
                           to clamp the start and end positions
    :param frames: sequence of frames to crop, one crop per frame
    :param start: (x, y) camera centre for the first frame
    :param end: (x, y) camera centre the camera moves towards
    :param dimension: (width, height) of the cropped window
    :return: list with one cropped frame per input frame (empty for no frames)

    NOTE(review): positions are clamped against ``panoBackground`` while the
    crop is taken from ``frames``; if the frames are smaller than the panorama
    the crops can come out smaller than ``dimension`` - confirm with the caller.
    """
    frame_count = len(frames)
    if frame_count == 0:
        # nothing to crop; also avoids dividing by zero below
        return []

    start = updatePosition(*start, *dimension, panoBackground.shape[1], panoBackground.shape[0])
    end = updatePosition(*end, *dimension, panoBackground.shape[1], panoBackground.shape[0])

    # per-frame camera displacement: both axes are spread over the number of
    # frames (the vertical step used to be divided by the frame height)
    dx = (end[0] - start[0]) / frame_count
    dy = (end[1] - start[1]) / frame_count
    halfWidth = int(0.5 * dimension[0])
    halfHeight = int(0.5 * dimension[1])

    new_frames = []
    camera_center: list[float] = [start[0], start[1]]

    for frame in frames:
        lx, rx = int(camera_center[0] - halfWidth), int(camera_center[0] + halfWidth)
        ly, ry = int(camera_center[1] - halfHeight), int(camera_center[1] + halfHeight)

        new_frames.append(frame[ly:ry, lx:rx])
        camera_center[0] += dx
        camera_center[1] += dy
    return new_frames

def updatePosition(x: int, y: int, cameraWidth: int,
                           cameraHeight: int, backgroundWidth: int, backgroundHeight: int) -> tuple[int, int]:
    """
    Clamp a camera centre so that a camera window of the given size stays
    inside the background.

    :param x: requested horizontal centre
    :param y: requested vertical centre
    :param cameraWidth: width of the camera window
    :param cameraHeight: height of the camera window
    :param backgroundWidth: width of the background
    :param backgroundHeight: height of the background
    :return: the clamped (x, y) centre; when the background is smaller than
             the camera window the lower bound (half the window) wins
    """
    halfCameraWidth = cameraWidth // 2
    halfCameraHeight = cameraHeight // 2

    # upper bound first, then lower bound, per axis
    clampedX = min(backgroundWidth - halfCameraWidth, x)
    if clampedX < halfCameraWidth:
        clampedX = halfCameraWidth

    clampedY = min(backgroundHeight - halfCameraHeight, y)
    if clampedY < halfCameraHeight:
        clampedY = halfCameraHeight

    return (clampedX, clampedY)

def saveApplication2Output(filename: str, frames: list[np.ndarray] | np.ndarray,
          wid: int, hei: int, fps: int = 30) -> None:
    """
    Write ``frames`` to ``out/<filename>.mp4`` with the mp4v codec.

    :param filename: output file name without folder and extension
    :param frames: frames to write, in order
    :param wid: frame width given to the video writer
    :param hei: frame height given to the video writer
    :param fps: frame rate of the output video (defaults to the previous fixed value, 30)
    :raises IOError: if the video writer cannot be opened
    """
    file = cv.VideoWriter(f'out/{filename}.mp4', cv.VideoWriter_fourcc(*'mp4v'), fps, (wid, hei))
    # fail loudly instead of silently producing no output
    if not file.isOpened():
        raise IOError("Init videoWriter failed!")
    try:
        for frame in frames:
            file.write(frame)
    finally:
        # always finalise the file, even when a write raises
        file.release()

# Application 2, second pass: turn the per-frame panorama images into a video
# that pans a 1920x1080 camera window across them.
resolution = (1920, 1080)

# panoVideo = loadVideo('test3_panorama_video.mp4')

# frames of the panorama-with-foreground sequence, loaded from the images
# written by fill_in_foreground_and_generate_all_imgs
panoVideo = []

try:
    for i in tqdm(range(0, 595)):
        cur_img_name = f"output3_best/{FILENAME.split('.')[0]}_panorama_with_foreground_{i}.jpg"

        # indices without an image on disk are skipped
        if os.path.isfile(cur_img_name):
            curFrame = cv.imread(cur_img_name)
            # every image is shrunk to 44% of its size before being used as a frame
            curFrame = cv.resize(curFrame, (int(curFrame.shape[1] * 0.44), int(curFrame.shape[0] * 0.44)))
            panoVideo.append(curFrame)
            # panorama_video_out.write(cv.imread(cur_img_name))
except KeyboardInterrupt:
    print('Interrupted!')

# the full-size panorama is only used for its dimensions when clamping the camera path
pano = cv.imread(OUTPUT_PANORAMA_IMG_PATH)
# pan from x=600 to x=3120 (both positions are clamped by updatePosition) and save the result
out2 = createAppication2Video(pano, np.array(panoVideo), (600, 10), (3120, 10), resolution)
saveApplication2Output('test3Application2output', out2, 1920, 1080)