In [8]:
import os
# Move to the project root exactly once per kernel session.
# A bare os.chdir("..") climbs one directory further every time this cell is
# re-run, silently breaking every relative path used below. Remembering the
# resolved root makes the cell idempotent: the first run behaves as before,
# later runs simply return to the same directory.
if "NOTEBOOK_PROJECT_ROOT" not in globals():
    os.chdir("..")  # go to project root
    NOTEBOOK_PROJECT_ROOT = os.getcwd()
os.chdir(NOTEBOOK_PROJECT_ROOT)
print(f"cwd: {os.getcwd()}")  # sanity check

cwd: /home/dude-desktop/dev/cs760


In [9]:
import cv2
import numpy as np
import torch
from diffusers import StableDiffusionInpaintPipeline
from PIL import Image
from tqdm import tqdm


In [None]:
def _read_frames_as_pil(capture, color_conversion):
    """Drain an opened ``cv2.VideoCapture`` and return its frames as PIL images.

    Every decoded frame is passed through ``cv2.cvtColor`` with
    ``color_conversion`` (an OpenCV ``cv2.COLOR_*`` code) and then wrapped with
    ``Image.fromarray``.
    """
    frames = []
    while True:
        ret, frame = capture.read()
        if not ret:
            break
        frames.append(Image.fromarray(cv2.cvtColor(frame, color_conversion)))
    return frames


def inpaint_video_borders(video_path, mask_path, output_path, prompt, batch_size=64):
    """Inpaint every frame of a video with Stable Diffusion and save the result.

    The video and the mask video are read frame by frame, paired up by index,
    pushed through ``StableDiffusionInpaintPipeline`` in batches, and the
    generated frames are written to ``output_path`` with the XVID codec at the
    source video's frame rate and frame size.

    Args:
        video_path: Path to the input video.
        mask_path: Path to the mask video; it must contain exactly as many
            frames as the input video. Each mask frame is converted to a
            single-channel grayscale image before being handed to the pipeline
            (presumably white marks the region to repaint -- confirm against
            the diffusers documentation).
        output_path: Destination video file. Its parent directory is created
            when missing.
        prompt: Text prompt (a single string) applied to every frame.
        batch_size: Number of frames sent to the pipeline per call.

    Raises:
        AssertionError: If ``video_path`` or ``mask_path`` does not exist.
        ValueError: If ``batch_size`` is smaller than 1, if the video and mask
            have different frame counts, or if no frame could be decoded.
        RuntimeError: If the output video cannot be opened for writing.
    """
    # Kept as asserts so callers that rely on AssertionError keep working.
    assert os.path.exists(video_path), f"Video path {video_path} does not exist."
    assert os.path.exists(mask_path), f"Mask path {mask_path} does not exist"
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}.")

    # loading video and mask with opencv
    video_capture = cv2.VideoCapture(video_path)
    mask_capture = cv2.VideoCapture(mask_path)
    try:
        fps = video_capture.get(cv2.CAP_PROP_FPS)
        width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # PIL images are handed to the pipeline instead of raw uint8 arrays:
        # the diffusers image processor treats numpy input as already scaled
        # to [0, 1], which 0-255 OpenCV frames are not.
        frames = _read_frames_as_pil(video_capture, cv2.COLOR_BGR2RGB)
        mask_frames = _read_frames_as_pil(mask_capture, cv2.COLOR_BGR2GRAY)
    finally:
        # Release the decoders even when reading fails part-way through.
        video_capture.release()
        mask_capture.release()

    if len(frames) != len(mask_frames):
        raise ValueError("The number of frames in the video and mask must be the same.")
    if not frames:
        # OpenCV does not raise on an unreadable file; it just yields no frames.
        raise ValueError(f"No frames could be read from {video_path}.")

    # load stable diff inpainting pipeline
    # `torch_dtype` is the keyword documented by diffusers for half-precision
    # weights.
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-inpainting",
        torch_dtype=torch.float16,
    )
    pipe = pipe.to("cuda")

    output_frames = []
    for start in tqdm(range(0, len(frames), batch_size), desc="Inpainting"):
        frame_batch = frames[start:start + batch_size]
        mask_batch = mask_frames[start:start + batch_size]

        with torch.autocast("cuda"):
            # One prompt per image so the prompt batch matches the image batch.
            output = pipe(
                prompt=[prompt] * len(frame_batch),
                image=frame_batch,
                mask_image=mask_batch,
            ).images
        output_frames.extend(output)

    # combine outputted frames back into video
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        raise RuntimeError(f"Could not open {output_path} for writing.")

    try:
        for image in output_frames:
            # The pipeline returns PIL images (RGB); OpenCV wants BGR uint8 arrays.
            frame = np.asarray(image.convert("RGB"), dtype=np.uint8)
            # The pipeline may generate at its own working resolution; the
            # writer needs frames of exactly the size it was opened with.
            if frame.shape[0] != height or frame.shape[1] != width:
                frame = cv2.resize(frame, (width, height))
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        out.release()

    print(f"Inpainted video saved to {output_path}")

# Example run: one stabilised clip plus its matching border-mask video.
# All three paths are relative to the current working directory.
dummy_video_path = "out_pairs/stabilised/_Boom_Snap_Clap__challenge_clap_u_nm_np1_fr_med_1.avi"
dummy_mask_path = "out_pairs/masks/_Boom_Snap_Clap__challenge_clap_u_nm_np1_fr_med_1_mask.avi"
output_video_path = "outputs/output_video.avi"

# Positional order follows the function signature:
# (video_path, mask_path, output_path, prompt).
inpaint_video_borders(
    dummy_video_path,
    dummy_mask_path,
    output_video_path,
    "inpaint the borders",
    batch_size=8,
)

Loaded 78 frames from video and mask.
