In [1]:
import cv2
import numpy as np
import torch
from PIL import Image
import tqdm as tqdm

In [2]:
!pip install -qq -U diffusers transformers ftfy accelerate

In [3]:
import torch
from diffusers import AutoPipelineForInpainting
from diffusers.utils import load_image, make_image_grid

# Build the inpainting pipeline once for the whole notebook. The fp16 weight
# variant is requested together with a float16 torch dtype.
pipeline = AutoPipelineForInpainting.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    variant="fp16",
    torch_dtype=torch.float16,
)
# Turn on diffusers' model CPU offload for this pipeline.
pipeline.enable_model_cpu_offload()

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

2024-08-01 11:47:53.790139: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-01 11:47:53.790271: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-01 11:47:53.900917: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  deprecate("VQEncoderOutput", "0.31", deprecation_message)
  deprecate("VQModel", "0.31", deprecation_message)


model_index.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

safety_checker/config.json:   0%|          | 0.00/4.78k [00:00<?, ?B/s]

(…)ature_extractor/preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/748 [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/608M [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/1.72G [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [4]:
def pad_resize(frame, target_width=1280, target_height=720):
    """Scale a frame to fit inside a target canvas and centre it on black.

    The frame is resized uniformly (aspect ratio preserved) so that it fits
    within ``target_width`` x ``target_height``; the remaining area is left
    as zeros.

    Args:
        frame: Image array whose first two axes are (height, width). Any
            trailing channel axis is carried over to the canvas.
        target_width: Width of the returned canvas in pixels.
        target_height: Height of the returned canvas in pixels.

    Returns:
        tuple: ``(new_frame, x_offset, new_width)`` where ``new_frame`` is the
        uint8 canvas, ``x_offset`` is the column at which the resized frame
        starts and ``new_width`` is the width of the resized frame. Only the
        horizontal placement is reported.
    """
    org_height, org_width = frame.shape[:2]
    scale = min(target_height / org_height, target_width / org_width)

    # int() truncates, so a very elongated frame could end up with a 0-pixel
    # side, which cv2.resize rejects; keep at least one pixel per side.
    new_width = max(1, int(org_width * scale))
    new_height = max(1, int(org_height * scale))

    resized_frame = cv2.resize(frame, (new_width, new_height))

    # Take the channel layout from the resized frame instead of assuming
    # three channels, so grayscale and BGRA frames can be padded as well.
    canvas_shape = (target_height, target_width) + resized_frame.shape[2:]
    new_frame = np.zeros(canvas_shape, dtype=np.uint8)

    y_offset = (target_height - new_height) // 2
    x_offset = (target_width - new_width) // 2
    new_frame[y_offset:y_offset + new_height, x_offset:x_offset + new_width] = resized_frame

    return new_frame, x_offset, new_width

In [5]:
def inpaint_frame(frame, mask, pipeline):
    """Fill the masked area of a frame with a diffusers inpainting pipeline.

    Args:
        frame: uint8 image array. A 3-channel frame is treated as OpenCV BGR
            and converted to RGB before it is handed to the model.
        mask: Array with the same height/width as ``frame``; every non-zero
            entry marks a pixel to repaint (0/1 and 0/255 masks both work).
        pipeline: Callable accepting ``prompt``, ``image`` and ``mask_image``
            keyword arguments and returning an object whose ``images[0]`` is
            a PIL image.

    Returns:
        numpy.ndarray: ``frame`` with the masked pixels replaced by generated
        content and every other pixel left untouched. If inpainting fails the
        error is printed and the original ``frame`` is returned unchanged.
    """
    height, width = frame.shape[:2]

    # diffusers reads mask images as 0-255 intensities (white = repaint), so
    # a 0/1 mask is effectively all black and nothing would be inpainted.
    mask_plane = mask if mask.ndim == 2 else mask.max(axis=2)
    mask_u8 = np.where(mask_plane > 0, 255, 0).astype(np.uint8)

    # OpenCV frames are BGR whereas PIL and the model expect RGB.
    is_bgr = frame.ndim == 3 and frame.shape[2] == 3
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) if is_bgr else frame

    # frame and mask are resized to 512x512 for inpainting; nearest-neighbour
    # keeps the mask strictly binary instead of blurring its edges.
    frame_resized = cv2.resize(frame_rgb, (512, 512))
    mask_resized = cv2.resize(mask_u8, (512, 512), interpolation=cv2.INTER_NEAREST)

    # Convert to PIL Image
    frame_image = Image.fromarray(frame_resized)
    mask_image = Image.fromarray(mask_resized)

    try:
        # Perform inpainting
        inpainted_frame = pipeline(prompt=" ", image=frame_image, mask_image=mask_image).images[0]

        # Resize inpainted frame back to original size
        inpainted_frame = inpainted_frame.resize((width, height))
        generated = np.array(inpainted_frame)
        if is_bgr:
            generated = cv2.cvtColor(generated, cv2.COLOR_RGB2BGR)

        # Layouts that cannot be blended pixel-for-pixel (e.g. a grayscale
        # input) fall back to returning the generated image as-is.
        if generated.shape != frame.shape:
            return generated

        # Keep the original pixels outside the mask: the 512x512 round trip
        # would otherwise degrade content that was never meant to change.
        repaint = cv2.resize(mask_u8, (width, height), interpolation=cv2.INTER_NEAREST) > 0
        result = frame.copy()
        result[repaint] = generated[repaint]
        return result
    except Exception as e:
        # Best effort: one failed frame should not abort a whole video.
        print(f"Error in inpainting frame: {e}")
        return frame

In [6]:
def extract_frames(video_path):
    """Read every frame of a video into memory.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.

    Returns:
        list: The frames in reading order, exactly as returned by
        ``cap.read()``. The list is empty when the capture could not be
        opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # read() reports failure once no further frame is available.
                break
            frames.append(frame)
    finally:
        # Release the capture even if reading or appending raised.
        cap.release()
    return frames


In [7]:
def reconstruct_video(frames, output_video_path, fps=3):
    """Write a sequence of frames to a video file using the 'mp4v' codec.

    Args:
        frames: Non-empty sequence of 3-dimensional image arrays. The first
            frame's height and width define the size of the video.
        output_video_path: Destination path passed to ``cv2.VideoWriter``.
        fps: Frame rate of the written video.

    Raises:
        ValueError: If ``frames`` is empty.
        IOError: If the video writer could not be opened for
            ``output_video_path``.
    """
    if len(frames) == 0:
        raise ValueError("reconstruct_video needs at least one frame to write")

    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    try:
        # A writer that failed to open would otherwise accept every write()
        # call and leave no usable file behind.
        if not out.isOpened():
            raise IOError(f"Could not open video writer for {output_video_path!r}")
        for frame in frames:
            out.write(frame)
    finally:
        # Always finalise/close the writer, even when writing failed.
        out.release()

In [8]:
def converter(input_video, output_video, frame_rate_reduction=1):
    """Letterbox a video onto the padded canvas and inpaint the side bars.

    Every ``frame_rate_reduction``-th frame is padded with ``pad_resize``,
    the columns to the left and right of the resized content are marked for
    repainting, and the frame is passed to ``inpaint_frame`` together with
    the module-level ``pipeline``. The processed frames are then written
    with ``reconstruct_video``.

    Args:
        input_video: Path of the source video.
        output_video: Destination path; ``.mp4`` is appended when missing.
        frame_rate_reduction: Keep one frame out of this many (1 keeps all).

    Returns:
        str: The path the video was written to.

    Raises:
        ValueError: If ``frame_rate_reduction`` is below 1 or no frame could
            be read from ``input_video``.
    """
    if frame_rate_reduction < 1:
        raise ValueError(
            f"frame_rate_reduction must be >= 1, got {frame_rate_reduction!r}"
        )

    frames = extract_frames(input_video)
    if len(frames) == 0:
        raise ValueError(f"No frames could be read from {input_video!r}")

    inpainted_frames = []
    for i, frame in enumerate(frames):
        if i % frame_rate_reduction == 0:  # Process frames based on reduction
            padded_frame, start_x, new_width = pad_resize(frame)

            # Mask images are 0-255 intensities (white = repaint); a value of
            # 1 is effectively black and would leave the bars untouched.
            # Only the left/right bars are marked because pad_resize reports
            # the horizontal placement only.
            mask = np.zeros(padded_frame.shape[:2], dtype=np.uint8)
            mask[:, :start_x] = 255
            mask[:, start_x + new_width:] = 255

            inpainted_frame = inpaint_frame(padded_frame, mask, pipeline)
            inpainted_frames.append(inpainted_frame)

    # Case-insensitive so that e.g. "clip.MP4" is not turned into "clip.MP4.mp4".
    if not output_video.lower().endswith('.mp4'):
        output_video += '.mp4'
    reconstruct_video(inpainted_frames, output_video)
    return output_video

In [11]:
# Run the full pipeline on the sample clip; the cell displays the path that
# converter returns.
converter(
    input_video="/kaggle/input/data-input2/video.mp4",
    output_video="/kaggle/working/final.mp4",
)

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

'/kaggle/working/final.mp4'

In [12]:
# Sanity check: reload both videos and list the shape of every frame so the
# source resolution can be compared with the padded output resolution.
frame_input = extract_frames("/kaggle/input/data-input2/video.mp4")
frame_final = extract_frames("/kaggle/working/final.mp4")

for f in frame_input:
    print(f'input video frame: {f.shape}')

# Visual separator between the two listings.
print("-" * 50)

for f in frame_final:
    print(f'output video frame: {f.shape}')

input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
input video frame: (480, 640, 3)
--------------------------------------------------
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)
output video frame: (720, 1280, 3)

### 