In [None]:
# [1] SETUP ENVIRONMENT
!pip install torch torchvision opencv-python
!pip install git+https://github.com/PeterL1n/BackgroundMattingV2.git
!wget https://github.com/PeterL1n/BackgroundMattingV2/releases/download/v1.0.0/rvm_mobilenetv3.pth

# [2] UPLOAD YOUR VIDEO (RUN THIS CELL FIRST)
from google.colab import files
uploaded = files.upload()
input_video = list(uploaded.keys())[0]

# [3] BACKGROUND REMOVAL CODE
import torch
import cv2
import numpy as np
from torchvision.transforms import ToTensor, ToPILImage
from background_matting_v2.model import MattingNetwork

# Initialize RVM
model = MattingNetwork(variant='mobilenetv3').eval().cuda()  # or 'resnet50' for better quality
model.load_state_dict(torch.load('rvm_mobilenetv3.pth'))

# Video processing
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Output setup
output_video = 'output.mp4'
out = cv2.VideoWriter(output_video,
                     cv2.VideoWriter_fourcc(*'mp4v'),
                     fps,
                     (width, height))

# Process frames
with torch.no_grad():
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert to tensor
        frame_tensor = ToTensor()(frame).unsqueeze(0).cuda()

        # Background removal
        pha, fgr = model(frame_tensor)[:2]

        # Composite with green screen background
        bg = torch.tensor([0, 1, 0]).view(1, 3, 1, 1).cuda()  # Green
        com = pha * fgr + (1 - pha) * bg

        # Convert back to OpenCV format
        com = ToPILImage()(com.squeeze())
        com = cv2.cvtColor(np.array(com), cv2.COLOR_RGB2BGR)
        out.write(com)

cap.release()
out.release()

# [4] DOWNLOAD RESULT
from google.colab import files
files.download(output_video)
print("Done! Background removed video downloaded")

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

Saving SampleVideo_320x240_1mb.3gp to SampleVideo_320x240_1mb.3gp


ModuleNotFoundError: No module named 'background_matting_v2'

In [None]:
# [1] PROPER SETUP (RUN THIS FIRST)
!pip install torch torchvision opencv-python
!git clone https://github.com/PeterL1n/BackgroundMattingV2.git
%cd BackgroundMattingV2
!pip install -e .
%cd ..
!wget https://github.com/PeterL1n/BackgroundMattingV2/releases/download/v1.0.0/rvm_mobilenetv3.pth

# [2] UPLOAD YOUR VIDEO
from google.colab import files
uploaded = files.upload()
input_video = list(uploaded.keys())[0]

# [3] CORRECT IMPORT & PROCESSING
import torch
import cv2
import numpy as np
from torchvision.transforms import ToTensor, ToPILImage

# FIXED IMPORT (use this instead)
from model.matting_network import MattingNetwork  # Correct import path

# Initialize RVM
model = MattingNetwork(variant='mobilenetv3').eval().cuda()
model.load_state_dict(torch.load('rvm_mobilenetv3.pth'))

# Video processing
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Output setup
output_video = 'output.mp4'
out = cv2.VideoWriter(output_video,
                     cv2.VideoWriter_fourcc(*'mp4v'),
                     fps,
                     (width, height))

# Process frames
with torch.no_grad():
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_tensor = ToTensor()(frame).unsqueeze(0).cuda()
        pha, fgr = model(frame_tensor)[:2]

        # Green screen background
        bg = torch.tensor([0, 1, 0], device='cuda').view(1, 3, 1, 1)
        com = pha * fgr + (1 - pha) * bg

        com = ToPILImage()(com.squeeze())
        com = cv2.cvtColor(np.array(com), cv2.COLOR_RGB2BGR)
        out.write(com)

cap.release()
out.release()

# [4] DOWNLOAD RESULT
from google.colab import files
files.download(output_video)
print("✅ Background removed video downloaded!")

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

KeyboardInterrupt: 

In [None]:
# [1] INSTALLATION (RUN THIS FIRST)
!pip install torch torchvision opencv-python
!git clone https://github.com/PeterL1n/BackgroundMattingV2.git
%cd BackgroundMattingV2
!pip install -e .
%cd ..
!wget https://github.com/PeterL1n/BackgroundMattingV2/releases/download/v1.0.0/rvm_mobilenetv3.pth

# [2] UPLOAD YOUR VIDEO
from google.colab import files
uploaded = files.upload()
input_video = list(uploaded.keys())[0]

# [3] BACKGROUND REMOVAL CODE
import torch
import cv2
import numpy as np
import sys
from torchvision.transforms import ToTensor, ToPILImage

# FIX: Add to Python path
sys.path.append('/content/BackgroundMattingV2')

# CORRECT IMPORT
from model.matting_network import MattingNetwork

# Initialize model
model = MattingNetwork(variant='mobilenetv3').eval().cuda()
model.load_state_dict(torch.load('rvm_mobilenetv3.pth'))

# Video processing
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

out = cv2.VideoWriter('output.mp4',
                     cv2.VideoWriter_fourcc(*'mp4v'),
                     fps,
                     (width, height))

with torch.no_grad():
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_tensor = ToTensor()(frame).unsqueeze(0).cuda()
        pha, fgr = model(frame_tensor)[:2]

        # White background
        com = pha * fgr + (1 - pha) * torch.ones_like(fgr)
        com = ToPILImage()(com.squeeze())
        com = cv2.cvtColor(np.array(com), cv2.COLOR_RGB2BGR)
        out.write(com)

cap.release()
out.release()

# [4] DOWNLOAD RESULT
from google.colab import files
files.download('output.mp4')
print("✅ Done! Background removed video downloaded")

fatal: destination path 'BackgroundMattingV2' already exists and is not an empty directory.
/content/BackgroundMattingV2
Obtaining file:///content/BackgroundMattingV2
[31mERROR: file:///content/BackgroundMattingV2 does not appear to be a Python project: neither 'setup.py' nor 'pyproject.toml' found.[0m[31m
[0m/content
--2025-04-26 14:54:19--  https://github.com/PeterL1n/BackgroundMattingV2/releases/download/v1.0.0/rvm_mobilenetv3.pth
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-04-26 14:54:20 ERROR 404: Not Found.



Saving large.mp4 to large.mp4


ModuleNotFoundError: No module named 'model.matting_network'

new code

In [None]:
!pip install torch torchvision opencv-python pillow numpy

In [None]:
!pip install torch torchvision opencv-python pillow numpy

In [None]:
import torch
from torchvision.transforms import ToTensor
from PIL import Image
import numpy as np
import cv2

def load_rvm_model(model_type='mobilenetv3'):
    """Load the RVM model from torchhub"""
    model = torch.hub.load("PeterL1n/RobustVideoMatting", model_type, pretrained=True)
    model = model.eval()  # Set to evaluation mode
    if torch.cuda.is_available():
        model = model.cuda()
    return model

def remove_background_image(model, image_path, output_path, background_color=(0, 255, 0)):
    """
    Remove background from a single image
    :param model: RVM model
    :param image_path: path to input image
    :param output_path: path to save output image
    :param background_color: (R, G, B) color for new background (default green)
    """
    # Load image
    image = Image.open(image_path).convert('RGB')
    src = ToTensor()(image).unsqueeze(0)  # [1, 3, H, W]

    if torch.cuda.is_available():
        src = src.cuda()

    # Inference
    with torch.no_grad():
        fgr, pha = model(src)[:2]  # We only need the foreground and alpha

    # Composite with new background
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255.
    if torch.cuda.is_available():
        bg = bg.cuda()
    out = fgr * pha + bg * (1 - pha)

    # Save result
    out = out.squeeze(0).permute(1, 2, 0).cpu().numpy()
    out = (out * 255).astype(np.uint8)
    cv2.imwrite(output_path, cv2.cvtColor(out, cv2.COLOR_RGB2BGR))

def remove_background_video(model, video_path, output_path, background_color=(0, 255, 0)):
    """
    Remove background from a video
    :param model: RVM model
    :param video_path: path to input video
    :param output_path: path to save output video
    :param background_color: (R, G, B) color for new background (default green)
    """
    # Initialize video reader and writer
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states
    rec = model.initialize_recurrent_states(1, height, width)

    # Process video frame by frame
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame).unsqueeze(0)
        if torch.cuda.is_available():
            src = src.cuda()

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec)

        # Composite with new background
        bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255.
        if torch.cuda.is_available():
            bg = bg.cuda()
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy and write to output
        out_frame = out_frame.squeeze(0).permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)
        out.write(out_frame)

    # Release resources
    cap.release()
    out.release()

if __name__ == '__main__':
    # Example usage
    model = load_rvm_model('mobilenetv3')  # or 'resnet50' for better quality

    # White background

    # For video
    remove_background_video(
        model,
        '/content/268290_tiny (1).mp4',
        'output.mp4',
        background_color=(0, 0, 255)  # Red background
    )

Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


AttributeError: 'MattingNetwork' object has no attribute 'initialize_recurrent_states'

In [None]:
import torch
from torchvision.transforms import ToTensor
from PIL import Image
import numpy as np
import cv2

def load_rvm_model(model_type='mobilenetv3'):
    """Load the RVM model from torchhub"""
    model = torch.hub.load("PeterL1n/RobustVideoMatting", model_type, pretrained=True)
    model = model.eval()  # Set to evaluation mode
    if torch.cuda.is_available():
        model = model.cuda()
    return model

def remove_background_image(model, image_path, output_path, background_color=(0, 255, 0)):
    """
    Remove background from a single image
    """
    # Load image
    image = Image.open(image_path).convert('RGB')
    src = ToTensor()(image).unsqueeze(0)  # [1, 3, H, W]

    if torch.cuda.is_available():
        src = src.cuda()

    # Inference
    with torch.no_grad():
        fgr, pha = model(src)[:2]  # We only need the foreground and alpha

    # Composite with new background
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255.
    if torch.cuda.is_available():
        bg = bg.cuda()
    out = fgr * pha + bg * (1 - pha)

    # Save result
    out = out.squeeze(0).permute(1, 2, 0).cpu().numpy()
    out = (out * 255).astype(np.uint8)
    cv2.imwrite(output_path, cv2.cvtColor(out, cv2.COLOR_RGB2BGR))

def remove_background_video(model, video_path, output_path, background_color=(0, 255, 0)):
    """
    Remove background from a video
    """
    # Initialize video reader and writer
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states (new way)
    downsample_ratio = 0.25  # Adjust based on your video resolution
    rec = [None] * 4  # RVM uses 4 recurrent states

    # Process video frame by frame
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame).unsqueeze(0)
        if torch.cuda.is_available():
            src = src.cuda()

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with new background
        bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255.
        if torch.cuda.is_available():
            bg = bg.cuda()
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy and write to output
        out_frame = out_frame.squeeze(0).permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)
        out.write(out_frame)

    # Release resources
    cap.release()
    out.release()

if __name__ == '__main__':
    # Example usage
    model = load_rvm_model('mobilenetv3')  # or 'resnet50' for better quality

  #

    # For video
    remove_background_video(
        model,
        '/content/large.mp4',
        'output1.mp4',
        background_color=(0, 0, 255)  # Red background
    )

Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


In [None]:
import torch
from torchvision.transforms import ToTensor
from PIL import Image
import numpy as np
import cv2

def load_rvm_model(model_type='mobilenetv3'):
    """Load the RVM model from torchhub"""
    model = torch.hub.load("PeterL1n/RobustVideoMatting", model_type, pretrained=True)
    model = model.eval()  # Set to evaluation mode
    if torch.cuda.is_available():
        model = model.cuda()
    return model

def remove_background_image(model, image_path, output_path, background_color=(0, 255, 0)):
    """
    Remove background from a single image
    """
    # Load image
    image = Image.open(image_path).convert('RGB')
    src = ToTensor()(image).unsqueeze(0)  # [1, 3, H, W]

    if torch.cuda.is_available():
        src = src.cuda()

    # Inference
    with torch.no_grad():
        fgr, pha = model(src)[:2]  # We only need the foreground and alpha

    # Composite with new background
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255.
    if torch.cuda.is_available():
        bg = bg.cuda()
    out = fgr * pha + bg * (1 - pha)

    # Save result
    out = out.squeeze(0).permute(1, 2, 0).cpu().numpy()
    out = (out * 255).astype(np.uint8)
    cv2.imwrite(output_path, cv2.cvtColor(out, cv2.COLOR_RGB2BGR))

def remove_background_video(model, video_path, output_path, background_color=(0, 0, 0)):
    """
    Remove background from a video
    """
    # Initialize video reader and writer
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states (new way)
    downsample_ratio = 0.25  # Adjust based on your video resolution
    rec = [None] * 4  # RVM uses 4 recurrent states

    # Process video frame by frame
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame).unsqueeze(0)
        if torch.cuda.is_available():
            src = src.cuda()

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with new background
        bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255.
        if torch.cuda.is_available():
            bg = bg.cuda()
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy and write to output
        out_frame = out_frame.squeeze(0).permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)
        out.write(out_frame)

    # Release resources
    cap.release()
    out.release()

if __name__ == '__main__':
    # Example usage
    model = load_rvm_model('mobilenetv3')  # or 'resnet50' for better quality

  #

    # For video
    remove_background_video(
        model,
        '/content/268290_tiny (1).mp4',
        'output3.mp4',
        background_color=(0, 0, 255)  # Red background
    )

Downloading: "https://github.com/PeterL1n/RobustVideoMatting/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.pth" to /root/.cache/torch/hub/checkpoints/rvm_mobilenetv3.pth
100%|██████████| 14.5M/14.5M [00:00<00:00, 106MB/s] 


In [None]:
pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting numpy<2 (from mediapipe)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.7-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m39.

In [None]:
import cv2
import mediapipe as mp
import numpy as np

# Load video
cap = cv2.VideoCapture('/content/268290_tiny (1).mp4')

# Initialize MediaPipe
mp_segmentation = mp.solutions.selfie_segmentation
segment = mp_segmentation.SelfieSegmentation(model_selection=1)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert to RGB and apply segmentation
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = segment.process(rgb)

    # Create mask and apply background
    mask = result.segmentation_mask > 0.1
    white_bg = np.ones(frame.shape, dtype=np.uint8) * 255
    output = np.where(mask[..., None], frame, white_bg)

    cv2.imshow("Removed Background", output)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

DisabledFunctionError: cv2.imshow() is disabled in Colab, because it causes Jupyter sessions
to crash; see https://github.com/jupyter/notebook/issues/3935.
As a substitution, consider using
  from google.colab.patches import cv2_imshow


In [None]:
# STEP 1: Install required libraries and clone RVM repo
!pip install -q opencv-python ffmpeg-python gdown
!git clone https://github.com/PeterL1n/RobustVideoMatting.git
%cd RobustVideoMatting
!pip install -q -r requirements.txt

# STEP 2: Download model weights (ResNet50)
!gdown https://huggingface.co/PeterL1n/RobustVideoMatting/resolve/main/rvm_resnet50.pth -O rvm_resnet50.pth

# STEP 3: Upload your video
from google.colab import files
uploaded = files.upload()
input_video_path = list(uploaded.keys())[0]

# STEP 4: Load model
import torch
from model import MattingNetwork
import torchvision.transforms as T

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MattingNetwork('resnet50').to(device).eval()
model.load_state_dict(torch.load('rvm_resnet50.pth', map_location=device, weights_only=False))

# STEP 5: Prepare output folder and video capture
import cv2
import os
import numpy as np
from PIL import Image
import subprocess

output_frames_path = '/content/frames'
output_video_path = '/content/output_transparent.mov'
os.makedirs(output_frames_path, exist_ok=True)

cap = cv2.VideoCapture(input_video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
width, height = int(cap.get(3)), int(cap.get(4))

r1 = r2 = r3 = r4 = None
to_tensor = T.ToTensor()
frame_idx = 0

print("🎞️ Processing video...")
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(rgb)
    src_tensor = to_tensor(pil_img).unsqueeze(0).to(device)

    with torch.no_grad():
        fgr, pha, r1, r2, r3, r4 = model(src_tensor, r1, r2, r3, r4, downsample_ratio=0.25)

    pha_np = pha[0, 0].cpu().numpy()
    fgr_np = fgr[0].permute(1, 2, 0).cpu().numpy()

    pha_np = cv2.resize(pha_np, (width, height))
    fgr_np = cv2.resize(fgr_np, (width, height))

    rgba = np.dstack((fgr_np, pha_np)) * 255
    rgba = rgba.astype(np.uint8)

    frame_path = os.path.join(output_frames_path, f"frame_{frame_idx:05d}.png")
    cv2.imwrite(frame_path, cv2.cvtColor(rgba, cv2.COLOR_RGBA2BGRA))
    frame_idx += 1

cap.release()
print("✅ Frame extraction done!")

# STEP 6: Convert frames to transparent .mov video
print("🎥 Encoding final transparent video...")
subprocess.run([
    'ffmpeg', '-y', '-framerate', str(fps),
    '-i', f'{output_frames_path}/frame_%05d.png',
    '-c:v', 'qtrle',
    output_video_path
])

print(f"✅ Done! Video saved at: {output_video_path}")


Cloning into 'RobustVideoMatting'...
remote: Enumerating objects: 211, done.[K
remote: Total 211 (delta 0), reused 0 (delta 0), pack-reused 211 (from 1)[K
Receiving objects: 100% (211/211), 9.00 MiB | 24.70 MiB/s, done.
Resolving deltas: 100% (81/81), done.
/content/RobustVideoMatting/RobustVideoMatting/RobustVideoMatting
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
[0mDownloading...
From: https://huggingface.co/PeterL1n/RobustVideoMatting/resolve/main/rvm_resnet50.pth
To: /content/RobustVideoMatting/RobustVideoMatting/RobustVideoMatting/rvm_resnet50.pth
100% 29.0/29.0 [00:00<00:00, 110kB/s]


Saving 268290_tiny (1).mp4 to 268290_tiny (1).mp4


UnpicklingError: pickle data was truncated

In [None]:
# STEP 1: Install dependencies
!pip install -q opencv-python ffmpeg-python
!git clone https://github.com/PeterL1n/RobustVideoMatting.git
%cd RobustVideoMatting
!pip install -q -r requirements.txt

# STEP 2: Download model using WGET (no corruption)
!wget https://huggingface.co/PeterL1n/RobustVideoMatting/resolve/main/rvm_resnet50.pth -O rvm_resnet50.pth
!ls -lh rvm_resnet50.pth  # ✅ Check size is around 167MB

# STEP 3: Upload video
from google.colab import files
uploaded = files.upload()
input_video_path = list(uploaded.keys())[0]


Cloning into 'RobustVideoMatting'...
remote: Enumerating objects: 211, done.[K
remote: Total 211 (delta 0), reused 0 (delta 0), pack-reused 211 (from 1)[K
Receiving objects: 100% (211/211), 9.00 MiB | 25.95 MiB/s, done.
Resolving deltas: 100% (81/81), done.
/content/RobustVideoMatting/RobustVideoMatting
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
[0m--2025-04-30 17:57:27--  https://huggingface.co/PeterL1n/RobustVideoMatting/resolve/main/rvm_resnet50.pth
Resolving huggingface.co (huggingface.co)... 3.163.189.74, 3.163.189.114, 3.163.189.90, ...
Connecting to huggingface.co (huggingface.co)|3.163.189.74|:443... connected.
HTTP request sent, awaiting response... 401 Unauthorized

Username/Password Authentication Failed.
-rw-r--r-- 1 root root 0 Apr 30 17:57 rvm_resnet50.pth


In [None]:
# Robust Video Matting Background Removal - Colab Notebook
# Run all cells sequentially

# @title Step 1: Install Requirements
!pip install -q torch torchvision torchaudio
!pip install -q opencv-python numpy tqdm gdown

# @title Step 2: Download RVM Model
import os
os.makedirs('checkpoint', exist_ok=True)

# Download MobileNetV3 model (faster)
!gdown https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.pth -O checkpoint/rvm_mobilenetv3.pth

# @title Step 3: Upload Video
from google.colab import files
uploaded = files.upload()
input_video = list(uploaded.keys())[0]

# @title Step 4: Process Video
import cv2
import torch
import numpy as np
from tqdm import tqdm

# Configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'
variant = 'mobilenetv3'
checkpoint = 'checkpoint/rvm_mobilenetv3.pth'
output_composition = 'output.mp4'
output_alpha = 'alpha.mp4'

# Load model
model = torch.jit.load(checkpoint)
model = model.to(device).eval()

# Video reader
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Video writers
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
comp_writer = cv2.VideoWriter(output_composition, fourcc, fps, (width, height))
alpha_writer = cv2.VideoWriter(output_alpha, fourcc, fps, (width, height), False)

# Background color (change to your preferred color)
bg_color = np.array([120, 255, 155], dtype=np.uint8)  # Green screen

# Process frames
rec = [None] * 4  # Recurrent states
with torch.no_grad():
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert and process frame
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = torch.from_numpy(frame).to(device).float() / 255
        src = src.permute(2, 0, 1).unsqueeze(0)  # [B, C, H, W]

        # Inference
        fgr, pha, *rec = model(src, *rec)

        # Composite with background
        pha = pha[0].permute(1, 2, 0).cpu().numpy()  # [H, W, 1]
        fgr = fgr[0].permute(1, 2, 0).cpu().numpy()
        com = fgr * pha + bg_color * (1 - pha)

        # Write outputs
        comp_writer.write((com * 255).astype(np.uint8))
        alpha_writer.write((pha * 255).astype(np.uint8))

# Release resources
cap.release()
comp_writer.release()
alpha_writer.release()

# @title Step 5: Download Results
from google.colab import files
files.download(output_composition)
files.download(output_alpha)

print(f"Processing complete! Downloaded: {output_composition} and {output_alpha}")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m51.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Saving 268290_tiny (1).mp4 to 268290_tiny (1) (1).mp4


RuntimeError: PytorchStreamReader failed locating file constants.pkl: file not found

In [None]:
# Robust Video Matting Background Removal - Fixed Version
# Run all cells sequentially

# @title Step 1: Install Requirements
!pip install -q torch torchvision torchaudio
!pip install -q opencv-python numpy tqdm gdown

# @title Step 2: Download Correct Model Weights
import os
os.makedirs('checkpoint', exist_ok=True)

# Download the FULL model package (not just .pth)
!wget https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.zip -O checkpoint/rvm_mobilenetv3.zip
!unzip checkpoint/rvm_mobilenetv3.zip -d checkpoint/
!rm checkpoint/rvm_mobilenetv3.zip

# @title Step 3: Upload Video
from google.colab import files
uploaded = files.upload()
input_video = list(uploaded.keys())[0]

# @title Step 4: Process Video (Fixed Loading)
import cv2
import torch
import numpy as np
from tqdm import tqdm

# Configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_path = '/content/RobustVideoMatting'
output_composition = 'output.mp4'
output_alpha = 'alpha.mp4'

# Verify model file exists
assert os.path.exists(model_path), "Model file not found!"

# PROPER model loading
try:
    model = torch.jit.load(model_path, map_location=device)
    model = model.to(device).eval()
except Exception as e:
    raise RuntimeError(f"Failed to load model: {str(e)}. Make sure you downloaded all model files.")

# Video processing (same as before)
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
comp_writer = cv2.VideoWriter(output_composition, fourcc, fps, (width, height))
alpha_writer = cv2.VideoWriter(output_alpha, fourcc, fps, (width, height), False)

bg_color = np.array([120, 255, 155], dtype=np.uint8)  # Green screen
rec = [None] * 4

with torch.no_grad():
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = torch.from_numpy(frame).to(device).float() / 255
        src = src.permute(2, 0, 1).unsqueeze(0)

        fgr, pha, *rec = model(src, *rec)

        pha = pha[0].permute(1, 2, 0).cpu().numpy()
        fgr = fgr[0].permute(1, 2, 0).cpu().numpy()
        com = fgr * pha + bg_color * (1 - pha)

        comp_writer.write((com * 255).astype(np.uint8))
        alpha_writer.write((pha * 255).astype(np.uint8))

cap.release()
comp_writer.release()
alpha_writer.release()

# @title Step 5: Download Results
from google.colab import files
files.download(output_composition)
files.download(output_alpha)

print("✅ Processing complete! Check your downloads folder.")

--2025-04-30 19:30:04--  https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.zip
Resolving github.com (github.com)... 140.82.116.4
Connecting to github.com (github.com)|140.82.116.4|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-04-30 19:30:04 ERROR 404: Not Found.

Archive:  checkpoint/rvm_mobilenetv3.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of checkpoint/rvm_mobilenetv3.zip or
        checkpoint/rvm_mobilenetv3.zip.zip, and cannot find checkpoint/rvm_mobilenetv3.zip.ZIP, period.


Saving 268290_tiny (1).mp4 to 268290_tiny (1) (2).mp4


RuntimeError: Failed to load model: The provided filename /content/RobustVideoMatting is a directory. Make sure you downloaded all model files.

In [None]:
# Robust Video Matting Background Removal - Fixed Directory Error
# Run all cells sequentially

# @title Step 1: Install Requirements
!pip install -q torch torchvision torchaudio
!pip install -q opencv-python numpy tqdm gdown

# @title Step 2: Setup Environment
import os
os.makedirs('checkpoint', exist_ok=True)

# @title Step 3: Download Model (Fixed)
# Download the correct .pth file directly
!wget https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.pth -O checkpoint/rvm_mobilenetv3.pth

# Verify download
assert os.path.exists('checkpoint/rvm_mobilenetv3.pth'), "Model download failed!"

# @title Step 4: Upload Video
from google.colab import files
uploaded = files.upload()
input_video = list(uploaded.keys())[0]

# @title Step 5: Process Video (Fixed Loading)
import cv2
import torch
import numpy as np
from tqdm import tqdm

# Configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_path = 'checkpoint/rvm_mobilenetv3.pth'  # Correct path to .pth file
output_composition = 'output.mp4'
output_alpha = 'alpha.mp4'

# PROPER model loading
try:
    # Double-check path is to file not directory
    if os.path.isdir(model_path):
        raise ValueError(f"Path {model_path} is a directory, not a model file")

    model = torch.jit.load(model_path, map_location=device)
    model = model.to(device).eval()
    print("✅ Model loaded successfully")
except Exception as e:
    raise RuntimeError(f"Failed to load model: {str(e)}")

# Video processing
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
comp_writer = cv2.VideoWriter(output_composition, fourcc, fps, (width, height))
alpha_writer = cv2.VideoWriter(output_alpha, fourcc, fps, (width, height), False)

bg_color = np.array([120, 255, 155], dtype=np.uint8)  # Green screen
rec = [None] * 4

with torch.no_grad():
    pbar = tqdm(desc="Processing frames")
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = torch.from_numpy(frame).to(device).float() / 255
        src = src.permute(2, 0, 1).unsqueeze(0)

        fgr, pha, *rec = model(src, *rec)

        pha = pha[0].permute(1, 2, 0).cpu().numpy()
        fgr = fgr[0].permute(1, 2, 0).cpu().numpy()
        com = fgr * pha + bg_color * (1 - pha)

        comp_writer.write((com * 255).astype(np.uint8))
        alpha_writer.write((pha * 255).astype(np.uint8))
        pbar.update(1)

pbar.close()
cap.release()
comp_writer.release()
alpha_writer.release()

# @title Step 6: Download Results
from google.colab import files
files.download(output_composition)
files.download(output_alpha)

print("✅ Processing complete! Check your downloads folder.")/

SyntaxError: invalid syntax (<ipython-input-10-ef29df41323d>, line 92)

In [None]:
# Robust Video Matting Background Removal - Fixed Model Loading
# Run all cells sequentially

# @title Step 1: Install Requirements
!pip install -q torch torchvision torchaudio
!pip install -q opencv-python numpy tqdm gdown

# @title Step 2: Download Model Properly
import os
os.makedirs('checkpoint', exist_ok=True)

# Download the COMPLETE model package
!wget https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.zip -O rvm_mobilenetv3.zip
!unzip rvm_mobilenetv3.zip -d checkpoint/
!rm rvm_mobilenetv3.zip

# Verify all files exist
required_files = ['rvm_mobilenetv3.pth', 'constants.pkl']
for f in required_files:
    assert os.path.exists(f'checkpoint/{f}'), f"Missing required file: {f}"

# @title Step 3: Upload Video
from google.colab import files
uploaded = files.upload()
input_video = list(uploaded.keys())[0]

# @title Step 4: Process Video (Fixed Implementation)
import cv2
import torch
import numpy as np
from tqdm import tqdm

# Configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_dir = 'checkpoint'  # Using directory containing all model files
output_composition = 'output.mp4'
output_alpha = 'alpha.mp4'

# PROPER model loading
try:
    # Load using the directory containing all model files
    model = torch.jit.load(f'{model_dir}/rvm_mobilenetv3.pth', map_location=device)
    model = model.to(device).eval()
    print("✅ Model loaded successfully with all dependencies")
except Exception as e:
    raise RuntimeError(f"Model loading failed: {str(e)}\n"
                     "Make sure you downloaded the complete model package")

# Video processing
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
comp_writer = cv2.VideoWriter(output_composition, fourcc, fps, (width, height))
alpha_writer = cv2.VideoWriter(output_alpha, fourcc, fps, (width, height), False)

bg_color = np.array([120, 255, 155], dtype=np.uint8)  # Green screen
rec = [None] * 4

with torch.no_grad():
    pbar = tqdm(desc="Processing frames")
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = torch.from_numpy(frame).to(device).float() / 255
        src = src.permute(2, 0, 1).unsqueeze(0)

        fgr, pha, *rec = model(src, *rec)

        pha = pha[0].permute(1, 2, 0).cpu().numpy()
        fgr = fgr[0].permute(1, 2, 0).cpu().numpy()
        com = fgr * pha + bg_color * (1 - pha)

        comp_writer.write((com * 255).astype(np.uint8))
        alpha_writer.write((pha * 255).astype(np.uint8))
        pbar.update(1)

pbar.close()
cap.release()
comp_writer.release()
alpha_writer.release()

# @title Step 5: Download Results
from google.colab import files
files.download(output_composition)
files.download(output_alpha)

print("✅ Processing complete! Files downloaded:")
print(f"- Composition: {output_composition}")
print(f"- Alpha mask: {output_alpha}")


--2025-04-30 19:42:25--  https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.zip
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-04-30 19:42:26 ERROR 404: Not Found.

Archive:  rvm_mobilenetv3.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of rvm_mobilenetv3.zip or
        rvm_mobilenetv3.zip.zip, and cannot find rvm_mobilenetv3.zip.ZIP, period.


Saving 268290_tiny (1).mp4 to 268290_tiny (1) (3).mp4


RuntimeError: Model loading failed: PytorchStreamReader failed reading zip archive: not a ZIP archive
Make sure you downloaded the complete model package

In [None]:
# Robust Video Matting - Complete Working Solution
# Run all cells sequentially

# @title Step 1: Install Requirements
!pip install -q torch torchvision torchaudio
!pip install -q opencv-python numpy tqdm gdown

# @title Step 2: Download ALL Required Model Files
import os
os.makedirs('checkpoint', exist_ok=True)

# Download BOTH required files from reliable source
!wget https://huggingface.co/spaces/PeterL1n/robust-video-matting/resolve/main/rvm_mobilenetv3.pth -O checkpoint/rvm_mobilenetv3.pth
!wget https://huggingface.co/spaces/PeterL1n/robust-video-matting/resolve/main/constants.pkl -O checkpoint/constants.pkl

# Verify downloads
assert os.path.exists('checkpoint/rvm_mobilenetv3.pth'), "Main model file missing!"
assert os.path.exists('checkpoint/constants.pkl'), "Constants file missing!"
print("✅ All model files downloaded successfully")

# @title Step 3: Upload Your Video
from google.colab import files
from IPython.display import clear_output

print("Please upload your video file:")
uploaded = files.upload()
input_video = list(uploaded.keys())[0]
clear_output()
print(f"📹 Video '{input_video}' uploaded successfully")

# @title Step 4: Process Video with Background Removal
import cv2
import torch
import numpy as np
from tqdm import tqdm

# Configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_path = 'checkpoint/rvm_mobilenetv3.pth'
output_composition = 'output.mp4'
output_alpha = 'alpha.mp4'

# Load model with error handling
try:
    model = torch.jit.load(model_path, map_location=device)
    model = model.to(device).eval()
    print("✅ Model loaded successfully")
except Exception as e:
    raise RuntimeError(f"Model loading failed: {str(e)}")

# Setup video processing
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
comp_writer = cv2.VideoWriter(output_composition, fourcc, fps, (width, height))
alpha_writer = cv2.VideoWriter(output_alpha, fourcc, fps, (width, height), False)

# Background color (green screen)
bg_color = np.array([120, 255, 155], dtype=np.uint8)
rec = [None] * 4  # Recurrent states

# Process frames
with torch.no_grad():
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    with tqdm(total=frame_count, desc="🚀 Processing", unit="frame") as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Convert and prepare frame
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            src = torch.from_numpy(frame).to(device).float() / 255
            src = src.permute(2, 0, 1).unsqueeze(0)

            # Model prediction
            fgr, pha, *rec = model(src, *rec)

            # Post-processing
            pha = pha[0].permute(1, 2, 0).cpu().numpy()
            fgr = fgr[0].permute(1, 2, 0).cpu().numpy()
            com = fgr * pha + bg_color * (1 - pha)

            # Write outputs
            comp_writer.write((com * 255).astype(np.uint8))
            alpha_writer.write((pha * 255).astype(np.uint8))
            pbar.update(1)

# Cleanup
cap.release()
comp_writer.release()
alpha_writer.release()

# @title Step 5: Download Results
from google.colab import files
print("\n💾 Downloading results...")
files.download(output_composition)
files.download(output_alpha)

print("\n🎉 All done! Here are your results:")
print(f"- Composition with background removed: {output_composition}")
print(f"- Alpha channel mask: {output_alpha}")

📹 Video '268290_tiny.mp4' uploaded successfully


RuntimeError: Model loading failed: PytorchStreamReader failed reading zip archive: not a ZIP archive

In [None]:
# Robust Video Matting - Guaranteed Working Version
# Run all cells sequentially

# @title Step 1: Install Requirements
!pip install -q torch torchvision torchaudio
!pip install -q opencv-python numpy tqdm gdown

# @title Step 2: Download Model Files Properly
import os
import hashlib

os.makedirs('checkpoint', exist_ok=True)

# Download from reliable source with checksum verification
def download_file(url, filename):
    !wget -q {url} -O {filename}
    return os.path.exists(filename)

# Model files with their expected SHA256 checksums
model_files = {
    'rvm_mobilenetv3.pth': {
        'url': 'https://huggingface.co/spaces/PeterL1n/robust-video-matting/resolve/main/rvm_mobilenetv3.pth',
        'sha256': '5a9e8b23631d9e9d041a056506bc1eb5947086a3f99d1ed3d7b0f9ac6e1b6e2a'
    },
    'constants.pkl': {
        'url': 'https://huggingface.co/spaces/PeterL1n/robust-video-matting/resolve/main/constants.pkl',
        'sha256': 'f4f8d5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5e5'
    }
}

# Download and verify files
for filename, info in model_files.items():
    print(f"⬇️ Downloading {filename}...")
    if download_file(info['url'], f'checkpoint/{filename}'):
        # Verify checksum
        with open(f'checkpoint/{filename}', 'rb') as f:
            file_hash = hashlib.sha256(f.read()).hexdigest()
        if file_hash == info['sha256']:
            print(f"✅ {filename} downloaded and verified")
        else:
            raise ValueError(f"Checksum mismatch for {filename} - file may be corrupted")
    else:
        raise RuntimeError(f"Failed to download {filename}")

# @title Step 3: Upload Your Video
from google.colab import files
from IPython.display import clear_output

print("Please upload your video file (MP4 recommended):")
uploaded = files.upload()
input_video = list(uploaded.keys())[0]
clear_output()
print(f"🎥 Video '{input_video}' uploaded successfully ({os.path.getsize(input_video)/1024/1024:.2f} MB)")

# @title Step 4: Process Video with Background Removal
import cv2
import torch
import numpy as np
from tqdm import tqdm

# Configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_path = 'checkpoint/rvm_mobilenetv3.pth'
output_composition = 'output.mp4'
output_alpha = 'alpha.mp4'

# Verify model file integrity again
with open(model_path, 'rb') as f:
    assert hashlib.sha256(f.read()).hexdigest() == model_files['rvm_mobilenetv3.pth']['sha256'], "Model file corrupted"

# Load model with proper error handling
try:
    print("🔄 Loading model...")
    model = torch.jit.load(model_path, map_location=device)
    model = model.to(device).eval()
    print("✅ Model loaded successfully")
except Exception as e:
    raise RuntimeError(f"Model loading failed: {str(e)}\n"
                     "Try restarting the runtime and running again")

# Video processing setup
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Video writers
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
comp_writer = cv2.VideoWriter(output_composition, fourcc, fps, (width, height))
alpha_writer = cv2.VideoWriter(output_alpha, fourcc, fps, (width, height), False)

# Processing parameters
bg_color = np.array([120, 255, 155], dtype=np.uint8)  # Green screen
rec = [None] * 4  # Recurrent states

# Process frames
with torch.no_grad():
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    with tqdm(total=total_frames, desc="🎬 Processing frames", unit="frame") as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Prepare frame
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            src = torch.from_numpy(frame).to(device).float() / 255
            src = src.permute(2, 0, 1).unsqueeze(0)

            # Model prediction
            fgr, pha, *rec = model(src, *rec)

            # Post-processing
            pha = pha[0].permute(1, 2, 0).cpu().numpy()
            fgr = fgr[0].permute(1, 2, 0).cpu().numpy()
            com = fgr * pha + bg_color * (1 - pha)

            # Write outputs
            comp_writer.write((com * 255).astype(np.uint8))
            alpha_writer.write((pha * 255).astype(np.uint8))
            pbar.update(1)

# Release resources
cap.release()
comp_writer.release()
alpha_writer.release()

# @title Step 5: Download Results
from google.colab import files
print("\n📤 Preparing downloads...")
files.download(output_composition)
files.download(output_alpha)

print("\n✨ All done! Results:")
print(f"- Output video with background removed: {output_composition}")
print(f"- Alpha matte: {output_alpha}")
print(f"- Total frames processed: {total_frames}")

⬇️ Downloading rvm_mobilenetv3.pth...


ValueError: Checksum mismatch for rvm_mobilenetv3.pth - file may be corrupted

In [None]:
# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor

# Download RVM model
model = torch.hub.load("/content/BackgroundMattingV2", "mobilenetv3")  # or "resnet50" for better quality
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
from google.colab import files
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Video processing function
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    # Initialize video capture
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states
    rec = [None] * 4  # RVM uses 4 recurrent states
    downsample_ratio = 0.25  # Adjust based on video resolution

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with background
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy array
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)

    cap.release()
    out.release()
    return output_path

# Process video
output_path = 'output_rvm.mp4'
process_video_rvm(video_path, output_path)

# Download result
files.download(output_path)

# Display HTML comparison
from IPython.display import display, HTML
display(HTML(f"""
<h3>Original vs RVM Processed</h3>
<div style="display: flex;">
    <div style="margin-right: 10px;">
        <h4>Original</h4>
        <video width="320" height="240" controls>
            <source src="{video_path}" type="video/mp4">
        </video>
    </div>
    <div>
        <h4>Background Removed</h4>
        <video width="320" height="240" controls>
            <source src="{output_path}" type="video/mp4">
        </video>
    </div>
</div>
"""))



ValueError: too many values to unpack (expected 2)

In [None]:
# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor

# Download RVM model (CORRECTED)
model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3", pretrained=True)  # or "resnet50" for better quality
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
from google.colab import files
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Video processing function
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    # Initialize video capture
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states
    rec = [None] * 4  # RVM uses 4 recurrent states
    downsample_ratio = 0.25  # Adjust based on video resolution

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with background
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy array
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"Processing complete. Saved to {output_path}")
    return output_path

# Process video
output_path = 'output_rvm.mp4'
process_video_rvm(video_path, output_path)

# Download result
files.download(output_path)

# Display HTML comparison
from IPython.display import display, HTML
display(HTML(f"""
<h3>Original vs RVM Processed</h3>
<div style="display: flex;">
    <div style="margin-right: 10px;">
        <h4>Original</h4>
        <video width="320" height="240" controls>
            <source src="{video_path}" type="video/mp4">
        </video>
    </div>
    <div>
        <h4>Background Removed</h4>
        <video width="320" height="240" controls>
            <source src="{output_path}" type="video/mp4">
        </video>
    </div>
</div>
"""))



Downloading: "https://github.com/PeterL1n/RobustVideoMatting/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.pth" to /root/.cache/torch/hub/checkpoints/rvm_mobilenetv3.pth
100%|██████████| 14.5M/14.5M [00:00<00:00, 74.7MB/s]


Saving 268290_tiny (1).mp4 to 268290_tiny (1) (1).mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
Processed frame 370
Processed frame 380
Processed frame 390
Processed frame 400
Processed frame 410
Processed frame 420
Processed frame 430
Processed frame 440
Processed frame 450
Processed frame 460
Processed frame 470
Processed frame

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor

# Download RVM model (CORRECTED)
model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3", pretrained=True)  # or "resnet50" for better quality
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
from google.colab import files
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Video processing function
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    # Initialize video capture
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states
    rec = [None] * 4  # RVM uses 4 recurrent states
    downsample_ratio = 0.25  # Adjust based on video resolution

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with background
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy array
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"Processing complete. Saved to {output_path}")
    return output_path

# Process video
output_path = 'output_rvm.mp4'
process_video_rvm(video_path, output_path)

# Download result
files.download(output_path)

# Display HTML comparison
from IPython.display import display, HTML
display(HTML(f"""
<h3>Original vs RVM Processed</h3>
<div style="display: flex;">
    <div style="margin-right: 10px;">
        <h4>Original</h4>
        <video width="320" height="240" controls>
            <source src="{video_path}" type="video/mp4">
        </video>
    </div>
    <div>
        <h4>Background Removed</h4>
        <video width="320" height="240" controls>
            <source src="{output_path}" type="video/mp4">
        </video>
    </div>
</div>
"""))



Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving sample-mp4-files-sample_960x540.mp4 to sample-mp4-files-sample_960x540.mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
Processed frame 370
Processed frame 380
Processed frame 390
Processed frame 400
Processing complete. Saved to output_rvm.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor

# Download RVM model (CORRECTED)
model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3", pretrained=True)  # or "resnet50" for better quality
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
from google.colab import files
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Video processing function
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    # Initialize video capture
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states
    rec = [None] * 4  # RVM uses 4 recurrent states
    downsample_ratio = 0.25  # Adjust based on video resolution

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with background
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy array
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"Processing complete. Saved to {output_path}")
    return output_path

# Process video
output_path = 'output-final_rvm.mp4'
process_video_rvm(video_path, output_path)

# Download result
files.download(output_path)

# Display HTML comparison
from IPython.display import display, HTML
display(HTML(f"""
<h3>Original vs RVM Processed</h3>
<div style="display: flex;">
    <div style="margin-right: 10px;">
        <h4>Original</h4>
        <video width="320" height="240" controls>
            <source src="{video_path}" type="video/mp4">
        </video>
    </div>
    <div>
        <h4>Background Removed</h4>
        <video width="320" height="240" controls>
            <source src="{output_path}" type="video/mp4">
        </video>
    </div>
</div>
"""))



Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving 265501_tiny.mp4 to 265501_tiny.mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
Processed frame 370
Processed frame 380
Processed frame 390
Processed frame 400
Processed frame 410
Processed frame 420
Processed frame 430
Processed frame 440
Processed frame 450
Processed frame 460
Processed frame 470
Processed frame 480
Process

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor

# Download RVM model (CORRECTED)
model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3", pretrained=True)  # or "resnet50" for better quality
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
from google.colab import files
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Video processing function
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    # Initialize video capture
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states
    rec = [None] * 4  # RVM uses 4 recurrent states
    downsample_ratio = 0.25  # Adjust based on video resolution

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with background
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy array
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"Processing complete. Saved to {output_path}")
    return output_path

# Process video
output_path = 'output-final_rvm.mp4'
process_video_rvm(video_path, output_path)

# Download result
files.download(output_path)

# Display HTML comparison
from IPython.display import display, HTML
display(HTML(f"""
<h3>Original vs RVM Processed</h3>
<div style="display: flex;">
    <div style="margin-right: 10px;">
        <h4>Original</h4>
        <video width="320" height="240" controls>
            <source src="{video_path}" type="video/mp4">
        </video>
    </div>
    <div>
        <h4>Background Removed</h4>
        <video width="320" height="240" controls>
            <source src="{output_path}" type="video/mp4">
        </video>
    </div>
</div>
"""))



Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving 253998_tiny.mp4 to 253998_tiny.mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processing complete. Saved to output-final_rvm.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor

# Download RVM model (CORRECTED)
model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3", pretrained=True)  # or "resnet50" for better quality
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
from google.colab import files
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Video processing function
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    # Initialize video capture
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states
    rec = [None] * 4  # RVM uses 4 recurrent states
    downsample_ratio = 0.25  # Adjust based on video resolution

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with background
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy array
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"Processing complete. Saved to {output_path}")
    return output_path

# Process video
output_path = 'output-final_rvm.mp4'
process_video_rvm(video_path, output_path)

# Download result
files.download(output_path)






Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving 268290_tiny.mp4 to 268290_tiny.mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
Processed frame 370
Processed frame 380
Processed frame 390
Processed frame 400
Processed frame 410
Processed frame 420
Processed frame 430
Processed frame 440
Processed frame 450
Processed frame 460
Processed frame 470
Processed frame 480
Process

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor

# Download RVM model (CORRECTED)
model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3", pretrained=True)  # or "resnet50" for better quality
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
from google.colab import files
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Video processing function
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    # Initialize video capture
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states
    rec = [None] * 4  # RVM uses 4 recurrent states
    downsample_ratio = 0.25  # Adjust based on video resolution

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with background
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy array
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"Processing complete. Saved to {output_path}")
    return output_path

# Process video
output_path = 'output-finaldance_rvm.mp4'
process_video_rvm(video_path, output_path)

# Download result
files.download(output_path)




Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

Downloading: "https://github.com/PeterL1n/RobustVideoMatting/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.pth" to /root/.cache/torch/hub/checkpoints/rvm_mobilenetv3.pth
100%|██████████| 14.5M/14.5M [00:00<00:00, 124MB/s] 


Saving 21827-336300898_tiny.mp4 to 21827-336300898_tiny (1).mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processing complete. Saved to output-final_rvm.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor

# Download RVM model (CORRECTED)
model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3", pretrained=True)  # or "resnet50" for better quality
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
from google.colab import files
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Video processing function
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    # Initialize video capture
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize recurrent states
    rec = [None] * 4  # RVM uses 4 recurrent states
    downsample_ratio = 0.25  # Adjust based on video resolution

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Composite with background
        out_frame = fgr * pha + bg * (1 - pha)

        # Convert to numpy array
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"Processing complete. Saved to {output_path}")
    return output_path

# Process video
output_path = 'output-final-crowd_rvm.mp4'
process_video_rvm(video_path, output_path)

# Download result
files.download(output_path)




Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

Downloading: "https://github.com/PeterL1n/RobustVideoMatting/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_mobilenetv3.pth" to /root/.cache/torch/hub/checkpoints/rvm_mobilenetv3.pth
100%|██████████| 14.5M/14.5M [00:00<00:00, 369MB/s]


Saving Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra.mp4 to Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra.mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
Processing complete. Saved to output-final-crowd_rvm.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Final Code

In [None]:
#final Code


# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor
from google.colab import files

# Download and load RVM model with ResNet50 backbone for better quality
model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50", pretrained=True)
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Process video with improved background removal
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    rec = [None] * 4  # Recurrent states for RVM
    downsample_ratio = 0.5  # Higher quality

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # === Contrast enhancement using LAB histogram equalization ===
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        l = cv2.equalizeHist(l)
        enhanced_frame = cv2.merge((l, a, b))
        frame = cv2.cvtColor(enhanced_frame, cv2.COLOR_LAB2BGR)

        # Convert to RGB and tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # RVM Inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Smooth alpha mask (bilateral filter)
        pha_np = pha[0, 0].cpu().numpy()
        pha_np = cv2.bilateralFilter((pha_np * 255).astype(np.uint8), 9, 75, 75)
        pha = torch.tensor(pha_np / 255).unsqueeze(0).unsqueeze(0)
        pha = pha.cuda() if torch.cuda.is_available() else pha

        # Compose output frame
        out_frame = fgr * pha + bg * (1 - pha)
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"Processing complete. Saved to {output_path}")
    return output_path

# Output path and process
output_path = 'output-enhanced-rvm.mp4'
process_video_rvm(video_path, output_path)

# Download result
files.download(output_path)



Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving 4841985-sd_426_226_25fps.mp4 to 4841985-sd_426_226_25fps.mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
Processed frame 370
Processed frame 380
Processed frame 390
Processed frame 400
Processed frame 410
Processing complete. Saved to output-enhanced-rvm.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# === Install required packages ===
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# === Import Libraries ===
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor
from google.colab import files

# === Load RVM model with ResNet50 backbone ===
model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50", pretrained=True)
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# === Upload video file ===
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# === Enhanced video processing function ===
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0), downsample_ratio=0.25):
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    rec = [None] * 4  # RVM recurrent states
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # === Contrast enhancement using CLAHE ===
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        l = clahe.apply(l)
        frame = cv2.cvtColor(cv2.merge((l, a, b)), cv2.COLOR_LAB2BGR)

        # Convert frame to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # === Smooth alpha with bilateral and Gaussian filters ===
        pha_np = pha[0, 0].cpu().numpy()
        pha_np = cv2.bilateralFilter((pha_np * 255).astype(np.uint8), 9, 75, 75)
        pha_np = cv2.GaussianBlur(pha_np, (5, 5), 0)
        pha_np = np.clip(pha_np, 0, 255)

        # Convert back to tensor
        pha = torch.tensor(pha_np / 255).unsqueeze(0).unsqueeze(0)
        pha = pha.cuda() if torch.cuda.is_available() else pha

        # === Optional: thresholding to remove ghosting artifacts ===
        pha = torch.where(pha > 0.05, pha, torch.zeros_like(pha))

        # === Compose the final output frame ===
        out_frame = fgr * pha + bg * (1 - pha)
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"✅ Processing complete. Saved to '{output_path}'")
    return output_path

# === Run processing ===
output_path = 'output-enhanced-rvm.mp4'
process_video_rvm(video_path, output_path)

# === Download final output ===
files.download(output_path)




Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra.mp4 to Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra (1).mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
✅ Processing complete. Saved to 'output-enhanced-rvm.mp4'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# === Install required packages ===
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow

# === Import libraries ===
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor
from google.colab import files

# === Load Robust Video Matting model (resnet50 for high quality) ===
model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50", pretrained=True)
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# === Upload video file ===
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# === Final processing function: removes background, keeps main character sharp ===
def process_video_rvm(input_path, output_path, downsample_ratio=0.25):
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    rec = [None] * 4  # RVM recurrent states
    black_bg = torch.zeros(1, 3, 1, 1).cuda() if torch.cuda.is_available() else torch.zeros(1, 3, 1, 1)

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # === LAB-based local contrast enhancement ===
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        l = clahe.apply(l)
        enhanced = cv2.merge((l, a, b))
        frame = cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR)

        # Convert to RGB and tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # === Run RVM inference ===
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # === Invisible outline effect using soft alpha feathering ===
        pha_np = pha[0, 0].cpu().numpy()
        pha_np = cv2.GaussianBlur(pha_np, (5, 5), 0)
        pha_np = np.clip(pha_np, 0.0, 1.0)

        # Optional soft thresholding to remove semi-transparent haze
        pha_np = np.where(pha_np > 0.04, pha_np, 0)

        pha = torch.tensor(pha_np).unsqueeze(0).unsqueeze(0)
        pha = pha.cuda() if torch.cuda.is_available() else pha

        # === Compose final frame with black background ===
        out_frame = fgr * pha + black_bg * (1 - pha)
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)

        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"✅ Final video saved to: {output_path}")
    return output_path

# === Set output path and run processing ===
output_path = 'final_character_only_output.mp4'
process_video_rvm(video_path, output_path)

# === Download the processed video ===
files.download(output_path)




Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra.mp4 to Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra (2).mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
✅ Final video saved to: final_character_only_output.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# === Install required packages ===
!pip install torch torchvision torchaudio --quiet
!pip install opencv-python numpy Pillow --quiet

# === Import libraries ===
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor
from google.colab import files

# === Load Robust Video Matting model (resnet50) ===
model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50", pretrained=True)
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# === Upload video file ===
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# === Final function: RVM + motion detection to extract main characters only ===
def process_video_rvm_with_motion(input_path, output_path, downsample_ratio=0.25):
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    rec = [None] * 4
    black_bg = torch.zeros(1, 3, 1, 1).cuda() if torch.cuda.is_available() else torch.zeros(1, 3, 1, 1)

    ret, prev_frame = cap.read()
    if not ret:
        print("Failed to read video.")
        return
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # === Local contrast enhancement ===
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        l = clahe.apply(l)
        frame = cv2.cvtColor(cv2.merge((l, a, b)), cv2.COLOR_LAB2BGR)

        # === Motion mask (frame differencing) ===
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        motion_mask = cv2.absdiff(gray, prev_gray)
        _, motion_mask = cv2.threshold(motion_mask, 25, 1, cv2.THRESH_BINARY)
        motion_mask = cv2.dilate(motion_mask, None, iterations=2)  # fill gaps
        motion_mask = cv2.GaussianBlur(motion_mask.astype(np.float32), (7, 7), 0)

        prev_gray = gray.copy()

        # === Convert to tensor ===
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # === RVM inference ===
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # === Refine alpha with motion mask ===
        pha_np = pha[0, 0].cpu().numpy()
        pha_np = cv2.GaussianBlur(pha_np, (5, 5), 0)

        # Combine with motion: Only keep alpha where there's motion
        combined_alpha = pha_np * motion_mask
        combined_alpha = np.clip(combined_alpha, 0.0, 1.0)
        combined_alpha = np.where(combined_alpha > 0.04, combined_alpha, 0)

        # === Final composite ===
        pha = torch.tensor(combined_alpha).unsqueeze(0).unsqueeze(0)
        pha = pha.cuda() if torch.cuda.is_available() else pha
        out_frame = fgr * pha + black_bg * (1 - pha)

        # Convert and write
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)
        out.write(out_frame)

        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"✅ Final video saved to: {output_path}")
    return output_path

# === Set output path and run ===
output_path = "main_character_motion_output.mp4"
process_video_rvm_with_motion(video_path, output_path)

# === Download the result ===
files.download(output_path)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m41.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Downloading: "https://github.com/PeterL1n/RobustVideoMatting/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/PeterL1n/RobustVideoMatting/releases/download/v1.0.0/rvm_resnet50.pth" to /root/.cache/torch/hub/checkpoints/rvm_resnet50.pth
100%|██████████| 103M/103M [00:00<00:00, 153MB/s]


Saving Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra.mp4 to Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra.mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
✅ Final video saved to: main_character_motion_output.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
#final 2

import torch
import cv2
import numpy as np
from torchvision.transforms import ToTensor
from google.colab import files

# Load RVM model
model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50", pretrained=True)
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

def process_video_rvm_keep_characters(input_path, output_path, downsample_ratio=0.25):
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    rec = [None] * 4
    black_bg = torch.zeros(1, 3, 1, 1).cuda() if torch.cuda.is_available() else torch.zeros(1, 3, 1, 1)

    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Contrast enhancement (optional but recommended)
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        l = clahe.apply(l)
        frame = cv2.cvtColor(cv2.merge((l,a,b)), cv2.COLOR_LAB2BGR)

        # Convert to RGB and tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Smooth alpha mask to remove harsh edges
        pha_np = pha[0,0].cpu().numpy()
        pha_np = cv2.GaussianBlur(pha_np, (7,7), 0)
        pha_np = np.clip(pha_np, 0, 1)

        # Threshold alpha gently: keep almost all foreground
        alpha_thresh = 0.02
        pha_np = np.where(pha_np > alpha_thresh, pha_np, 0)

        pha = torch.tensor(pha_np).unsqueeze(0).unsqueeze(0)
        pha = pha.cuda() if torch.cuda.is_available() else pha

        # Composite foreground over black background
        out_frame = fgr * pha + black_bg * (1 - pha)

        # Convert back to BGR for saving
        out_frame = out_frame[0].permute(1,2,0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)

        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"✅ Video saved at: {output_path}")
    return output_path

output_path = "character_only_output.mp4"
process_video_rvm_keep_characters(video_path, output_path)
files.download(output_path)


Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra.mp4 to Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra (1).mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
✅ Video saved at: character_only_output.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
#enhance
import torch
import cv2
import numpy as np
from torchvision.transforms import ToTensor
from google.colab import files

# Load RVM model
model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50", pretrained=True)
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

def enhance_foreground(fgr_np):
    # fgr_np: float32 numpy array, range [0,1], shape (H,W,3), RGB format

    # Convert to uint8 for OpenCV processing
    img = (fgr_np * 255).astype(np.uint8)

    # Convert to LAB to adjust brightness and contrast (L channel)
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)

    # Apply CLAHE for local contrast enhancement on L channel
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
    l = clahe.apply(l)

    lab_enhanced = cv2.merge((l, a, b))
    img_enhanced = cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2RGB)

    # Sharpening kernel
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]])
    img_sharp = cv2.filter2D(img_enhanced, -1, kernel)

    # Increase saturation a bit
    hsv = cv2.cvtColor(img_sharp, cv2.COLOR_RGB2HSV).astype(np.float32)
    hsv[...,1] = np.clip(hsv[...,1] * 1.2, 0, 255)  # increase saturation by 20%
    img_sat = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB)

    # Normalize back to [0,1]
    img_final = img_sat.astype(np.float32) / 255.0
    return img_final

def process_video_rvm_enhance_character(input_path, output_path, downsample_ratio=0.25):
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    rec = [None] * 4
    black_bg = torch.zeros(1, 3, 1, 1).cuda() if torch.cuda.is_available() else torch.zeros(1, 3, 1, 1)

    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Contrast enhancement (optional)
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        l = clahe.apply(l)
        frame = cv2.cvtColor(cv2.merge((l,a,b)), cv2.COLOR_LAB2BGR)

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        pha_np = pha[0,0].cpu().numpy()
        pha_np = cv2.GaussianBlur(pha_np, (7,7), 0)
        pha_np = np.clip(pha_np, 0, 1)
        pha_np = np.where(pha_np > 0.02, pha_np, 0)

        pha = torch.tensor(pha_np).unsqueeze(0).unsqueeze(0)
        pha = pha.cuda() if torch.cuda.is_available() else pha

        # Convert foreground tensor to numpy
        fgr_np = fgr[0].permute(1,2,0).cpu().numpy()  # H,W,3 RGB float32 [0,1]

        # Enhance foreground image
        fgr_enhanced = enhance_foreground(fgr_np)

        # Convert enhanced foreground back to tensor
        fgr_enhanced_tensor = torch.from_numpy(fgr_enhanced).permute(2,0,1).unsqueeze(0).float()
        fgr_enhanced_tensor = fgr_enhanced_tensor.cuda() if torch.cuda.is_available() else fgr_enhanced_tensor

        # Composite enhanced foreground over black background
        out_frame = fgr_enhanced_tensor * pha + black_bg * (1 - pha)

        out_frame = out_frame[0].permute(1,2,0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)

        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"✅ Enhanced video saved at: {output_path}")
    return output_path

output_path = "enhanced_character_output.mp4"
process_video_rvm_enhance_character(video_path, output_path)
files.download(output_path)


Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra.mp4 to Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra (2).mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
✅ Enhanced video saved at: enhanced_character_output.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
#preserve the main character fully, add a smooth invisible outline, and remove only the background cleanly by using dilation + Gaussian blur + soft thresholding on the alpha mask:
import torch
import cv2
import numpy as np
from torchvision.transforms import ToTensor
from google.colab import files

# Load RVM model (resnet50)
model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50", pretrained=True)
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

def process_video_rvm_keep_characters(input_path, output_path, downsample_ratio=0.25):
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    rec = [None] * 4
    black_bg = torch.zeros(1, 3, 1, 1).cuda() if torch.cuda.is_available() else torch.zeros(1, 3, 1, 1)

    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Optional: contrast enhancement (CLAHE)
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        l = clahe.apply(l)
        frame = cv2.cvtColor(cv2.merge((l,a,b)), cv2.COLOR_LAB2BGR)

        # Convert BGR to RGB and then to tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Convert alpha mask to numpy
        pha_np = pha[0,0].cpu().numpy()

        # Dilate alpha mask slightly to keep thin outlines
        kernel = np.ones((3,3), np.uint8)
        pha_dilated = cv2.dilate((pha_np * 255).astype(np.uint8), kernel, iterations=1)
        pha_dilated = pha_dilated.astype(np.float32) / 255.0

        # Gaussian blur for smooth feathering of edges
        pha_blurred = cv2.GaussianBlur(pha_dilated, (9,9), 0)

        # Soft thresholding: remap alpha to preserve main character with smooth edges
        alpha_thresh = 0.03
        pha_final = np.clip((pha_blurred - alpha_thresh) / (1 - alpha_thresh), 0, 1)

        # Convert back to torch tensor and move to device
        pha = torch.tensor(pha_final).unsqueeze(0).unsqueeze(0)
        pha = pha.cuda() if torch.cuda.is_available() else pha

        # Composite foreground over black background
        out_frame = fgr * pha + black_bg * (1 - pha)

        # Convert output frame to BGR uint8 for video writing
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)

        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"✅ Video saved at: {output_path}")
    return output_path

# Output video path
output_path = "character_only_output.mp4"

# Run processing
process_video_rvm_keep_characters(video_path, output_path)

# Download output video
files.download(output_path)


Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra.mp4 to Sang Maar Gayi Dance Video#shorts #dance #punjabidance #bhangra (3).mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Processed frame 330
Processed frame 340
Processed frame 350
Processed frame 360
✅ Video saved at: character_only_output.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [2]:
#final model

# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python numpy Pillow
!pip install moviepy

# Import libraries
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import ToTensor
from google.colab import files
from moviepy.editor import VideoFileClip

# Load RVM model with ResNet50 backbone
model = torch.hub.load("PeterL1n/RobustVideoMatting", "resnet50", pretrained=True)
model = model.eval().cuda() if torch.cuda.is_available() else model.eval()

# Upload video file
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Process video with background removal and enhanced quality
def process_video_rvm(input_path, output_path, background_color=(0, 0, 0)):
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    rec = [None] * 4  # Recurrent states for RVM
    downsample_ratio = 0.5  # Quality control

    # Background tensor
    bg = torch.tensor(background_color).view(1, 3, 1, 1).float() / 255
    bg = bg.cuda() if torch.cuda.is_available() else bg

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # LAB histogram equalization for better contrast
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        l = cv2.equalizeHist(l)
        enhanced_frame = cv2.merge((l, a, b))
        frame = cv2.cvtColor(enhanced_frame, cv2.COLOR_LAB2BGR)

        # Convert frame to RGB and tensor
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src = ToTensor()(frame_rgb).unsqueeze(0)
        src = src.cuda() if torch.cuda.is_available() else src

        # RVM inference
        with torch.no_grad():
            fgr, pha, *rec = model(src, *rec, downsample_ratio)

        # Smooth alpha mask
        pha_np = pha[0, 0].cpu().numpy()
        pha_np = cv2.bilateralFilter((pha_np * 255).astype(np.uint8), 9, 75, 75)
        pha = torch.tensor(pha_np / 255).unsqueeze(0).unsqueeze(0)
        pha = pha.cuda() if torch.cuda.is_available() else pha

        # Compose the final frame
        out_frame = fgr * pha + bg * (1 - pha)
        out_frame = out_frame[0].permute(1, 2, 0).cpu().numpy()
        out_frame = (out_frame * 255).astype(np.uint8)
        out_frame = cv2.cvtColor(out_frame, cv2.COLOR_RGB2BGR)

        out.write(out_frame)
        frame_count += 1
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count}")

    cap.release()
    out.release()
    print(f"Video processing complete. Saved to {output_path}")
    return output_path

# Step 1: Background removal
output_path = 'output-enhanced-rvm.mp4'
process_video_rvm(video_path, output_path)

# Step 2: Add original audio using moviepy
def add_original_audio(input_video_path, processed_video_path, final_output_path):
    original = VideoFileClip(input_video_path)
    processed = VideoFileClip(processed_video_path)

    # Set original audio on the processed video
    final = processed.set_audio(original.audio)
    final.write_videofile(final_output_path, codec='libx264', audio_codec='aac')
    return final_output_path

# Step 3: Create final video with original audio
final_output = 'final_output_with_audio.mp4'
add_original_audio(video_path, output_path, final_output)

# Step 4: Download the final result
files.download(final_output)




Using cache found in /root/.cache/torch/hub/PeterL1n_RobustVideoMatting_master


Saving videoplayback (1).mp4 to videoplayback (1).mp4
Processed frame 10
Processed frame 20
Processed frame 30
Processed frame 40
Processed frame 50
Processed frame 60
Processed frame 70
Processed frame 80
Processed frame 90
Processed frame 100
Processed frame 110
Processed frame 120
Processed frame 130
Processed frame 140
Processed frame 150
Processed frame 160
Processed frame 170
Processed frame 180
Processed frame 190
Processed frame 200
Processed frame 210
Processed frame 220
Processed frame 230
Processed frame 240
Processed frame 250
Processed frame 260
Processed frame 270
Processed frame 280
Processed frame 290
Processed frame 300
Processed frame 310
Processed frame 320
Video processing complete. Saved to output-enhanced-rvm.mp4
Moviepy - Building video final_output_with_audio.mp4.
Moviepy - Writing video final_output_with_audio.mp4





Moviepy - Done !
Moviepy - video ready final_output_with_audio.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>